Commit ee1f6361 authored by Christoph Conrads's avatar Christoph Conrads

Toy around with AWC/SWB implementations

parent 63c58cdf
......@@ -78,6 +78,18 @@ std::string get_name(
}
template<typename T, std::size_t W, std::size_t R, std::size_t S>
std::string get_name(const subtract_with_borrow_engine<T, W, R, S>&)
{
constexpr auto FORMAT = "SWB(2^%zu, %2zu, %2zu)";
char buffer[80] = { 0 };
snprintf(buffer, sizeof(buffer), FORMAT, W, R, S);
return buffer;
}
template<typename T, std::size_t W, std::size_t S, std::size_t R>
std::string get_name(
const std::subtract_with_carry_engine<T, W, S, R>&
......
......@@ -11,6 +11,7 @@
#include <cstdint>
#include <limits>
#include <random>
#include <cstring>
#include <type_traits>
......@@ -89,6 +90,196 @@ struct xoshiro128plus
};
namespace impl
{
template<typename T, std::size_t w, std::size_t s, std::size_t r>
struct add_with_carry_engine
{
static T execute(std::size_t* p_index, T* p_carry, T(&xs)[r])
{
using UInt = typename std::conditional<
w == 16u, std::uint32_t, typename std::conditional<
w == 32u, std::uint64_t, typename std::conditional<
w == 64u, unsigned __int128, void
>::type>::type>::type;
assert(p_index);
assert(p_carry);
auto index = *p_index;
auto carry = *p_carry;
auto i = index;
auto j = index >= s ? index - s : index + r - s;
auto u = xs[i];
auto v = xs[j];
assert(carry == 0 or carry == 1);
auto x = UInt{u} + v + carry;
auto c = x >> w;
xs[i] = x;
*p_carry = c;
*p_index = index + 1u == r ? 0u : index + 1u;
return x;
}
};
template<std::size_t r>
struct add_with_carry_engine<std::uint16_t, 16u, 2u, r>
{
static constexpr auto w = 16u;
static constexpr auto s = std::size_t{2};
static constexpr auto short_lag = s;
static constexpr auto long_lag = r;
static_assert(long_lag > short_lag, "");
static std::uint16_t execute(
std::size_t* p_index, std::uint16_t* p_carry, std::uint16_t(&xs)[r]
)
{
using UInt = std::uint64_t;
assert(p_index);
assert(p_carry);
auto index = *p_index;
auto carry = *p_carry;
assert(carry == 0 or carry == 1);
if(index > 0)
{
auto ret = xs[index];
*p_index = index + 1u == r ? 0u : index + 1u;
return ret;
}
for(auto i = std::size_t{0}; i+1u < short_lag; i += 2)
{
auto x = (UInt{xs[i+1]} << 16) | UInt{xs[i+0]};
auto y = (UInt{xs[r-s+i+1]} << 16) | UInt{xs[r-s+i+0]};
auto z = x + y + carry;
xs[i+0] = z;
xs[i+1] = z >> w;
carry = z >> 32;
}
for(auto i = short_lag; i+1u < long_lag; i += 2)
{
auto x = (UInt{xs[i+1]} << 16) | UInt{xs[i+0]};
auto y = (UInt{xs[i-s+1]} << 16) | UInt{xs[i-s+0]};
auto z = x + y + carry;
xs[i+0] = z;
xs[i+1] = z >> w;
carry = z >> 32;
}
static_assert(long_lag % 2 == 1, "");
auto x = std::uint32_t{xs[r-1]} + std::uint32_t{xs[r-s-1]} + carry;
xs[r-1] = x;
*p_carry = x >> w;
*p_index = 1u;
return xs[0];
}
};
template<std::size_t r>
struct add_with_carry_engine<std::uint32_t, 32u, 3u, r>
{
static constexpr auto w = std::size_t{32};
static constexpr auto s = std::size_t{3};
static constexpr auto short_lag = s;
static constexpr auto long_lag = r;
static_assert(long_lag > short_lag, "");
static std::uint32_t execute(
std::size_t* p_index, std::uint32_t* p_carry, std::uint32_t(&xs)[r]
)
{
assert(p_index);
assert(p_carry);
auto index = *p_index;
auto carry = *p_carry;
assert(carry == 0 or carry == 1);
if(index > 0)
{
auto ret = xs[index];
*p_index = index + 1u == r ? 0u : index + 1u;
return ret;
}
using T = std::uint64_t;
for(auto i = std::size_t{0}; i+2u < long_lag; i += 3u)
{
auto x1 = T{0};
auto x2 = T{0};
auto p_x = reinterpret_cast<char*>(xs + i);
std::memcpy(&x1, p_x + 0, 6);
std::memcpy(&x2, p_x + 6, 6);
auto y1 = T{0};
auto y2 = T{0};
auto p_y = i == 0
? reinterpret_cast<char*>(xs + r - s)
: reinterpret_cast<char*>(xs + i - short_lag)
;
std::memcpy(&y1, p_y + 0, 6);
std::memcpy(&y2, p_y + 6, 6);
auto z1 = x1 + y1 + carry;
auto c = z1 >> 48;
auto z2 = x2 + z1 + c;
std::memcpy(p_x + 0, &z1, 6);
std::memcpy(p_x + 6, &z2, 6);
carry = z2 >> 48;
}
static_assert(long_lag == 16u, "");
using UInt = std::uint64_t;
auto x = UInt{xs[r-1]} + UInt{xs[r-s-1]} + carry;
xs[r-1] = x;
*p_carry = x >> 32;
*p_index = 1u;
return xs[0];
}
};
}
/**
* This class implements an add-with-carry (AWC) pseudo-random number generator.
*
......@@ -114,7 +305,7 @@ struct add_with_carry_engine
// prototype implementation
static_assert(w == std::numeric_limits<T>::digits, "");
static_assert(w == 16u or w == 32u, "");
static_assert(w == 16u or w == 32u or w == 64u, "");
using result_type = T;
......@@ -145,10 +336,68 @@ struct add_with_carry_engine
T operator() ()
{
using UInt = typename std::conditional<
w == 16u, std::uint32_t, typename std::conditional<
w == 32u, std::uint64_t, void
>::type>::type;
using Implementation = impl::add_with_carry_engine<T, w, s, r>;
return Implementation::execute(&index_, &carry_, xs_);
}
void discard(unsigned long long n)
{
for(auto i = 0ull; i < n; ++i)
(*this)();
}
std::size_t index_ = 0;
T carry_ = 0;
T xs_[r] = { 0 };
};
template<typename T, std::size_t w, std::size_t r, std::size_t s>
struct subtract_with_borrow_engine
{
static_assert(std::is_integral<T>::value, "");
static_assert(not std::is_signed<T>::value, "");
static_assert(w <= std::numeric_limits<T>::digits, "");
static_assert(s > 0u, "");
static_assert(r > s, "");
// prototype implementation
static_assert(w == std::numeric_limits<T>::digits, "");
static_assert(w == 64u, "");
using result_type = T;
static constexpr auto long_lag = r;
static constexpr auto short_lag = s;
static constexpr auto word_size = w;
// `std::subtract_with_carry_engine::default_seed` is 19780503. I have no
// idea where this value is coming from so I use its larger prime factor
// instead.
static constexpr auto default_seed = std::uint32_t{387853};
static constexpr T max() { return std::numeric_limits<T>::max(); }
static constexpr T min() { return std::numeric_limits<T>::min(); }
explicit subtract_with_borrow_engine(std::uint64_t seed=default_seed)
{
auto gen = xoshiro128plus(seed);
for(auto& x : xs_)
x = gen();
// ensure entering a periodic sequence
discard(r);
}
T operator() ()
{
using UInt = unsigned __int128;
auto i = index_;
auto j = index_ >= s ? index_ - s : index_ + r - s;
......@@ -157,10 +406,10 @@ struct add_with_carry_engine
assert(carry_ == 0 or carry_ == 1);
auto x = UInt{u} + v + carry_;
auto c = x >> w;
auto x = UInt{u} - v - carry_;
auto c = -T(x >> w);
xs_[i] = T(x);
xs_[i] = x;
carry_ = c;
index_ = index_ + 1u == r ? 0u : index_ + 1u;
......@@ -176,12 +425,15 @@ struct add_with_carry_engine
std::size_t index_ = 0;
T carry_ = 0;
T xs_[r] = { 0 };
std::uint8_t carry_ = 0;
};
// RANLUX subtract-with-borrow(2^w, s, r)
using ranlux16_base =
std::subtract_with_carry_engine<std::uint16_t, 16u, 3u, 11u>
......@@ -205,6 +457,9 @@ using fast_ranlux64 = std::discard_block_engine<ranlux64_base, 197u, 26u>;
// RANLUX add-with-carry(2^w, r, s)
using ranlux16_awc_base = add_with_carry_engine<std::uint16_t, 16u, 2u, 9u>;
using ranlux32_awc_base = add_with_carry_engine<std::uint32_t, 32u, 3u, 16u>;
using ranlux64_awc_base = add_with_carry_engine<std::uint64_t, 64u, 14u, 25u>;
using ranlux64_swb_base =
subtract_with_borrow_engine<std::uint64_t, 64u, 62u, 3u>;
using ranlux16_awc = std::discard_block_engine<ranlux16_awc_base, 97u, 9u>;
using ranlux32_awc = std::discard_block_engine<ranlux32_awc_base, 277u, 16u>;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment