rlxt-vs-std.cpp 4.38 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
// Copyright 2019 Christoph Conrads
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// RANLUX Tools versus C++ Standard Library
//
// The code in this file compares the running time and throughput of the
// ranlux-tools subtract-with-borrow implementation with the C++11 standard
// library class `std::subtract_with_carry_engine`.

#include <cassert>
#include <cstdio>
#include <cstdint>
#include <ctime>
#include <limits>
#include <random>
Christoph Conrads's avatar
Christoph Conrads committed
19
#include "random-number-engine.hpp"
20 21 22 23
#include <string>
#include <type_traits>


Christoph Conrads's avatar
Christoph Conrads committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
// shorthand for the ranlux-tools namespace
namespace rlxt = ranlux_tools;

// C++ standard library subtract-with-carry engines serving as the baseline;
// the template arguments are the result type followed by three numeric
// parameters
using ranlux8_base =
	std::subtract_with_carry_engine<std::uint8_t, 8u, 4u, 7u>;
using ranlux16_base =
	std::subtract_with_carry_engine<std::uint16_t, 16u, 3u, 11u>;
using ranlux32_base =
	std::subtract_with_carry_engine<std::uint32_t, 32u, 3u, 17u>;
using ranlux64_base =
	std::subtract_with_carry_engine<std::uint64_t, 64u, 4u, 26u>;


// ranlux-tools subtract-with-borrow engines; every alias except
// `ranlux8_swb58_base` uses the same template arguments as one of the
// standard library aliases above
using ranlux8_swb47_base =
	rlxt::subtract_with_borrow_engine<std::uint8_t, 8u, 4u, 7u>;
using ranlux8_swb58_base =
	rlxt::subtract_with_borrow_engine<std::uint8_t, 8u, 5u, 8u>;
using ranlux16_swb_base =
	rlxt::subtract_with_borrow_engine<std::uint16_t, 16u, 3u, 11u>;
using ranlux32_swb_base =
	rlxt::subtract_with_borrow_engine<std::uint32_t, 32u, 3u, 17u>;
// the 64-bit variant is only declared when `RANLUX_TOOLS_HAS_INT128` evaluates
// to nonzero (presumably set by random-number-engine.hpp when 128-bit
// integers are available -- TODO confirm)
#if RANLUX_TOOLS_HAS_INT128
using ranlux64_swb_base =
	rlxt::subtract_with_borrow_engine<std::uint64_t, 64u, 4u, 26u>;
#endif
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66



// Reads the CPU-time clock of the current process.
//
// `clock()` is deliberately avoided because it does not work on Raspberry Pi.
// The status returned by `clock_gettime()` is not checked; the result starts
// out zero-initialized.
timespec get_cpu_time()
{
	timespec now{};

	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);

	return now;
}


// Converts a `timespec` into a single nanosecond count.
std::uintmax_t get_time_nsec(const timespec& tm)
{
	constexpr std::uintmax_t NSEC_PER_SEC = 1000u * 1000u * 1000u;
	const std::uintmax_t whole_seconds_nsec = NSEC_PER_SEC * tm.tv_sec;

	return tm.tv_nsec + whole_seconds_nsec;
}


Christoph Conrads's avatar
Christoph Conrads committed
67 68 69 70

// Display name of `std::ranlux24_base`.
std::string get_name(const std::ranlux24_base&)
{
	return std::string("std::ranlux24_base");
}


71
template<typename T, std::size_t W, std::size_t P, std::size_t Q>
Christoph Conrads's avatar
Christoph Conrads committed
72
std::string get_name(const rlxt::subtract_with_borrow_engine<T, W, P, Q>&)
73
{
Christoph Conrads's avatar
Christoph Conrads committed
74
	constexpr auto FORMAT = "RLX-SWB(2^%-2zu, %1zu, %2zu)";
75 76 77 78 79 80 81 82 83 84 85 86 87
	char buffer[80] = { 0 };

	snprintf(buffer, sizeof(buffer), FORMAT, W, P, Q);

	return buffer;
}


// Returns the display name of a standard library subtract-with-carry engine.
//
// The name has the form `STD-SWC(2^W, S, R)` and is built from the engine's
// numeric template arguments; the engine object itself is not inspected.
template<typename T, std::size_t W, std::size_t S, std::size_t R>
std::string get_name(
	const std::subtract_with_carry_engine<T, W, S, R>&
)
{
	constexpr auto FORMAT = "STD-SWC(2^%-2zu, %1zu, %2zu)";
	char buffer[80] = { 0 };

	// only `std::snprintf` is guaranteed to be declared by <cstdio>
	std::snprintf(buffer, sizeof(buffer), FORMAT, W, S, R);

	return buffer;
}


// Draws one value from `gen` and returns it.
//
// The function is marked `noinline` so that every draw in the benchmark loop
// remains an actual function call.
template<typename Engine>
__attribute__((noinline))
typename Engine::result_type draw(Engine& gen)
{
	const typename Engine::result_type value = gen();

	return value;
}


// Benchmarks one generator type and prints one table row.
//
// A default-constructed `Generator` is asked for `num_draws` values. The row
// contains the generator name, the elapsed CPU time in milliseconds, the
// throughput in bytes per second, and one further draw truncated to 16 bits
// (the "dummy" column).
template<typename Generator>
void run(std::uintmax_t num_draws)
{
	static_assert(Generator::min() == 0, "");

	// number of bytes delivered per draw, derived from the largest value the
	// generator can return; 0 if the range matches none of the known widths
	auto w =
		Generator::max() == std::numeric_limits<std::uint64_t>::max() ? 8u :
		Generator::max() == std::numeric_limits<std::uint32_t>::max() ? 4u :
		Generator::max() == (1u<<24) - 1u                             ? 3u :
		Generator::max() == std::numeric_limits<std::uint16_t>::max() ? 2u :
		Generator::max() == std::numeric_limits<std::uint8_t>::max() ?  1u : 0u
	;

	auto gen = Generator();
	auto t_0 = get_cpu_time();

	for(auto i = std::uintmax_t{0}; i < num_draws; ++i)
		draw(gen);

	auto t_1 = get_cpu_time();
	auto t_nsec = get_time_nsec(t_1) - get_time_nsec(t_0);
	auto t_msec = t_nsec / (1000u*1000u);
	// Use the nanosecond reading for the throughput: the truncated
	// millisecond count loses precision and is zero for runs shorter than
	// one millisecond. Floating-point arithmetic also avoids integer overflow
	// in the numerator.
	auto bytes_per_sec =
		t_nsec > 0 ? double(w) * double(num_draws) * 1e9 / double(t_nsec) : 0.0;
	auto name = get_name(gen);
	// one more draw whose value ends up in the output
	auto dummy = std::uint16_t(gen());

	std::printf(
		"%-25s  | %10ju | %20.2e | %hu\n",
		name.c_str(), t_msec, bytes_per_sec, dummy
	);
}


// The C++11 standard library RANLUX has only luxury level 3, meaning it
// discards considerably fewer values than theoretically required. This alias
// wraps `std::ranlux24_base` in a `std::discard_block_engine` with the
// arguments 389 and 24 instead.
using ranlux24 =
	std::discard_block_engine<std::ranlux24_base, 389u, 24u>;


int main()
{
	constexpr auto num_draws = std::uintmax_t{1000} * 1000u * 1000u;

	std::printf(
		"%-25s  | %10s | %20s | %s\n",
		"generator", "time(sec)", "throughput(byte/sec)", "dummy"
	);

Christoph Conrads's avatar
Christoph Conrads committed
155 156 157
	run<ranlux8_base>(num_draws);
	run<ranlux8_swb47_base>(num_draws);
	run<ranlux8_swb58_base>(num_draws);
158
	run<ranlux16_base>(num_draws);
Christoph Conrads's avatar
Christoph Conrads committed
159 160
	run<ranlux16_swb_base>(num_draws);
	run<std::ranlux24_base>(num_draws);
161
	run<ranlux32_base>(num_draws);
Christoph Conrads's avatar
Christoph Conrads committed
162 163
	run<ranlux32_swb_base>(num_draws);
#if RANLUX_TOOLS_HAS_INT128
164
	run<ranlux64_base>(num_draws);
Christoph Conrads's avatar
Christoph Conrads committed
165 166
	run<ranlux64_swb_base>(num_draws);
#endif
167
}