From 8ae7a21da22b47c819d380cfb5f452fcec1ef5c4 Mon Sep 17 00:00:00 2001 From: Jan Mercl <0xjnml@gmail.com> Date: Sun, 23 May 2021 14:14:40 +0200 Subject: [PATCH] mersenne: add Sqr goos: linux goarch: amd64 pkg: modernc.org/mathutil/mersenne cpu: AMD Ryzen 9 3900X 12-Core Processor BenchmarkSqr BenchmarkSqr/M1-mersenne-sqr BenchmarkSqr/M1-mersenne-sqr-24 13628048 79.38 ns/op 40 B/op 2 allocs/op BenchmarkSqr/M1-math-big-sqr BenchmarkSqr/M1-math-big-sqr-24 8802544 131.7 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M2-mersenne-sqr BenchmarkSqr/M2-mersenne-sqr-24 16625383 81.08 ns/op 40 B/op 2 allocs/op BenchmarkSqr/M2-math-big-sqr BenchmarkSqr/M2-math-big-sqr-24 9768525 131.8 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M4-mersenne-sqr BenchmarkSqr/M4-mersenne-sqr-24 14647800 85.37 ns/op 40 B/op 2 allocs/op BenchmarkSqr/M4-math-big-sqr BenchmarkSqr/M4-math-big-sqr-24 9056818 130.9 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M8-mersenne-sqr BenchmarkSqr/M8-mersenne-sqr-24 11580205 98.52 ns/op 40 B/op 2 allocs/op BenchmarkSqr/M8-math-big-sqr BenchmarkSqr/M8-math-big-sqr-24 9426870 128.2 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M16-mersenne-sqr BenchmarkSqr/M16-mersenne-sqr-24 10476908 116.7 ns/op 40 B/op 2 allocs/op BenchmarkSqr/M16-math-big-sqr BenchmarkSqr/M16-math-big-sqr-24 8949414 134.0 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M32-mersenne-sqr BenchmarkSqr/M32-mersenne-sqr-24 6495727 173.0 ns/op 48 B/op 2 allocs/op BenchmarkSqr/M32-math-big-sqr BenchmarkSqr/M32-math-big-sqr-24 9296271 132.0 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M64-mersenne-sqr BenchmarkSqr/M64-mersenne-sqr-24 4802661 241.9 ns/op 56 B/op 2 allocs/op BenchmarkSqr/M64-math-big-sqr BenchmarkSqr/M64-math-big-sqr-24 10145700 133.9 ns/op 88 B/op 3 allocs/op BenchmarkSqr/M128-mersenne-sqr BenchmarkSqr/M128-mersenne-sqr-24 3003795 413.0 ns/op 80 B/op 2 allocs/op BenchmarkSqr/M128-math-big-sqr BenchmarkSqr/M128-math-big-sqr-24 5549552 209.3 ns/op 144 B/op 3 allocs/op BenchmarkSqr/M256-mersenne-sqr BenchmarkSqr/M256-mersenne-sqr-24 1659831 722.0 ns/op 112 B/op 2 allocs/op BenchmarkSqr/M256-math-big-sqr BenchmarkSqr/M256-math-big-sqr-24 4880001 251.4 ns/op 192 B/op 3 allocs/op BenchmarkSqr/M512-mersenne-sqr BenchmarkSqr/M512-mersenne-sqr-24 858310 1349 ns/op 176 B/op 2 allocs/op BenchmarkSqr/M512-math-big-sqr BenchmarkSqr/M512-math-big-sqr-24 3462825 329.5 ns/op 288 B/op 3 allocs/op BenchmarkSqr/M1024-mersenne-sqr BenchmarkSqr/M1024-mersenne-sqr-24 420891 2565 ns/op 320 B/op 2 allocs/op BenchmarkSqr/M1024-math-big-sqr BenchmarkSqr/M1024-math-big-sqr-24 1964814 615.2 ns/op 480 B/op 3 allocs/op BenchmarkSqr/M2048-mersenne-sqr BenchmarkSqr/M2048-mersenne-sqr-24 206782 5177 ns/op 608 B/op 2 allocs/op BenchmarkSqr/M2048-math-big-sqr BenchmarkSqr/M2048-math-big-sqr-24 1000000 1255 ns/op 896 B/op 3 allocs/op BenchmarkSqr/M4096-mersenne-sqr BenchmarkSqr/M4096-mersenne-sqr-24 108934 10175 ns/op 1184 B/op 2 allocs/op BenchmarkSqr/M4096-math-big-sqr BenchmarkSqr/M4096-math-big-sqr-24 377995 3055 ns/op 1762 B/op 3 allocs/op BenchmarkSqr/M8192-mersenne-sqr BenchmarkSqr/M8192-mersenne-sqr-24 59828 20511 ns/op 2336 B/op 2 allocs/op BenchmarkSqr/M8192-math-big-sqr BenchmarkSqr/M8192-math-big-sqr-24 148688 8677 ns/op 3492 B/op 3 allocs/op BenchmarkSqr/M16384-mersenne-sqr BenchmarkSqr/M16384-mersenne-sqr-24 30609 38871 ns/op 4896 B/op 2 allocs/op BenchmarkSqr/M16384-math-big-sqr BenchmarkSqr/M16384-math-big-sqr-24 48075 25592 ns/op 7211 B/op 3 allocs/op BenchmarkSqr/M32768-mersenne-sqr BenchmarkSqr/M32768-mersenne-sqr-24 15621 82111 ns/op 9504 B/op 2 allocs/op BenchmarkSqr/M32768-math-big-sqr BenchmarkSqr/M32768-math-big-sqr-24 14744 87142 ns/op 32205 B/op 3 allocs/op BenchmarkSqr/M65536-mersenne-sqr BenchmarkSqr/M65536-mersenne-sqr-24 7050 154968 ns/op 18464 B/op 2 allocs/op BenchmarkSqr/M65536-math-big-sqr BenchmarkSqr/M65536-math-big-sqr-24 6040 237927 ns/op 66936 B/op 3 allocs/op BenchmarkSqr/M131072-mersenne-sqr BenchmarkSqr/M131072-mersenne-sqr-24 3741 324530 ns/op 40992 B/op 2 allocs/op BenchmarkSqr/M131072-math-big-sqr BenchmarkSqr/M131072-math-big-sqr-24 1988 615355 ns/op 125121 B/op 3 allocs/op BenchmarkSqr/M262144-mersenne-sqr BenchmarkSqr/M262144-mersenne-sqr-24 1950 620119 ns/op 73760 B/op 2 allocs/op BenchmarkSqr/M262144-math-big-sqr BenchmarkSqr/M262144-math-big-sqr-24 688 1755040 ns/op 246108 B/op 3 allocs/op BenchmarkSqr/M524288-mersenne-sqr BenchmarkSqr/M524288-mersenne-sqr-24 956 1290968 ns/op 139296 B/op 2 allocs/op BenchmarkSqr/M524288-math-big-sqr BenchmarkSqr/M524288-math-big-sqr-24 246 4932569 ns/op 476234 B/op 3 allocs/op BenchmarkSqr/M1048576-mersenne-sqr BenchmarkSqr/M1048576-mersenne-sqr-24 470 2560887 ns/op 270368 B/op 2 allocs/op BenchmarkSqr/M1048576-math-big-sqr BenchmarkSqr/M1048576-math-big-sqr-24 84 14315411 ns/op 935650 B/op 4 allocs/op BenchmarkSqr/M2097152-mersenne-sqr BenchmarkSqr/M2097152-mersenne-sqr-24 225 5156848 ns/op 532512 B/op 2 allocs/op BenchmarkSqr/M2097152-math-big-sqr BenchmarkSqr/M2097152-math-big-sqr-24 26 42251646 ns/op 1854422 B/op 4 allocs/op BenchmarkSqr/M4194304-mersenne-sqr BenchmarkSqr/M4194304-mersenne-sqr-24 117 10486764 ns/op 1056800 B/op 2 allocs/op BenchmarkSqr/M4194304-math-big-sqr BenchmarkSqr/M4194304-math-big-sqr-24 9 118619660 ns/op 3689229 B/op 4 allocs/op BenchmarkSqr/M8388608-mersenne-sqr BenchmarkSqr/M8388608-mersenne-sqr-24 61 19271563 ns/op 2105377 B/op 2 allocs/op BenchmarkSqr/M8388608-math-big-sqr BenchmarkSqr/M8388608-math-big-sqr-24 3 347243627 ns/op 7360130 B/op 5 allocs/op BenchmarkSqr/M16777216-mersenne-sqr BenchmarkSqr/M16777216-mersenne-sqr-24 27 37213931 ns/op 4202528 B/op 2 allocs/op BenchmarkSqr/M16777216-math-big-sqr BenchmarkSqr/M16777216-math-big-sqr-24 1 1044838615 ns/op 14704448 B/op 7 allocs/op BenchmarkSqr/M33554432-mersenne-sqr BenchmarkSqr/M33554432-mersenne-sqr-24 15 73673501 ns/op 8396838 B/op 2 allocs/op BenchmarkSqr/M33554432-math-big-sqr BenchmarkSqr/M33554432-math-big-sqr-24 1 3144805101 ns/op 29384512 B/op 7 allocs/op BenchmarkSqr/M67108864-mersenne-sqr BenchmarkSqr/M67108864-mersenne-sqr-24 8 145513519 ns/op 16785440 B/op 2 allocs/op BenchmarkSqr/M67108864-math-big-sqr BenchmarkSqr/M67108864-math-big-sqr-24 1 9167346999 ns/op 58744640 B/op 7 allocs/op BenchmarkSqr/M134217728-mersenne-sqr BenchmarkSqr/M134217728-mersenne-sqr-24 4 282906478 ns/op 33562656 B/op 2 allocs/op BenchmarkSqr/M134217728-math-big-sqr BenchmarkSqr/M134217728-math-big-sqr-24 1 27725921736 ns/op 117464896 B/op 7 allocs/op BenchmarkSqr/M268435456-mersenne-sqr BenchmarkSqr/M268435456-mersenne-sqr-24 2 584291840 ns/op 67117088 B/op 2 allocs/op BenchmarkSqr/M268435456-math-big-sqr BenchmarkSqr/M268435456-math-big-sqr-24 1 83496304443 ns/op 234905408 B/op 7 allocs/op PASS ok modernc.org/mathutil/mersenne 203.388s --- mersenne/all_test.go | 37 ++++++++++++++++++++++++ mersenne/mersenne.go | 68 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/mersenne/all_test.go b/mersenne/all_test.go index 8ee123d..8c26060 100644 --- a/mersenne/all_test.go +++ b/mersenne/all_test.go @@ -5,6 +5,7 @@ package mersenne // import "modernc.org/mathutil/mersenne" import ( + "fmt" "math" "math/big" "math/rand" @@ -941,3 +942,39 @@ func TestModPow2(t *testing.T) { f(uint32(rg.Next()), uint32(rg.Next())) } } + +func TestSqr(t *testing.T) { + const N = 25000 + for i := 0; i <= N; i++ { + g := Sqr(uint32(i)) + e := New(uint32(i)) + e.Mul(e, e) + if g.Cmp(e) != 0 { + t.Errorf("Sqr of M%d incorrect\ngot %s %#x\nexp %s %#x", i, g, g.Bits(), e, e.Bits()) + } + } +} + +var bigResult *big.Int + +func BenchmarkSqr(b *testing.B) { + for i := 0; i <= 28; i++ { + n := uint32(1) << i + b.Run(fmt.Sprintf("M%d-mersenne-sqr", n), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + bigResult = Sqr(n) + } + }) + b.Run(fmt.Sprintf("M%d-math-big-sqr", n), func(b *testing.B) { + a := New(n) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + b := big.NewInt(0).Set(a) + b.Mul(b, b) + bigResult = b + } + }) + } +} diff --git a/mersenne/mersenne.go b/mersenne/mersenne.go index 535d478..984d4dd 100644 --- a/mersenne/mersenne.go +++ b/mersenne/mersenne.go @@ -24,8 +24,8 @@ import ( "math" "math/big" - "modernc.org/mathutil" "github.com/remyoudompheng/bigfft" + "modernc.org/mathutil" ) var ( @@ -34,7 +34,7 @@ var ( _2 = big.NewInt(2) ) -// Knowns list the exponent of currently (March 2012) known Mersenne primes +// Knowns list the exponent of currently (May 2021) known Mersenne primes // exponents in order. See also: http://oeis.org/A000043 for a partial list. var Knowns = []uint32{ 2, // #1 @@ -91,10 +91,12 @@ var Knowns = []uint32{ 57885161, // #48 74207281, // #49 77232917, // #50 + + 82589933, // #51 } // Known maps the exponent of known Mersenne primes its ordinal number/rank. -// Ranks > 45 are currently provisional. +// Ranks > 47 are currently provisional. var Known map[uint32]int func init() { @@ -296,3 +298,63 @@ func ProbablyPrime(n, a uint32) bool { x := ModPow(a, n-1, n) return x.Cmp(_1) == 0 || x.Cmp(nMinus1) == 0 } + +// Sqr returns the square of Mn. +func Sqr(n uint32) *big.Int { + if n == 0 { + // (2^(0)-1)^2 = 0 + return big.NewInt(0) + } + + bitSize := 2 * int(n) + wordSize := bitSize/mathutil.IntBits + 1 + bits := make([]big.Word, 0, wordSize) + var d, c, w big.Word + m := big.Word(1) + for i := uint32(0); i < n; i++ { + d++ + c += d + if c&1 != 0 { + w |= m + } + m <<= 1 + c >>= 1 + if m == 0 { + bits = append(bits, w) + w = 0 + m = 1 + } + } + for i := uint32(1); i < n; i++ { + d-- + c += d + if c&1 != 0 { + w |= m + } + m <<= 1 + c >>= 1 + if m == 0 { + bits = append(bits, w) + w = 0 + m = 1 + } + } + for c != 0 { + if c&1 != 0 { + w |= m + } + m <<= 1 + c >>= 1 + if m == 0 { + bits = append(bits, w) + w = 0 + m = 1 + } + } + if w != 0 { + bits = append(bits, w) + } + var r big.Int + r.SetBits(bits) + return &r +} -- GitLab