From 8ae7a21da22b47c819d380cfb5f452fcec1ef5c4 Mon Sep 17 00:00:00 2001
From: Jan Mercl <0xjnml@gmail.com>
Date: Sun, 23 May 2021 14:14:40 +0200
Subject: [PATCH] mersenne: add Sqr

goos: linux
goarch: amd64
pkg: modernc.org/mathutil/mersenne
cpu: AMD Ryzen 9 3900X 12-Core Processor
BenchmarkSqr
BenchmarkSqr/M1-mersenne-sqr
BenchmarkSqr/M1-mersenne-sqr-24         	13628048	        79.38 ns/op	      40 B/op	       2 allocs/op
BenchmarkSqr/M1-math-big-sqr
BenchmarkSqr/M1-math-big-sqr-24         	 8802544	       131.7 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M2-mersenne-sqr
BenchmarkSqr/M2-mersenne-sqr-24         	16625383	        81.08 ns/op	      40 B/op	       2 allocs/op
BenchmarkSqr/M2-math-big-sqr
BenchmarkSqr/M2-math-big-sqr-24         	 9768525	       131.8 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M4-mersenne-sqr
BenchmarkSqr/M4-mersenne-sqr-24         	14647800	        85.37 ns/op	      40 B/op	       2 allocs/op
BenchmarkSqr/M4-math-big-sqr
BenchmarkSqr/M4-math-big-sqr-24         	 9056818	       130.9 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M8-mersenne-sqr
BenchmarkSqr/M8-mersenne-sqr-24         	11580205	        98.52 ns/op	      40 B/op	       2 allocs/op
BenchmarkSqr/M8-math-big-sqr
BenchmarkSqr/M8-math-big-sqr-24         	 9426870	       128.2 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M16-mersenne-sqr
BenchmarkSqr/M16-mersenne-sqr-24        	10476908	       116.7 ns/op	      40 B/op	       2 allocs/op
BenchmarkSqr/M16-math-big-sqr
BenchmarkSqr/M16-math-big-sqr-24        	 8949414	       134.0 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M32-mersenne-sqr
BenchmarkSqr/M32-mersenne-sqr-24        	 6495727	       173.0 ns/op	      48 B/op	       2 allocs/op
BenchmarkSqr/M32-math-big-sqr
BenchmarkSqr/M32-math-big-sqr-24        	 9296271	       132.0 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M64-mersenne-sqr
BenchmarkSqr/M64-mersenne-sqr-24        	 4802661	       241.9 ns/op	      56 B/op	       2 allocs/op
BenchmarkSqr/M64-math-big-sqr
BenchmarkSqr/M64-math-big-sqr-24        	10145700	       133.9 ns/op	      88 B/op	       3 allocs/op
BenchmarkSqr/M128-mersenne-sqr
BenchmarkSqr/M128-mersenne-sqr-24       	 3003795	       413.0 ns/op	      80 B/op	       2 allocs/op
BenchmarkSqr/M128-math-big-sqr
BenchmarkSqr/M128-math-big-sqr-24       	 5549552	       209.3 ns/op	     144 B/op	       3 allocs/op
BenchmarkSqr/M256-mersenne-sqr
BenchmarkSqr/M256-mersenne-sqr-24       	 1659831	       722.0 ns/op	     112 B/op	       2 allocs/op
BenchmarkSqr/M256-math-big-sqr
BenchmarkSqr/M256-math-big-sqr-24       	 4880001	       251.4 ns/op	     192 B/op	       3 allocs/op
BenchmarkSqr/M512-mersenne-sqr
BenchmarkSqr/M512-mersenne-sqr-24       	  858310	      1349 ns/op	     176 B/op	       2 allocs/op
BenchmarkSqr/M512-math-big-sqr
BenchmarkSqr/M512-math-big-sqr-24       	 3462825	       329.5 ns/op	     288 B/op	       3 allocs/op
BenchmarkSqr/M1024-mersenne-sqr
BenchmarkSqr/M1024-mersenne-sqr-24      	  420891	      2565 ns/op	     320 B/op	       2 allocs/op
BenchmarkSqr/M1024-math-big-sqr
BenchmarkSqr/M1024-math-big-sqr-24      	 1964814	       615.2 ns/op	     480 B/op	       3 allocs/op
BenchmarkSqr/M2048-mersenne-sqr
BenchmarkSqr/M2048-mersenne-sqr-24      	  206782	      5177 ns/op	     608 B/op	       2 allocs/op
BenchmarkSqr/M2048-math-big-sqr
BenchmarkSqr/M2048-math-big-sqr-24      	 1000000	      1255 ns/op	     896 B/op	       3 allocs/op
BenchmarkSqr/M4096-mersenne-sqr
BenchmarkSqr/M4096-mersenne-sqr-24      	  108934	     10175 ns/op	    1184 B/op	       2 allocs/op
BenchmarkSqr/M4096-math-big-sqr
BenchmarkSqr/M4096-math-big-sqr-24      	  377995	      3055 ns/op	    1762 B/op	       3 allocs/op
BenchmarkSqr/M8192-mersenne-sqr
BenchmarkSqr/M8192-mersenne-sqr-24      	   59828	     20511 ns/op	    2336 B/op	       2 allocs/op
BenchmarkSqr/M8192-math-big-sqr
BenchmarkSqr/M8192-math-big-sqr-24      	  148688	      8677 ns/op	    3492 B/op	       3 allocs/op
BenchmarkSqr/M16384-mersenne-sqr
BenchmarkSqr/M16384-mersenne-sqr-24     	   30609	     38871 ns/op	    4896 B/op	       2 allocs/op
BenchmarkSqr/M16384-math-big-sqr
BenchmarkSqr/M16384-math-big-sqr-24     	   48075	     25592 ns/op	    7211 B/op	       3 allocs/op
BenchmarkSqr/M32768-mersenne-sqr
BenchmarkSqr/M32768-mersenne-sqr-24     	   15621	     82111 ns/op	    9504 B/op	       2 allocs/op
BenchmarkSqr/M32768-math-big-sqr
BenchmarkSqr/M32768-math-big-sqr-24     	   14744	     87142 ns/op	   32205 B/op	       3 allocs/op
BenchmarkSqr/M65536-mersenne-sqr
BenchmarkSqr/M65536-mersenne-sqr-24     	    7050	    154968 ns/op	   18464 B/op	       2 allocs/op
BenchmarkSqr/M65536-math-big-sqr
BenchmarkSqr/M65536-math-big-sqr-24     	    6040	    237927 ns/op	   66936 B/op	       3 allocs/op
BenchmarkSqr/M131072-mersenne-sqr
BenchmarkSqr/M131072-mersenne-sqr-24    	    3741	    324530 ns/op	   40992 B/op	       2 allocs/op
BenchmarkSqr/M131072-math-big-sqr
BenchmarkSqr/M131072-math-big-sqr-24    	    1988	    615355 ns/op	  125121 B/op	       3 allocs/op
BenchmarkSqr/M262144-mersenne-sqr
BenchmarkSqr/M262144-mersenne-sqr-24    	    1950	    620119 ns/op	   73760 B/op	       2 allocs/op
BenchmarkSqr/M262144-math-big-sqr
BenchmarkSqr/M262144-math-big-sqr-24    	     688	   1755040 ns/op	  246108 B/op	       3 allocs/op
BenchmarkSqr/M524288-mersenne-sqr
BenchmarkSqr/M524288-mersenne-sqr-24    	     956	   1290968 ns/op	  139296 B/op	       2 allocs/op
BenchmarkSqr/M524288-math-big-sqr
BenchmarkSqr/M524288-math-big-sqr-24    	     246	   4932569 ns/op	  476234 B/op	       3 allocs/op
BenchmarkSqr/M1048576-mersenne-sqr
BenchmarkSqr/M1048576-mersenne-sqr-24   	     470	   2560887 ns/op	  270368 B/op	       2 allocs/op
BenchmarkSqr/M1048576-math-big-sqr
BenchmarkSqr/M1048576-math-big-sqr-24   	      84	  14315411 ns/op	  935650 B/op	       4 allocs/op
BenchmarkSqr/M2097152-mersenne-sqr
BenchmarkSqr/M2097152-mersenne-sqr-24   	     225	   5156848 ns/op	  532512 B/op	       2 allocs/op
BenchmarkSqr/M2097152-math-big-sqr
BenchmarkSqr/M2097152-math-big-sqr-24   	      26	  42251646 ns/op	 1854422 B/op	       4 allocs/op
BenchmarkSqr/M4194304-mersenne-sqr
BenchmarkSqr/M4194304-mersenne-sqr-24   	     117	  10486764 ns/op	 1056800 B/op	       2 allocs/op
BenchmarkSqr/M4194304-math-big-sqr
BenchmarkSqr/M4194304-math-big-sqr-24   	       9	 118619660 ns/op	 3689229 B/op	       4 allocs/op
BenchmarkSqr/M8388608-mersenne-sqr
BenchmarkSqr/M8388608-mersenne-sqr-24   	      61	  19271563 ns/op	 2105377 B/op	       2 allocs/op
BenchmarkSqr/M8388608-math-big-sqr
BenchmarkSqr/M8388608-math-big-sqr-24   	       3	 347243627 ns/op	 7360130 B/op	       5 allocs/op
BenchmarkSqr/M16777216-mersenne-sqr
BenchmarkSqr/M16777216-mersenne-sqr-24  	      27	  37213931 ns/op	 4202528 B/op	       2 allocs/op
BenchmarkSqr/M16777216-math-big-sqr
BenchmarkSqr/M16777216-math-big-sqr-24  	       1	1044838615 ns/op	14704448 B/op	       7 allocs/op
BenchmarkSqr/M33554432-mersenne-sqr
BenchmarkSqr/M33554432-mersenne-sqr-24  	      15	  73673501 ns/op	 8396838 B/op	       2 allocs/op
BenchmarkSqr/M33554432-math-big-sqr
BenchmarkSqr/M33554432-math-big-sqr-24  	       1	3144805101 ns/op	29384512 B/op	       7 allocs/op
BenchmarkSqr/M67108864-mersenne-sqr
BenchmarkSqr/M67108864-mersenne-sqr-24  	       8	 145513519 ns/op	16785440 B/op	       2 allocs/op
BenchmarkSqr/M67108864-math-big-sqr
BenchmarkSqr/M67108864-math-big-sqr-24  	       1	9167346999 ns/op	58744640 B/op	       7 allocs/op
BenchmarkSqr/M134217728-mersenne-sqr
BenchmarkSqr/M134217728-mersenne-sqr-24 	       4	 282906478 ns/op	33562656 B/op	       2 allocs/op
BenchmarkSqr/M134217728-math-big-sqr
BenchmarkSqr/M134217728-math-big-sqr-24 	       1	27725921736 ns/op	117464896 B/op	       7 allocs/op
BenchmarkSqr/M268435456-mersenne-sqr
BenchmarkSqr/M268435456-mersenne-sqr-24 	       2	 584291840 ns/op	67117088 B/op	       2 allocs/op
BenchmarkSqr/M268435456-math-big-sqr
BenchmarkSqr/M268435456-math-big-sqr-24 	       1	83496304443 ns/op	234905408 B/op	       7 allocs/op
PASS
ok  	modernc.org/mathutil/mersenne	203.388s
---
 mersenne/all_test.go | 37 ++++++++++++++++++++++++
 mersenne/mersenne.go | 68 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/mersenne/all_test.go b/mersenne/all_test.go
index 8ee123d..8c26060 100644
--- a/mersenne/all_test.go
+++ b/mersenne/all_test.go
@@ -5,6 +5,7 @@
 package mersenne // import "modernc.org/mathutil/mersenne"
 
 import (
+	"fmt"
 	"math"
 	"math/big"
 	"math/rand"
@@ -941,3 +942,39 @@ func TestModPow2(t *testing.T) {
 		f(uint32(rg.Next()), uint32(rg.Next()))
 	}
 }
+
+func TestSqr(t *testing.T) {
+	const N = 25000
+	for i := 0; i <= N; i++ {
+		g := Sqr(uint32(i))
+		e := New(uint32(i))
+		e.Mul(e, e)
+		if g.Cmp(e) != 0 {
+			t.Errorf("Sqr of M%d incorrect\ngot %s %#x\nexp %s %#x", i, g, g.Bits(), e, e.Bits())
+		}
+	}
+}
+
+var bigResult *big.Int
+
+func BenchmarkSqr(b *testing.B) {
+	for i := 0; i <= 28; i++ {
+		n := uint32(1) << i
+		b.Run(fmt.Sprintf("M%d-mersenne-sqr", n), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				bigResult = Sqr(n)
+			}
+		})
+		b.Run(fmt.Sprintf("M%d-math-big-sqr", n), func(b *testing.B) {
+			a := New(n)
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				b := big.NewInt(0).Set(a)
+				b.Mul(b, b)
+				bigResult = b
+			}
+		})
+	}
+}
diff --git a/mersenne/mersenne.go b/mersenne/mersenne.go
index 535d478..984d4dd 100644
--- a/mersenne/mersenne.go
+++ b/mersenne/mersenne.go
@@ -24,8 +24,8 @@ import (
 	"math"
 	"math/big"
 
-	"modernc.org/mathutil"
 	"github.com/remyoudompheng/bigfft"
+	"modernc.org/mathutil"
 )
 
 var (
@@ -34,7 +34,7 @@ var (
 	_2 = big.NewInt(2)
 )
 
-// Knowns list the exponent of currently (March 2012) known Mersenne primes
+// Knowns list the exponent of currently (May 2021) known Mersenne primes
 // exponents in order.  See also: http://oeis.org/A000043 for a partial list.
 var Knowns = []uint32{
 	2,  // #1
@@ -91,10 +91,12 @@ var Knowns = []uint32{
 	57885161, // #48
 	74207281, // #49
 	77232917, // #50
+
+	82589933, // #51
 }
 
 // Known maps the exponent of known Mersenne primes its ordinal number/rank.
-// Ranks > 45 are currently provisional.
+// Ranks > 47 are currently provisional.
 var Known map[uint32]int
 
 func init() {
@@ -296,3 +298,63 @@ func ProbablyPrime(n, a uint32) bool {
 	x := ModPow(a, n-1, n)
 	return x.Cmp(_1) == 0 || x.Cmp(nMinus1) == 0
 }
+
+// Sqr returns the square of Mn.
+func Sqr(n uint32) *big.Int {
+	if n == 0 {
+		// (2^(0)-1)^2 = 0
+		return big.NewInt(0)
+	}
+
+	bitSize := 2 * int(n)
+	wordSize := bitSize/mathutil.IntBits + 1
+	bits := make([]big.Word, 0, wordSize)
+	var d, c, w big.Word
+	m := big.Word(1)
+	for i := uint32(0); i < n; i++ {
+		d++
+		c += d
+		if c&1 != 0 {
+			w |= m
+		}
+		m <<= 1
+		c >>= 1
+		if m == 0 {
+			bits = append(bits, w)
+			w = 0
+			m = 1
+		}
+	}
+	for i := uint32(1); i < n; i++ {
+		d--
+		c += d
+		if c&1 != 0 {
+			w |= m
+		}
+		m <<= 1
+		c >>= 1
+		if m == 0 {
+			bits = append(bits, w)
+			w = 0
+			m = 1
+		}
+	}
+	for c != 0 {
+		if c&1 != 0 {
+			w |= m
+		}
+		m <<= 1
+		c >>= 1
+		if m == 0 {
+			bits = append(bits, w)
+			w = 0
+			m = 1
+		}
+	}
+	if w != 0 {
+		bits = append(bits, w)
+	}
+	var r big.Int
+	r.SetBits(bits)
+	return &r
+}
-- 
GitLab