...
 
Commits (4)
......@@ -22,6 +22,14 @@ Given a version number MAJOR.MINOR.PATCH
* PATCH incremented for bug fixes
[0.1.2] - Unreleased
--------------------
Added
* MPI fast square using Comba and Karatsuba methods.
[0.1.1] - 2020-04-01
--------------------
......
......@@ -159,6 +159,12 @@ int cry_mpi_mul_karatsuba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b);
int cry_mpi_mul_toom3(cry_mpi *r, const cry_mpi *a, const cry_mpi *b);
int cry_mpi_sqr_baseline(cry_mpi *r, const cry_mpi *a);
int cry_mpi_sqr_comba(cry_mpi *r, const cry_mpi *a);
int cry_mpi_sqr_karatsuba(cry_mpi *r, const cry_mpi *a);
/*
* Utilities
*/
......
#include "mpi_pvt.h"
#include "misc.h"
#include <stdio.h>
#include <cry/config.h>
#define KARATSUBA_CUTOFF 64
......
#include "mpi_pvt.h"
#include "misc.h"
#include <cry/config.h>
/*
 * MULADD(i, j)
 *
 * Multiply-accumulate step: adds the double-width product i * j to the
 * three-digit accumulator (c2:c1:c0), where c0 is the least significant
 * digit. The accumulator variables c0, c1 and c2 are NOT parameters: they
 * must be visible, modifiable cry_mpi_digit variables at the point of use.
 * A carry out of c2 is discarded.
 *
 * Every variant expands to a single statement WITHOUT a trailing semicolon,
 * so the caller writes `MULADD(x, y);` and the macro is safe inside an
 * unbraced if/else.
 *
 * NOTE(review): the assembly variants presumably require the digit type to
 * match the native register width of the selected architecture - confirm
 * against the build configuration.
 */
#if defined(CRY_ARCH_X86)
#define MULADD(i, j) asm( \
    "movl %6, %%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax, %0 \n\t" \
    "adcl %%edx, %1 \n\t" \
    "adcl $0, %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
    : "%eax", "%edx", "cc")
#elif defined(CRY_ARCH_X86_64)
#define MULADD(i, j) asm( \
    "movq %6, %%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax, %0 \n\t" \
    "adcq %%rdx, %1 \n\t" \
    "adcq $0, %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
    : "%rax", "%rdx", "cc")
#elif defined(CRY_ARCH_ARM)
#define MULADD(i, j) asm( \
    "umull r0, r1, %6, %7 \n\t" \
    "adds %0, %0, r0 \n\t" \
    "adcs %1, %1, r1 \n\t" \
    "adc %2, %2, #0 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "r0", "r1", "cc")
#else /* ISO C code */
#define MULADD(i, j) do { \
    cry_mpi_dword _t; \
    /* low digit: c0 + i*j, the high half becomes the carry into c1 */ \
    _t = (cry_mpi_dword)c0 + ((cry_mpi_dword)(i)) * ((cry_mpi_dword)(j)); \
    c0 = (cry_mpi_digit) _t; \
    _t = (cry_mpi_dword)c1 + (_t >> CRY_MPI_DIGIT_BITS); \
    c1 = (cry_mpi_digit) _t; \
    /* whatever overflows c1 is accumulated in c2 */ \
    c2 += (cry_mpi_digit) (_t >> CRY_MPI_DIGIT_BITS); \
} while (0)
#endif
int cry_mpi_mul_comba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b)
{
int res;
......@@ -87,7 +37,7 @@ int cry_mpi_mul_comba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b)
/*
* this is the number of times the loop will iterate, essentially its
* while (tx++ < a->used && ty-- >= 0
* while (tx++ < a->used && ty-- >= 0)
*/
iy = CRY_MIN(a->used - tx, ty + 1);
......@@ -98,7 +48,7 @@ int cry_mpi_mul_comba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b)
/* execute loop */
for (iz = 0; iz < iy; iz++) {
MULADD(*tmpx, *tmpy);
MULADD(c0, c1, c2, *tmpx, *tmpy);
tmpx++;
tmpy--;
}
......
......@@ -13,39 +13,35 @@ int cry_mpi_mul_karatsuba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b)
return 0;
}
/* minimum number of digits */
/* Minimum number of digits */
B = CRY_MIN(a->used, b->used);
/* divide by two */
/* Divide by two */
hB = B >> 1;
B = hB << 1;
/* init copy all the temporaries */
/* Initialize all the temporaries */
CRY_CHK(res = cry_mpi_init_size(&x0, hB), e0);
CRY_CHK(res = cry_mpi_init_size(&x1, a->used - hB), e1);
CRY_CHK(res = cry_mpi_init_size(&y0, hB), e2);
CRY_CHK(res = cry_mpi_init_size(&y1, b->used - hB), e3);
CRY_CHK(res = cry_mpi_init_size(&z0, B), e4);
CRY_CHK(res = cry_mpi_init_size(&z1, B), e5);
CRY_CHK(res = cry_mpi_init_size(&z2, B), e6);
/* Shift the digits */
/* Manually set "in use" digits */
x0.used = y0.used = hB;
x1.used = a->used - hB;
y1.used = b->used - hB;
memcpy(x0.data, a->data, hB * sizeof(cry_mpi_digit));
memcpy(y0.data, b->data, hB * sizeof(cry_mpi_digit));
if (a->used > hB)
memcpy(x1.data, a->data + hB, (a->used - hB) * sizeof(*x1.data));
if (b->used > hB)
memcpy(y1.data, b->data + hB, (b->used - hB) * sizeof(*y1.data));
/*
* Only need to clamp the lower words since by definition the upper
* words x1/y1 must have a known number of digits
* Only need to clamp the lower words since by definition the upper words
* x1 and y1 have a known number of digits
*/
cry_mpi_adjust(&x0);
cry_mpi_adjust(&y0);
......@@ -58,6 +54,7 @@ int cry_mpi_mul_karatsuba(cry_mpi *r, const cry_mpi *a, const cry_mpi *b)
CRY_CHK(res = cry_mpi_mul_abs(&z1, &z1, &x0), e7); /* z1 = (x1+x0)(y1+y0) */
CRY_CHK(res = cry_mpi_add(&x0, &z0, &z2), e7); /* x0 = z0+z2 */
CRY_CHK(res = cry_mpi_sub(&z1, &z1, &x0), e7); /* z1=(x1+x0)*(y1+y0)-(z0+z2) */
CRY_CHK(res = cry_mpi_shld(&z1, hB), e7);
CRY_CHK(res = cry_mpi_shld(&z2, B), e7);
......
......@@ -3,10 +3,12 @@
#include <cry/mpi.h>
#include <cry/assert.h>
#include <string.h> /* memset */
#include <cry/config.h>
#include <string.h>
#include <stdint.h>
#include "../misc.h"
/* Double precision digits */
#if CRY_MPI_DIGIT_BYTES == 1
......@@ -69,4 +71,53 @@ int cry_mpi_shld(cry_mpi *a, size_t n);
(((n)->data[(bit) / CRY_MPI_DIGIT_BITS] & \
((cry_mpi_digit)1 << ((bit) % CRY_MPI_DIGIT_BITS))) != 0)
/*
 * MULADD(c0, c1, c2, i, j)
 *
 * Multiply-accumulate step shared by the Comba routines: adds the
 * double-width product i * j to the three-digit accumulator (c2:c1:c0),
 * where c0 is the least significant digit.
 *
 *   c0, c1, c2  modifiable cry_mpi_digit lvalues (the accumulator); the
 *               ANSI C variant evaluates each of them more than once, so
 *               they must be free of side effects
 *   i, j        the two digits to multiply
 *
 * A carry out of c2 is discarded.
 *
 * Every variant expands to a single statement WITHOUT a trailing semicolon,
 * so the caller writes `MULADD(...);` and the macro is safe inside an
 * unbraced if/else.
 *
 * NOTE(review): the assembly variants presumably require the digit type to
 * match the native register width of the selected architecture - confirm
 * against the build configuration.
 */
#if defined(CRY_ARCH_X86)
#define MULADD(c0, c1, c2, i, j) asm( \
    "movl %6, %%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax, %0 \n\t" \
    "adcl %%edx, %1 \n\t" \
    "adcl $0, %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
    : "%eax", "%edx", "cc")
#elif defined(CRY_ARCH_X86_64)
#define MULADD(c0, c1, c2, i, j) asm( \
    "movq %6, %%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax, %0 \n\t" \
    "adcq %%rdx, %1 \n\t" \
    "adcq $0, %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
    : "%rax", "%rdx", "cc")
#elif defined(CRY_ARCH_ARM)
#define MULADD(c0, c1, c2, i, j) asm( \
    "umull r0, r1, %6, %7 \n\t" \
    "adds %0, %0, r0 \n\t" \
    "adcs %1, %1, r1 \n\t" \
    "adc %2, %2, #0 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "r0", "r1", "cc")
#else /* ANSI C code */
#define MULADD(c0, c1, c2, i, j) do { \
    cry_mpi_dword _t; \
    /* low digit: c0 + i*j, the high half becomes the carry into c1 */ \
    _t = (cry_mpi_dword)(c0) + ((cry_mpi_dword)(i)) * ((cry_mpi_dword)(j)); \
    (c0) = (cry_mpi_digit) _t; \
    _t = (cry_mpi_dword)(c1) + (_t >> CRY_MPI_DIGIT_BITS); \
    (c1) = (cry_mpi_digit) _t; \
    /* whatever overflows c1 is accumulated in c2 */ \
    (c2) += (cry_mpi_digit) (_t >> CRY_MPI_DIGIT_BITS); \
} while (0)
#endif
#endif /* CRY_MPI_PVT_H_ */
#include "mpi_pvt.h"
#include <stdio.h>
#include "misc.h"
#include <cry/config.h>
#define KARATSUBA_CUTOFF 64 /* TODO: take in consideration the digit size */
/*
 * Generic squaring entry point: r = a * a.
 *
 * Selects one of the specialised squaring routines at build/run time:
 *   - when CRY_MPI_MUL_KARATSUBA is defined and `a` uses more than
 *     KARATSUBA_CUTOFF digits, cry_mpi_sqr_karatsuba() is used;
 *   - otherwise cry_mpi_sqr_comba() when CRY_MPI_MUL_COMBA is defined;
 *   - otherwise cry_mpi_sqr_baseline().
 *
 * The schoolbook loop that used to live here is implemented by
 * cry_mpi_sqr_baseline(); keeping a second copy in front of the dispatch
 * made the dispatch unreachable.
 *
 * Returns whatever the selected routine returns (the routines in this
 * module return 0 on success and a non-zero error code on failure).
 */
int cry_mpi_sqr(cry_mpi *r, const cry_mpi *a)
{
#ifdef CRY_MPI_MUL_KARATSUBA
    /* Karatsuba only pays off for large operands */
    if (a->used > KARATSUBA_CUTOFF)
        return cry_mpi_sqr_karatsuba(r, a);
#endif
#ifdef CRY_MPI_MUL_COMBA
    return cry_mpi_sqr_comba(r, a);
#else
    return cry_mpi_sqr_baseline(r, a);
#endif
}
#include "mpi_pvt.h"
/*
 * Schoolbook squaring: r = a * a.
 *
 * For every digit a[row] the square a[row]^2 is added at position 2*row,
 * then each cross product a[row]*a[col] (col > row) is doubled and added
 * at position row+col, so every mixed term is computed only once.
 *
 * The result is built in a private temporary of 2*used+1 digits and then
 * swapped into r, which makes r == a safe.
 *
 * NOTE(review): the accumulation reads digits of the temporary before
 * writing them, so it relies on cry_mpi_init_size() returning zeroed
 * storage - confirm.
 *
 * Returns 0 on success, or the error reported by cry_mpi_init_size().
 */
int cry_mpi_sqr_baseline(cry_mpi *r, const cry_mpi *a)
{
    int res;
    size_t row, col, ndigits;
    cry_mpi acc;
    cry_mpi_dword wide, carry, top;
    cry_mpi_digit lhs, *out;

    if (cry_mpi_is_zero(a)) {
        cry_mpi_zero(r);
        return 0;
    }

    ndigits = a->used;
    res = cry_mpi_init_size(&acc, 2*ndigits + 1);
    if (res != 0)
        return res;

    /* Start from the largest possible size; trimmed at the end */
    cry_mpi_set_used(&acc, 2*ndigits + 1);

    for (row = 0; row < ndigits; row++) {
        lhs = a->data[row];
        out = acc.data + 2*row;

        /* Diagonal term a[row]^2 goes to position 2*row */
        wide = (cry_mpi_dword)*out +
               (cry_mpi_dword)lhs * (cry_mpi_dword)lhs;
        *out++ = (cry_mpi_digit)wide;
        carry = wide >> CRY_MPI_DIGIT_BITS;

        /* Doubled cross terms, starting at position 2*row + 1 */
        for (col = row + 1; col < ndigits; col++, out++) {
            wide = (cry_mpi_dword)lhs * (cry_mpi_dword)a->data[col];
            /* Remember the bit that the doubling is about to shift out */
            top = (wide & (((cry_mpi_dword)1U) << (2*CRY_MPI_DIGIT_BITS - 1)))
                  >> (CRY_MPI_DIGIT_BITS - 1);
            wide <<= 1;
            wide += ((cry_mpi_dword)*out + carry);
            /* The addition itself may wrap when the doubling did not */
            if (top == 0 && wide < ((cry_mpi_dword)*out + carry))
                top = (cry_mpi_dword)1U << CRY_MPI_DIGIT_BITS;
            *out = (cry_mpi_digit)wide;
            carry = top | (wide >> CRY_MPI_DIGIT_BITS);
        }

        /* Ripple what is left of the carry into the higher digits */
        for (; carry != 0; out++) {
            wide = (cry_mpi_dword)*out + carry;
            *out = (cry_mpi_digit)wide;
            carry = wide >> CRY_MPI_DIGIT_BITS;
        }
    }

    cry_mpi_adjust(&acc);
    cry_mpi_swap(r, &acc);
    cry_mpi_clear(&acc);
    return res;
}
#include "mpi_pvt.h"
/*
 * Comba (column-wise) squaring: r = a * a.
 *
 * The result is produced one output digit (column) at a time. For each
 * column the cross products a[lo]*a[hi] with lo < hi and lo + hi == column
 * are summed once and doubled; even columns additionally receive the
 * square of the middle digit. A three-digit accumulator (c2:c1:c0) carries
 * the overflow from one column to the next.
 *
 * When r and a are the same object the digits are written to a temporary
 * that is swapped into r at the end; otherwise r is grown and written
 * directly.
 *
 * NOTE(review): on the non-aliased path nothing in this function resets
 * the sign of r - confirm that the helpers used here or the callers take
 * care of it.
 *
 * Returns 0 on success, or the error reported by the allocation helper.
 */
int cry_mpi_sqr_comba(cry_mpi *r, const cry_mpi *a)
{
    int res;
    size_t col, lo, hi, k, pairs, ncols;
    cry_mpi_digit c0, c1, c2, t0, t1, t2;
    cry_mpi scratch, *out;
    cry_mpi_dword sum;

    if (cry_mpi_is_zero(a)) {
        cry_mpi_zero(r);
        return 0;
    }

    ncols = a->used + a->used;

    /* Pick the destination: r itself unless it aliases the input */
    if (r != a) {
        out = r;
        res = cry_mpi_grow(r, ncols);
    } else {
        out = &scratch;
        res = cry_mpi_init_size(&scratch, ncols);
    }
    if (res != 0)
        return res;

    cry_mpi_set_used(out, ncols);

    c1 = c2 = 0;
    for (col = 0; col < ncols; col++) {
        /* Indices of the outermost digit pair contributing to this column */
        hi = CRY_MIN(col, a->used - 1);
        lo = col - hi;

        /*
         * Number of pairs available in this column, essentially
         * while (lo++ < a->used && hi-- >= 0) ...
         */
        pairs = CRY_MIN(a->used - lo, hi + 1);

        /*
         * ... restricted to lo < hi. The two indices approach each other
         * at a rate of two per step, hence half the distance, rounded
         * because odd distances still need the extra step.
         */
        pairs = CRY_MIN(pairs, (hi - lo + 1) >> 1);

        /* Shift the accumulator right by one digit */
        c0 = c1;
        c1 = c2;
        c2 = 0;

        if (pairs != 0) {
            /* Sum the cross products of this column */
            t0 = t1 = t2 = 0;
            for (k = 0; k < pairs; k++) {
                MULADD(t0, t1, t2, a->data[lo + k], a->data[hi - k]);
            }

            /* Each cross product appears twice in the square */
            t2 = (t2 << 1) | (t1 >> (CRY_MPI_DIGIT_BITS - 1));
            t1 = (t1 << 1) | (t0 >> (CRY_MPI_DIGIT_BITS - 1));
            t0 <<= 1;

            /* Fold the doubled sum into the accumulator */
            sum = (cry_mpi_dword)c0 + t0;
            c0 = (cry_mpi_digit)sum;
            sum = (cry_mpi_dword)c1 + t1 + (sum >> CRY_MPI_DIGIT_BITS);
            c1 = (cry_mpi_digit)sum;
            sum = (cry_mpi_dword)c2 + t2 + (sum >> CRY_MPI_DIGIT_BITS);
            c2 = (cry_mpi_digit)sum;
        }

        /* Even columns also contain the square of the middle digit */
        if ((col & 1) == 0) {
            MULADD(c0, c1, c2, a->data[col >> 1], a->data[col >> 1]);
        }

        /* The low accumulator digit is the finished output digit */
        out->data[col] = c0;
    }

    if (out != r) {
        cry_mpi_swap(r, out);
        cry_mpi_clear(out);
    }
    cry_mpi_adjust(r);
    return res;
}
#include "mpi_pvt.h"
/*
 * Karatsuba squaring: r = a * a.
 *
 * The digits of a are split at `half` digits into a low part `lo` and a
 * high part `hi`, then
 *
 *   lo^2, hi^2 and (hi + lo)^2
 *
 * are computed with cry_mpi_sqr() and recombined as
 *
 *   r = hi^2 << (2*half) + ((hi + lo)^2 - lo^2 - hi^2) << half + lo^2
 *
 * where the shifts are expressed in digits. Only the digits of a are
 * copied into the two halves.
 *
 * Returns 0 on success, or the first error reported by a helper; all the
 * temporaries that were initialised are released on every path.
 */
int cry_mpi_sqr_karatsuba(cry_mpi *r, const cry_mpi *a)
{
    int res;
    size_t half, full, upper;
    cry_mpi lo, hi, sq_lo, mid, sq_hi;

    if (cry_mpi_is_zero(a)) {
        cry_mpi_zero(r);
        return 0;
    }

    /* Split point, and the split point doubled (used - 1 when used is odd) */
    half = a->used >> 1;
    full = half << 1;
    /* Digits that end up in the high part */
    upper = a->used - half;

    /* Set up the temporaries */
    CRY_CHK(res = cry_mpi_init_size(&lo, half), out);
    CRY_CHK(res = cry_mpi_init_size(&hi, upper), free_lo);
    CRY_CHK(res = cry_mpi_init_size(&sq_lo, full), free_hi);
    CRY_CHK(res = cry_mpi_init_size(&mid, full), free_sq_lo);
    CRY_CHK(res = cry_mpi_init_size(&sq_hi, full), free_mid);

    /* Fill the two halves by hand */
    lo.used = half;
    hi.used = upper;
    memcpy(lo.data, a->data, half * sizeof(*lo.data));
    memcpy(hi.data, a->data + half, upper * sizeof(*hi.data));

    /*
     * Only the low half may carry leading zero digits: the high half
     * ends with the most significant digit of a.
     */
    cry_mpi_adjust(&lo);

    CRY_CHK(res = cry_mpi_sqr(&sq_lo, &lo), free_all);    /* sq_lo = lo**2 */
    CRY_CHK(res = cry_mpi_sqr(&sq_hi, &hi), free_all);    /* sq_hi = hi**2 */

    CRY_CHK(res = cry_mpi_add(&mid, &hi, &lo), free_all); /* mid = hi+lo */
    CRY_CHK(res = cry_mpi_sqr(&mid, &mid), free_all);     /* mid = (hi+lo)**2 */

    /* lo is no longer needed: reuse it as scratch space */
    CRY_CHK(res = cry_mpi_add(&lo, &sq_lo, &sq_hi), free_all);
    /* mid = (hi+lo)**2 - (lo**2 + hi**2) */
    CRY_CHK(res = cry_mpi_sub(&mid, &mid, &lo), free_all);

    /* Move the partial results to their digit positions and sum them */
    CRY_CHK(res = cry_mpi_shld(&mid, half), free_all);
    CRY_CHK(res = cry_mpi_shld(&sq_hi, full), free_all);
    CRY_CHK(res = cry_mpi_add(&mid, &sq_lo, &mid), free_all);
    CRY_CHK(res = cry_mpi_add(r, &mid, &sq_hi), free_all);

free_all:   cry_mpi_clear(&sq_hi);
free_mid:   cry_mpi_clear(&mid);
free_sq_lo: cry_mpi_clear(&sq_lo);
free_hi:    cry_mpi_clear(&hi);
free_lo:    cry_mpi_clear(&lo);
out:        return res;
}
......@@ -19,6 +19,9 @@ objects-y := \
mpi_div.o \
mpi_div_abs.o \
mpi_sqr.o \
mpi_sqr_baseline.o \
mpi_sqr_comba.o \
mpi_sqr_karatsuba.o \
mpi_sqrt.o \
mpi_shl.o \
mpi_shr.o \
......
......@@ -55,7 +55,7 @@ coverage: test
@mkdir -p out/coverage
@lcov -q -z -d ../build
@lcov -q -c -i -d ../build -o base.info
@./test $(TESTS)
@./test -v $(TESTS)
@lcov -q -c -d ../build -o cry.info
@lcov -q -a base.info -a cry.info -o cry.info
@genhtml -q -o out/coverage cry.info
......
......@@ -267,6 +267,12 @@ static void mpi_dispatch(int argc, char *argv[])
mpi_binary_mod_op(argc, argv, cry_mpi_mod_exp);
else if (strcmp(test, "mpi_sqr") == 0)
mpi_unary_op(argc, argv, cry_mpi_sqr);
else if (strcmp(test, "mpi_sqr_baseline") == 0)
mpi_unary_op(argc, argv, cry_mpi_sqr_baseline);
else if (strcmp(test, "mpi_sqr_comba") == 0)
mpi_unary_op(argc, argv, cry_mpi_sqr_comba);
else if (strcmp(test, "mpi_sqr_karatsuba") == 0)
mpi_unary_op(argc, argv, cry_mpi_sqr_karatsuba);
else
TRACE("Test '%s' not defined\n", test);
......
......@@ -517,26 +517,99 @@ mpi_div
11e836
################################################################################
# Square
# Square (generic)
# p0 : input value as hex string
# p2 : square value
################################################################################
Sqr zero
Square zero
mpi_sqr
0
0
Sqr negative value
Square negative value
mpi_sqr
-ffffffffffffffff
fffffffffffffffe0000000000000001
Sqr test #1
Square test #1
mpi_sqr
24652d5a2646a8fd9b92c281708d3ddb
52c9cbe2beee47c72af113101cb4adb7bf95ad3ddd78ce8716026c2d2301959
################################################################################
# Square (baseline)
# p0 : input value as hex string
# p2 : square value
################################################################################
Square (baseline) zero
mpi_sqr_baseline
0
0
Square (baseline) negative value
mpi_sqr_baseline
-ffffffffffffffff
fffffffffffffffe0000000000000001
Square (baseline) test #1
mpi_sqr_baseline
24652d5a2646a8fd9b92c281708d3ddb
52c9cbe2beee47c72af113101cb4adb7bf95ad3ddd78ce8716026c2d2301959
################################################################################
# Square (Comba)
# p0 : input value as hex string
# p2 : square value
################################################################################
Square (comba) zero
mpi_sqr_comba
0
0
Square (comba) negative value
mpi_sqr_comba
-ffffffffffffffff
fffffffffffffffe0000000000000001
Square (comba) test #1
mpi_sqr_comba
24652d5a2646a8fd9b92c281708d3ddb
52c9cbe2beee47c72af113101cb4adb7bf95ad3ddd78ce8716026c2d2301959
Square (comba) test #2
mpi_sqr_comba
4b607db88599c5246805a0eee8b69eaf0c5fc3b92ba17605a2f11ebfea4c8c49e077f789bc5e13b9fa09e8b6b30ed0828de51be0e04af432816c0b0096575bd3d49491c73fbf1b9b2792587b8571086e23f2f68a5bfd8341b2f607c68700a27dcd610f9ae972dcb34a014b46cda68bb8b32790eb7ac476bde9c7edc2be5f2ff1f940c743bcc8a56fe71558ad93e1951555b4d6f2e0d46eaad63f40bf6b92d234fa74e1a7cb2eb760abdd2c6db1eaeea51519dd972f8c589cfbe36e33c1f2ea96c6d72b5faf0e3752848259fec37843220130c4a7c122684d4c10c0eb75514c601db8a27b9715d143c29e305a5a13f621686290467b313ad68d5e249b40d48a883d38240c52c740411d514a30f17640fbcfd5bb824ba578ffa607ee12cb99c43f11e647585aec12da7fa5e87a5003e8c7639d573f8a44526c4f18a5c582364122c0e10d02803beb1c1caa0a446c56e722ee933e7d262c49a6c35e5f8ad43784d21ace6c2cecff5dddb9cf87750c9301ef49eaffc6d4fdc1703836becc4002a91c6ee9a9a402b0a20273f410e9a408f905
1631ae08a66a044fad8b65a4c8356ddb22ede5a07880d311aa3e448b020b5bbf57cecd830a6b72542fbe6d1396fbba60200177ceae5b33e152750800fd8b68e2f8773c487fc6aee757a224f8e591dabf57be3b5dc2a9e593bbbe06f5030a5b01c72bff6e01cdb71d6d996b024ec153fc7c9206cc24d577d39c521069df6a1406bc621d1631f875783e3633699adfc857b37e8f6ea4f6b61d2fb1748754ce757e9521a462d8f12babe256d05bec95f1968830eeb6b34790a1c1d0e5809e2c5eff2dbc51c074f289fa3b414225808286c92f355eed78e6503d7047f8a11f7dc42a886cea13b8b16fd2918fbf6b75a1d804a5cacca534c542dc982ae9f431a439f9fbd1db5051eaa8971a26482f7e113299f9b1b8dc6649cffe38f43655344dea03fd5ca46f37d70fe10acad498cec869d1b4a37e2789dd06843085b1dca7b9c8f918da4f21bf4e00ac9fad25ad8785d9aad8808758c6d70bd5a7ff88644e6efa4c1ae2d1ac467750b7ed8ef958159c7057970bd8ddcbcb916fec2d83dc25cf060add8a2b4bb7363bade362ec847305000498b66e04529f626d0836db490542aee397eface90108a1d1270c78b6002ca4b731d8e069d22222a44def7023a17552199081824360b75f00bfe800890591789db6ec0e053ad3a0f86913fc21136371238750faa2b79bc4a2ec6179957cf67545ac919ae0189ea63ab6549c4ae91f5ab5ee94aae2a5fb6a57a8de71d26c083c1452969182e53b0c9928b984a8a3919729a97ca6770a72029e3c3e5ffe6015ea81948871ec1758f9060319fb70f368acbf4c4dc0b7a6eb658d4dbd1a0573743a23d314ecf6a49acf7ea747126993ddf948ff44413f33c5258554686ce52f4ed5ea86703bcce7fb1589b5321e6890b1fef54bc6da3407c6b3e6365bfd7c35d4dc81e61c92861c434fe487af9bdbe1ee4f1498d7a70e150db3efc05564eacace5a7ea43f07d4aa15fe14c6163bb99d3d88a5f3d014a07925dbc62b0c68dbd25176f7f3918896922f25864038c5aebbe426fb1bbf64964b2404f7d9128dfd1742ad8c08d6d5e6af5a235ed3bbdcb9c245d83714788652b9757e3e5bca63c1b3d7e1fd6cf07fc68e0d02a650196a78ea8aba19
################################################################################
# Square (Karatsuba)
# p0 : input value as hex string
# p2 : square value
################################################################################
Square (karatsuba) zero
mpi_sqr_karatsuba
0
0
Square (karatsuba) negative value
mpi_sqr_karatsuba
-ffffffffffffffff
fffffffffffffffe0000000000000001
Square (karatsuba) test #1
mpi_sqr_karatsuba
24652d5a2646a8fd9b92c281708d3ddb
52c9cbe2beee47c72af113101cb4adb7bf95ad3ddd78ce8716026c2d2301959
Square (karatsuba) 6400 bits
mpi_sqr_karatsuba
c92e20b990846a1037594736fd02ce34f1776d4c019b0d31c4b5bdb0df568ea2292223e70d6dad392d466500c8d18c1ea21b6433767cc0fa6d4ed344ae38b684c12ab8f06ee44c787e499fda204c4cbb835439ca93827b8f7449ccb624ec1b77837f6cc1d7ac8f8f607820a9bf210d38fa7af0abf8ca93bf9fbfa279246ff36aca41a2bcdf674b2e58e2bee6e1e565b279e4495980c871bc6b29eca61211ddef99c683ce43cddbd1b82a47b1d6d8aedb362acd86af6470be5f9345b4cfa744264e074a60275e81de16583688ca082b5f166b2ba8ed6fb23c88ba46c9a8ed0791628abab6bcc7291100ef301af30913f19e03f2d8c1be35972866aaf639cd7653f9944da1d1cd0174acd428b6859a7f65dfae0c2730bec684b08b8c687b0ca7b47e202c939850b0276062906dd331019190adfa8583882ee4f71584d3b2f866ad7218028eabec5a6909dfbcac7f3082da1b6c22e8fca64ee2544c52fb90a00311cd1d720aaa6bed75d056f8f289266a4e7dd45bdbe5ce97e9320ae5c8860fa2139ef621f88db43ee9d1994f431d4992d54b607db88599c5246805a0eee8b69eaf0c5fc3b92ba17605a2f11ebfea4c8c49e077f789bc5e13b9fa09e8b6b30ed0828de51be0e04af432816c0b0096575bd3d49491c73fbf1b9b2792587b8571086e23f2f68a5bfd8341b2f607c68700a27dcd610f9ae972dcb34a014b46cda68bb8b32790eb7ac476bde9c7edc2be5f2ff1f940c743bcc8a56fe71558ad93e1951555b4d6f2e0d46eaad63f40bf6b92d234fa74e1a7cb2eb760abdd2c6db1eaeea51519dd972f8c589cfbe36e33c1f2ea96c6d72b5faf0e3752848259fec37843220130c4a7c122684d4c10c0eb75514c601db8a27b9715d143c29e305a5a13f621686290467b313ad68d5e249b40d48a883d38240c52c740411d514a30f17640fbcfd5bb824ba578ffa607ee12cb99c43f11e647585aec12da7fa5e87a5003e8c7639d573f8a44526c4f18a5c582364122c0e10d02803beb1c1caa0a446c56e722ee933e7d262c49a6c35e5f8ad43784d21ace6c2cecff5dddb9cf87750c9301ef49eaffc6d4fdc1703836becc4002a91c6ee9a9a402b0a20273f410e9a408f905
9e1977b32bcec8c6e038ab000cd6f497d3419471dde4b01aadfe67c1240bd464657c4b892fe14222402c11fda5d6ac2b2ef94dbcc18f32c5ca8ae32a24210bd842a1c3e1821d5e0ad68e5f7ad6fd8009454dca6ede906455620cd6a8e8434b50403f115671cd667fb736971c42c9b0b843010f68276c267f34f10fc2c891d5854e34aa00703d5d87e81f56efafcc1ec6029c69cea20b61faa409c085653ce9464d1aebf1a462c4b7385ab13ca44f92715866ce0d64fbb9ced45efab94ee976acc29158268e9e4b95e3134fad1405432d793ac014d2b94c6679c4a9845ba243622a756c4698571210fe846d91920f86ef8c564d0207d77c512883db0f26f272fc13864cdda0d587a88e026776f31f61927aea7db2e0b57236a4403f6503cde7042f83828c5dec82c4aca9ea53666bf2598b5aac267edb3edf78a623617b4746d2f093f5edf90885259ec29bdf9f18d63d1550822671c3c8c0feec66a048cf5016aec73e08d09c9d0dbbcedc4fb62cc5221d6998924e28fd11a74321916a5188fda7660785c512043b96a068223c904ddff6655be46b7cd95547e8d867e127689e468485e779271921ce5e21edc0d73d1b02b28795aa9c9dd1de1ea29b449571255c850be3fdde25bc71ff1497024162fecf731d1bcd396d9083b8b480b5440b577b23713a81036294d76f088380ba9dd5d9d7b5b2c7e9e896085c146ef062a9def9f934765180e6ee3f3426b2cd53f3bca5e077273fd70b7ae648698d7a4c6cb78c3923107e4906d5a84cc157ef31612af265e05812f33d9c743be150c22c34138380bf5ba8d14cd75fbc4fbb886089afae2ba1c113b04ad0e9b721354a7f7e49c15e0a8db2ba1ec6dafcbf04124bff0c9e924b1d494384d0e56c5d722cee5797e2885fe8899a90a2657132a21e63d7a83bf83a818f7287dabbc791694553fd177b9e03eb7518231414e09497ea568b96f24784fb16909ea793903881f62fa19e6bd66424da61aeb6f51622aafbcaa10f04b8b11506ae3d9ffde66bc29b7e65f1b6ee04eda1cf234656d443b34601a71cdf1477947b91a006f2bf87491174fb3dfd6ef1764063cad433e8265433bec347fd84c1a3193449f3fd58f58609a0b265eeefb85df63ed811724f74792f6bb0845329dd160e340361ea38aa24bab94886d3b89253356e76b21ceebbf8099369423d0e06a4effbe28e4c3f5f8444834d560279cc9d8602c99fc8e26c5de2c8f4effab6974a4d847b0049eb9fa305bdb805f5a31dab955ce2e3d1a081d0bde90d619a004270aff3a683804b4e240fb6e92a4809e4726ba08bec2f2cb227b3c3123b31bc2ca41a43dcc31bd128f85277173d2351926f5b4d252f54090f6d53d0e76d5db15c4e868b13793eae792989c38c81093a17b1a37f39ad
e2f126fad568398312c655e8e30c1ba47519a7f7ce845895a0216df70889e94e7ed5baf3cb7ae22698a9aa7b3d726df14fccec390bced9488223739bbaab2e11529606e87e6b1cfd6adbec010f162016c135d2b717ccb43d901679890c64b827d27cb2dd77b5e9d033dea5817b86aabe0417bab93dd7253ae96139393e8cb1ac66e90f8390f89268a0ffb88d50f9da3e31ab1f8022f5a75515449cbb83cfeafe088817f87a70d1c53ce010f18d70085a9a5b5f5b04ed578d45a2cc4c48e38b0307989490ccd7165698b66e04529f626d0836db490542aee397eface90108a1d1270c78b6002ca4b731d8e069d22222a44def7023a17552199081824360b75f00bfe800890591789db6ec0e053ad3a0f86913fc21136371238750faa2b79bc4a2ec6179957cf67545ac919ae0189ea63ab6549c4ae91f5ab5ee94aae2a5fb6a57a8de71d26c083c1452969182e53b0c9928b984a8a3919729a97ca6770a72029e3c3e5ffe6015ea81948871ec1758f9060319fb70f368acbf4c4dc0b7a6eb658d4dbd1a0573743a23d314ecf6a49acf7ea747126993ddf948ff44413f33c5258554686ce52f4ed5ea86703bcce7fb1589b5321e6890b1fef54bc6da3407c6b3e6365bfd7c35d4dc81e61c92861c434fe487af9bdbe1ee4f1498d7a70e150db3efc05564eacace5a7ea43f07d4aa15fe14c6163bb99d3d88a5f3d014a07925dbc62b0c68dbd25176f7f3918896922f25864038c5aebbe426fb1bbf64964b2404f7d9128dfd1742ad8c08d6d5e6af5a235ed3bbdcb9c245d83714788652b9757e3e5bca63c1b3d7e1fd6cf07fc68e0d02a650196a78ea8aba19
################################################################################
# Exponentiation (Binary)
# p0 : base
......