1239 lines
32 KiB
D
1239 lines
32 KiB
D
module angel.utils.cryptography.fieldelem;
|
|
|
|
import angel.utils.cryptography.bitmanip;
|
|
import angel.utils.cryptography.utils;
|
|
|
|
@safe nothrow @nogc:
|
|
|
|
/// fe means field element.
|
|
/// Here the field is \Z/(2^255-19).
|
|
/// An element t, entries t[0]...t[9], represents the integer
|
|
/// t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
|
|
/// Bounds on each t[i] vary depending on context.
|
|
|
|
@safe
|
|
struct fe {
|
|
|
|
enum fe zero = 0;
|
|
enum fe one = [1,0,0,0,0,0,0,0,0,0];
|
|
|
|
private uint[10] value;
|
|
private alias value this;
|
|
|
|
// this(in ref uint[10] v) {
|
|
// value = v;
|
|
// }
|
|
nothrow @nogc:
|
|
|
|
/// Create fe from 32 bytes.
|
|
static fe fromBytes(in ubyte[] bytes)
|
|
in {
|
|
assert(bytes.length == 32, "bytes.length must be 32.");
|
|
} body {
|
|
return fe_frombytes(bytes);
|
|
}
|
|
|
|
this(in uint[] v)
|
|
in {
|
|
assert(v.length == 10, "v.length must be 10.");
|
|
} body {
|
|
value = v;
|
|
}
|
|
|
|
// TODO remove
|
|
this(in uint v)
|
|
{
|
|
value[] = v;
|
|
}
|
|
|
|
fe opAssign(in ref uint[10] v) {
|
|
value = v;
|
|
return this;
|
|
}
|
|
|
|
/// Comparing in constant time.
|
|
bool opEquals()(auto ref const fe rhs) const {
|
|
return crypto_equals(this.value, rhs.value);
|
|
}
|
|
|
|
fe opBinary(string op)(auto ref const fe rhs) const pure
|
|
if (op == "+" || op == "-" || op == "*")
|
|
{
|
|
static if(op == "+") {
|
|
fe tmp;
|
|
tmp.value[] = this.value[] + rhs.value[];
|
|
return tmp;
|
|
} else static if(op == "-") {
|
|
fe tmp;
|
|
tmp.value[] = this.value[] - rhs.value[];
|
|
return tmp;
|
|
} else static if(op == "*") {
|
|
return fe_mul(this, rhs);
|
|
}
|
|
}
|
|
|
|
fe opUnary(string op)() const
|
|
if(op == "-")
|
|
{
|
|
static if(op == "-") {
|
|
fe tmp;
|
|
tmp.value[] = -this.value[];
|
|
return tmp;
|
|
}
|
|
}
|
|
|
|
ref fe opOpAssign(string op)(auto ref const fe rhs) pure
|
|
if (op == "+" || op == "-" || op == "*")
|
|
{
|
|
static if(op == "+") {
|
|
value[] += rhs.value[];
|
|
} else static if(op == "-") {
|
|
value[] -= rhs.value[];
|
|
} else static if(op == "*") {
|
|
this = this * rhs;
|
|
}
|
|
|
|
return this;
|
|
}
|
|
|
|
/*
|
|
return 1 if f == 0
|
|
return 0 if f != 0
|
|
|
|
Preconditions:
|
|
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
*/
|
|
@property
|
|
bool isNonzero() const
|
|
{
|
|
immutable ubyte[32] zero = 0;
|
|
return !crypto_equals(fe_tobytes(this), zero);
|
|
}
|
|
|
|
/**
|
|
return 1 if f is in {1,3,5,...,q-2}
|
|
return 0 if f is in {0,2,4,...,q-1}
|
|
|
|
Preconditions:
|
|
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
*/
|
|
@property
|
|
bool isNegative() const
|
|
{
|
|
return (toBytes[0] & 1) == 1;
|
|
}
|
|
|
|
@property
|
|
fe inverse() const {
|
|
return fe_invert(this);
|
|
}
|
|
|
|
/// Changes the sign.
|
|
void negate() {
|
|
value[] = -value[];
|
|
}
|
|
|
|
@property
|
|
ubyte[32] toBytes() const {
|
|
return fe_tobytes(this);
|
|
}
|
|
|
|
/// Params:
|
|
/// repeat = Repeat squaring n times.
|
|
/// Returns: f^2 or f^(2^repeat).
|
|
@property
|
|
fe sq(uint repeat = 1)() const pure
|
|
if(repeat > 0)
|
|
{
|
|
fe f = fe_sq(this);
|
|
|
|
static if(repeat > 1) {
|
|
foreach(i; 1..repeat) {
|
|
f = f.sq!1;
|
|
}
|
|
}
|
|
|
|
return f;
|
|
}
|
|
|
|
/// Returns: 2*f*f
|
|
@property
|
|
fe sq2() const pure {
|
|
return fe_sq2(this);
|
|
}
|
|
|
|
/// Power by squaring in constant time.
|
|
/// Returns f^power
|
|
@property
|
|
fe cpow(uint power)() const {
|
|
fe r = fe.one;
|
|
fe sq = this;
|
|
for(uint p = power; p > 0; p >>= 1) {
|
|
if((p & 1) == 1) {
|
|
r *= sq;
|
|
}
|
|
sq = sq.sq;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
///// h = f + g
|
|
///// Can overlap h with f or g.
|
|
/////
|
|
///// Preconditions:
|
|
///// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
|
///// |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
|
/////
|
|
///// Postconditions:
|
|
///// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
//void fe_add(ref fe h, in ref fe f, in ref fe g)
|
|
//{
|
|
// //h[] = f[] + g[];
|
|
// foreach(i; 0..10) {
|
|
// h[i] = f[i] + g[i];
|
|
// }
|
|
//}
|
|
|
|
|
|
/// Conditional move.
|
|
/// Replace (f,g) with (g,g) if b == 1;
|
|
/// replace (f,g) with (f,g) if b == 0.
|
|
///
|
|
/// Params:
|
|
/// dest = Destination.
|
|
/// src = Source.
|
|
/// condition = Condition.
|
|
void fe_cmov(ref fe dest, in ref fe src, in bool condition)
|
|
in {
|
|
assert(condition == 0 || condition == 1);
|
|
} out {
|
|
if(condition == 1) {
|
|
assert(dest == src);
|
|
}
|
|
} body {
|
|
immutable uint mask = -(cast(int)condition);
|
|
|
|
assert((condition == 0 && mask == 0) || (condition == 1 && mask == 0xFFFFFFFF));
|
|
|
|
dest[] ^= mask & dest[];
|
|
dest[] ^= mask & src[];
|
|
}
|
|
|
|
// test conditional move
|
|
unittest {
|
|
import std.algorithm: all;
|
|
|
|
fe a, b;
|
|
a[] = 13;
|
|
b[] = 42;
|
|
|
|
assert(all!"a == 13"(a[]));
|
|
assert(all!"a == 42"(b[]));
|
|
|
|
fe_cmov(a, b, 0);
|
|
|
|
assert(all!"a == 13"(a[]));
|
|
assert(all!"a == 42"(b[]));
|
|
|
|
fe_cmov(a, b, 1);
|
|
|
|
assert(all!"a == 42"(a[]));
|
|
}
|
|
|
|
ulong load_3(in ubyte[] inp) pure
|
|
in {
|
|
assert(inp.length == 3);
|
|
} body {
|
|
ulong result;
|
|
result = cast(ulong) inp[0];
|
|
result |= (cast(ulong) inp[1]) << 8;
|
|
result |= (cast(ulong) inp[2]) << 16;
|
|
return result;
|
|
}
|
|
|
|
ulong load_4(in ubyte[] inp) pure
|
|
in {
|
|
assert(inp.length == 4);
|
|
} body {
|
|
ulong result;
|
|
result = cast(ulong) inp[0];
|
|
result |= (cast(ulong) inp[1]) << 8;
|
|
result |= (cast(ulong) inp[2]) << 16;
|
|
result |= (cast(ulong) inp[3]) << 24;
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
Ignores top bit of h.
|
|
*/
|
|
private fe fe_frombytes(in ubyte[] s) pure
|
|
in {
|
|
assert(s.length == 32);
|
|
} body {
|
|
long h0 = load_4(s[0..4]);
|
|
long h1 = load_3(s[4..7]) << 6;
|
|
long h2 = load_3(s[7..10]) << 5;
|
|
long h3 = load_3(s[10..13]) << 3;
|
|
long h4 = load_3(s[13..16]) << 2;
|
|
long h5 = load_4(s[16..20]);
|
|
long h6 = load_3(s[20..23]) << 7;
|
|
long h7 = load_3(s[23..26]) << 5;
|
|
long h8 = load_3(s[26..29]) << 4;
|
|
long h9 = (load_3(s[29..32]) & 8388607) << 2;
|
|
|
|
long carry0;
|
|
long carry1;
|
|
long carry2;
|
|
long carry3;
|
|
long carry4;
|
|
long carry5;
|
|
long carry6;
|
|
long carry7;
|
|
long carry8;
|
|
long carry9;
|
|
|
|
carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25);
|
|
carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25);
|
|
carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25);
|
|
carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25);
|
|
carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25);
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26);
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26);
|
|
carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26);
|
|
|
|
fe h;
|
|
h[0] = cast(int) h0;
|
|
h[1] = cast(int) h1;
|
|
h[2] = cast(int) h2;
|
|
h[3] = cast(int) h3;
|
|
h[4] = cast(int) h4;
|
|
h[5] = cast(int) h5;
|
|
h[6] = cast(int) h6;
|
|
h[7] = cast(int) h7;
|
|
h[8] = cast(int) h8;
|
|
h[9] = cast(int) h9;
|
|
return h;
|
|
}
|
|
|
|
// TODO replace all SHL* with <<
|
|
long SHL64(in long val, in uint shift) pure nothrow @nogc {
|
|
return cast(long)(cast(ulong) val << shift);
|
|
}
|
|
|
|
int SHL32(in int val, in uint shift) pure nothrow @nogc {
|
|
return cast(int)(cast(uint) val << shift);
|
|
}
|
|
|
|
int SHL8(in byte val, in uint shift) pure nothrow @nogc {
|
|
return cast(byte)(cast(ubyte) val << shift);
|
|
}
|
|
|
|
unittest {
|
|
assert(cast(int)(0xFFFFFFFF) << 7 == SHL32(0xFFFFFFFF, 7));
|
|
}
|
|
|
|
private fe fe_invert(in ref fe z)
|
|
{
|
|
fe t0;
|
|
fe t1;
|
|
fe t2;
|
|
fe t3;
|
|
// pow225521
|
|
t0 = z.sq;
|
|
t1 = t0.sq.sq;
|
|
|
|
t1 *= z;
|
|
t0 *= t1;
|
|
t2 = t0.sq;
|
|
t1 *= t2;
|
|
|
|
t2 = t1.sq!5;
|
|
|
|
t1 *= t2;
|
|
t2 = t1.sq!10;
|
|
|
|
t2 *= t1;
|
|
|
|
t3 = t2.sq!20;
|
|
|
|
t2 *= t3;
|
|
t2 = t2.sq!10;
|
|
|
|
t1 *= t2;
|
|
|
|
t2 = t1.sq!50;
|
|
|
|
t2 *= t1;
|
|
t3 = t2.sq!100;
|
|
|
|
t2 *= t3;
|
|
t2 = t2.sq!50;
|
|
|
|
t1 *= t2;
|
|
|
|
t1 = t1.sq!5;
|
|
|
|
return t1 * t0;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
Returns: h = f * g
|
|
Can overlap h with f or g.
|
|
|
|
Preconditions:
|
|
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
|
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
|
|
|
Postconditions:
|
|
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
|
|
|
Note:
|
|
Notes on implementation strategy:
|
|
|
|
Using schoolbook multiplication.
|
|
Karatsuba would save a little in some cost models.
|
|
|
|
Most multiplications by 2 and 19 are 32-bit precomputations;
|
|
cheaper than 64-bit postcomputations.
|
|
|
|
There is one remaining multiplication by 19 in the carry chain;
|
|
one *19 precomputation can be merged into this,
|
|
but the resulting data flow is considerably less clean.
|
|
|
|
There are 12 carries below.
|
|
10 of them are 2-way parallelizable and vectorizable.
|
|
Can get away with 11 carries, but then data flow is much deeper.
|
|
|
|
With tighter constraints on inputs can squeeze carries into int32.
|
|
*/
|
|
private fe fe_mul(in ref fe f, in ref fe g) pure
|
|
{
|
|
int f0 = f[0];
|
|
int f1 = f[1];
|
|
int f2 = f[2];
|
|
int f3 = f[3];
|
|
int f4 = f[4];
|
|
int f5 = f[5];
|
|
int f6 = f[6];
|
|
int f7 = f[7];
|
|
int f8 = f[8];
|
|
int f9 = f[9];
|
|
int g0 = g[0];
|
|
int g1 = g[1];
|
|
int g2 = g[2];
|
|
int g3 = g[3];
|
|
int g4 = g[4];
|
|
int g5 = g[5];
|
|
int g6 = g[6];
|
|
int g7 = g[7];
|
|
int g8 = g[8];
|
|
int g9 = g[9];
|
|
int g1_19 = 19 * g1; /* 1.959375*2^29 */
|
|
int g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
|
|
int g3_19 = 19 * g3;
|
|
int g4_19 = 19 * g4;
|
|
int g5_19 = 19 * g5;
|
|
int g6_19 = 19 * g6;
|
|
int g7_19 = 19 * g7;
|
|
int g8_19 = 19 * g8;
|
|
int g9_19 = 19 * g9;
|
|
int f1_2 = 2 * f1;
|
|
int f3_2 = 2 * f3;
|
|
int f5_2 = 2 * f5;
|
|
int f7_2 = 2 * f7;
|
|
int f9_2 = 2 * f9;
|
|
long f0g0 = f0 * cast(long) g0;
|
|
long f0g1 = f0 * cast(long) g1;
|
|
long f0g2 = f0 * cast(long) g2;
|
|
long f0g3 = f0 * cast(long) g3;
|
|
long f0g4 = f0 * cast(long) g4;
|
|
long f0g5 = f0 * cast(long) g5;
|
|
long f0g6 = f0 * cast(long) g6;
|
|
long f0g7 = f0 * cast(long) g7;
|
|
long f0g8 = f0 * cast(long) g8;
|
|
long f0g9 = f0 * cast(long) g9;
|
|
long f1g0 = f1 * cast(long) g0;
|
|
long f1g1_2 = f1_2 * cast(long) g1;
|
|
long f1g2 = f1 * cast(long) g2;
|
|
long f1g3_2 = f1_2 * cast(long) g3;
|
|
long f1g4 = f1 * cast(long) g4;
|
|
long f1g5_2 = f1_2 * cast(long) g5;
|
|
long f1g6 = f1 * cast(long) g6;
|
|
long f1g7_2 = f1_2 * cast(long) g7;
|
|
long f1g8 = f1 * cast(long) g8;
|
|
long f1g9_38 = f1_2 * cast(long) g9_19;
|
|
long f2g0 = f2 * cast(long) g0;
|
|
long f2g1 = f2 * cast(long) g1;
|
|
long f2g2 = f2 * cast(long) g2;
|
|
long f2g3 = f2 * cast(long) g3;
|
|
long f2g4 = f2 * cast(long) g4;
|
|
long f2g5 = f2 * cast(long) g5;
|
|
long f2g6 = f2 * cast(long) g6;
|
|
long f2g7 = f2 * cast(long) g7;
|
|
long f2g8_19 = f2 * cast(long) g8_19;
|
|
long f2g9_19 = f2 * cast(long) g9_19;
|
|
long f3g0 = f3 * cast(long) g0;
|
|
long f3g1_2 = f3_2 * cast(long) g1;
|
|
long f3g2 = f3 * cast(long) g2;
|
|
long f3g3_2 = f3_2 * cast(long) g3;
|
|
long f3g4 = f3 * cast(long) g4;
|
|
long f3g5_2 = f3_2 * cast(long) g5;
|
|
long f3g6 = f3 * cast(long) g6;
|
|
long f3g7_38 = f3_2 * cast(long) g7_19;
|
|
long f3g8_19 = f3 * cast(long) g8_19;
|
|
long f3g9_38 = f3_2 * cast(long) g9_19;
|
|
long f4g0 = f4 * cast(long) g0;
|
|
long f4g1 = f4 * cast(long) g1;
|
|
long f4g2 = f4 * cast(long) g2;
|
|
long f4g3 = f4 * cast(long) g3;
|
|
long f4g4 = f4 * cast(long) g4;
|
|
long f4g5 = f4 * cast(long) g5;
|
|
long f4g6_19 = f4 * cast(long) g6_19;
|
|
long f4g7_19 = f4 * cast(long) g7_19;
|
|
long f4g8_19 = f4 * cast(long) g8_19;
|
|
long f4g9_19 = f4 * cast(long) g9_19;
|
|
long f5g0 = f5 * cast(long) g0;
|
|
long f5g1_2 = f5_2 * cast(long) g1;
|
|
long f5g2 = f5 * cast(long) g2;
|
|
long f5g3_2 = f5_2 * cast(long) g3;
|
|
long f5g4 = f5 * cast(long) g4;
|
|
long f5g5_38 = f5_2 * cast(long) g5_19;
|
|
long f5g6_19 = f5 * cast(long) g6_19;
|
|
long f5g7_38 = f5_2 * cast(long) g7_19;
|
|
long f5g8_19 = f5 * cast(long) g8_19;
|
|
long f5g9_38 = f5_2 * cast(long) g9_19;
|
|
long f6g0 = f6 * cast(long) g0;
|
|
long f6g1 = f6 * cast(long) g1;
|
|
long f6g2 = f6 * cast(long) g2;
|
|
long f6g3 = f6 * cast(long) g3;
|
|
long f6g4_19 = f6 * cast(long) g4_19;
|
|
long f6g5_19 = f6 * cast(long) g5_19;
|
|
long f6g6_19 = f6 * cast(long) g6_19;
|
|
long f6g7_19 = f6 * cast(long) g7_19;
|
|
long f6g8_19 = f6 * cast(long) g8_19;
|
|
long f6g9_19 = f6 * cast(long) g9_19;
|
|
long f7g0 = f7 * cast(long) g0;
|
|
long f7g1_2 = f7_2 * cast(long) g1;
|
|
long f7g2 = f7 * cast(long) g2;
|
|
long f7g3_38 = f7_2 * cast(long) g3_19;
|
|
long f7g4_19 = f7 * cast(long) g4_19;
|
|
long f7g5_38 = f7_2 * cast(long) g5_19;
|
|
long f7g6_19 = f7 * cast(long) g6_19;
|
|
long f7g7_38 = f7_2 * cast(long) g7_19;
|
|
long f7g8_19 = f7 * cast(long) g8_19;
|
|
long f7g9_38 = f7_2 * cast(long) g9_19;
|
|
long f8g0 = f8 * cast(long) g0;
|
|
long f8g1 = f8 * cast(long) g1;
|
|
long f8g2_19 = f8 * cast(long) g2_19;
|
|
long f8g3_19 = f8 * cast(long) g3_19;
|
|
long f8g4_19 = f8 * cast(long) g4_19;
|
|
long f8g5_19 = f8 * cast(long) g5_19;
|
|
long f8g6_19 = f8 * cast(long) g6_19;
|
|
long f8g7_19 = f8 * cast(long) g7_19;
|
|
long f8g8_19 = f8 * cast(long) g8_19;
|
|
long f8g9_19 = f8 * cast(long) g9_19;
|
|
long f9g0 = f9 * cast(long) g0;
|
|
long f9g1_38 = f9_2 * cast(long) g1_19;
|
|
long f9g2_19 = f9 * cast(long) g2_19;
|
|
long f9g3_38 = f9_2 * cast(long) g3_19;
|
|
long f9g4_19 = f9 * cast(long) g4_19;
|
|
long f9g5_38 = f9_2 * cast(long) g5_19;
|
|
long f9g6_19 = f9 * cast(long) g6_19;
|
|
long f9g7_38 = f9_2 * cast(long) g7_19;
|
|
long f9g8_19 = f9 * cast(long) g8_19;
|
|
long f9g9_38 = f9_2 * cast(long) g9_19;
|
|
long h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
|
|
long h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
|
|
long h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
|
|
long h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
|
|
long h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
|
|
long h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
|
|
long h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
|
|
long h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
|
|
long h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
|
|
long h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
|
|
long carry0;
|
|
long carry1;
|
|
long carry2;
|
|
long carry3;
|
|
long carry4;
|
|
long carry5;
|
|
long carry6;
|
|
long carry7;
|
|
long carry8;
|
|
long carry9;
|
|
|
|
/*
|
|
|h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
|
|
i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
|
|
|h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
|
|
i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
|
|
*/
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
/* |h0| <= 2^25 */
|
|
/* |h4| <= 2^25 */
|
|
/* |h1| <= 1.71*2^59 */
|
|
/* |h5| <= 1.71*2^59 */
|
|
|
|
carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25);
|
|
carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25);
|
|
/* |h1| <= 2^24; from now on fits into int32 */
|
|
/* |h5| <= 2^24; from now on fits into int32 */
|
|
/* |h2| <= 1.41*2^60 */
|
|
/* |h6| <= 1.41*2^60 */
|
|
|
|
carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26);
|
|
carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26);
|
|
/* |h2| <= 2^25; from now on fits into int32 unchanged */
|
|
/* |h6| <= 2^25; from now on fits into int32 unchanged */
|
|
/* |h3| <= 1.71*2^59 */
|
|
/* |h7| <= 1.71*2^59 */
|
|
|
|
carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25);
|
|
carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25);
|
|
/* |h3| <= 2^24; from now on fits into int32 unchanged */
|
|
/* |h7| <= 2^24; from now on fits into int32 unchanged */
|
|
/* |h4| <= 1.72*2^34 */
|
|
/* |h8| <= 1.41*2^60 */
|
|
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26);
|
|
/* |h4| <= 2^25; from now on fits into int32 unchanged */
|
|
/* |h8| <= 2^25; from now on fits into int32 unchanged */
|
|
/* |h5| <= 1.01*2^24 */
|
|
/* |h9| <= 1.71*2^59 */
|
|
|
|
carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25);
|
|
/* |h9| <= 2^24; from now on fits into int32 unchanged */
|
|
/* |h0| <= 1.1*2^39 */
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
/* |h0| <= 2^25; from now on fits into int32 unchanged */
|
|
/* |h1| <= 1.01*2^24 */
|
|
fe h;
|
|
h[0] = cast(int) h0;
|
|
h[1] = cast(int) h1;
|
|
h[2] = cast(int) h2;
|
|
h[3] = cast(int) h3;
|
|
h[4] = cast(int) h4;
|
|
h[5] = cast(int) h5;
|
|
h[6] = cast(int) h6;
|
|
h[7] = cast(int) h7;
|
|
h[8] = cast(int) h8;
|
|
h[9] = cast(int) h9;
|
|
|
|
return h;
|
|
}
|
|
|
|
|
|
/// Returns f * 121666
|
|
///
|
|
/// Preconditions:
|
|
/// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
///
|
|
/// Postconditions:
|
|
/// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
|
public fe fe_mul121666(in ref fe f) pure
|
|
{
|
|
int f0 = f[0];
|
|
int f1 = f[1];
|
|
int f2 = f[2];
|
|
int f3 = f[3];
|
|
int f4 = f[4];
|
|
int f5 = f[5];
|
|
int f6 = f[6];
|
|
int f7 = f[7];
|
|
int f8 = f[8];
|
|
int f9 = f[9];
|
|
long h0 = f0 * cast(long) 121666;
|
|
long h1 = f1 * cast(long) 121666;
|
|
long h2 = f2 * cast(long) 121666;
|
|
long h3 = f3 * cast(long) 121666;
|
|
long h4 = f4 * cast(long) 121666;
|
|
long h5 = f5 * cast(long) 121666;
|
|
long h6 = f6 * cast(long) 121666;
|
|
long h7 = f7 * cast(long) 121666;
|
|
long h8 = f8 * cast(long) 121666;
|
|
long h9 = f9 * cast(long) 121666;
|
|
long carry0;
|
|
long carry1;
|
|
long carry2;
|
|
long carry3;
|
|
long carry4;
|
|
long carry5;
|
|
long carry6;
|
|
long carry7;
|
|
long carry8;
|
|
long carry9;
|
|
|
|
carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
|
|
carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
|
|
carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
|
|
carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
|
|
carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
|
|
carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
|
|
carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
|
|
carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
|
|
|
|
fe h;
|
|
h[0] = cast(uint) h0;
|
|
h[1] = cast(uint) h1;
|
|
h[2] = cast(uint) h2;
|
|
h[3] = cast(uint) h3;
|
|
h[4] = cast(uint) h4;
|
|
h[5] = cast(uint) h5;
|
|
h[6] = cast(uint) h6;
|
|
h[7] = cast(uint) h7;
|
|
h[8] = cast(uint) h8;
|
|
h[9] = cast(uint) h9;
|
|
|
|
return h;
|
|
}
|
|
|
|
/// Returns: z^(2^(225-23))
|
|
package fe fe_pow22523(in ref fe z) pure
|
|
{
|
|
fe t0;
|
|
fe t1;
|
|
fe t2;
|
|
t0 = z.sq;
|
|
t1 = t0.sq.sq;
|
|
|
|
t1 *= z;
|
|
t0 *= t1;
|
|
t0 = t0.sq;
|
|
t0 *= t1;
|
|
|
|
// t1 = z.cpow!9;
|
|
// t0 = z.cpow!31;
|
|
|
|
t1 = t0.sq!5;
|
|
|
|
t0 *= t1;
|
|
|
|
t1 = t0.sq!10;
|
|
|
|
t1 *= t0;
|
|
|
|
t2 = t1.sq!20;
|
|
|
|
t1 *= t2;
|
|
|
|
t1 = t1.sq!10;
|
|
|
|
t0 *= t1;
|
|
|
|
t1 = t0.sq!50;
|
|
|
|
t1 *= t0;
|
|
|
|
t2 = t1.sq!100;
|
|
|
|
t1 *= t2;
|
|
|
|
t1 = t1.sq!50;
|
|
|
|
t0 *= t1;
|
|
t0 = t0.sq.sq;
|
|
|
|
return t0 * z;
|
|
}
|
|
|
|
|
|
/**
|
|
Returns: h = f * f
|
|
Can overlap h with f.
|
|
|
|
Preconditions:
|
|
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
|
|
|
Postconditions:
|
|
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
|
|
|
Note:
|
|
See fe_mul.c for discussion of implementation strategy.
|
|
*/
|
|
private fe fe_sq(in ref fe f) pure
|
|
{
|
|
int f0 = f[0];
|
|
int f1 = f[1];
|
|
int f2 = f[2];
|
|
int f3 = f[3];
|
|
int f4 = f[4];
|
|
int f5 = f[5];
|
|
int f6 = f[6];
|
|
int f7 = f[7];
|
|
int f8 = f[8];
|
|
int f9 = f[9];
|
|
int f0_2 = 2 * f0;
|
|
int f1_2 = 2 * f1;
|
|
int f2_2 = 2 * f2;
|
|
int f3_2 = 2 * f3;
|
|
int f4_2 = 2 * f4;
|
|
int f5_2 = 2 * f5;
|
|
int f6_2 = 2 * f6;
|
|
int f7_2 = 2 * f7;
|
|
int f5_38 = 38 * f5; /* 1.959375*2^30 */
|
|
int f6_19 = 19 * f6; /* 1.959375*2^30 */
|
|
int f7_38 = 38 * f7; /* 1.959375*2^30 */
|
|
int f8_19 = 19 * f8; /* 1.959375*2^30 */
|
|
int f9_38 = 38 * f9; /* 1.959375*2^30 */
|
|
long f0f0 = f0 * cast(long) f0;
|
|
long f0f1_2 = f0_2 * cast(long) f1;
|
|
long f0f2_2 = f0_2 * cast(long) f2;
|
|
long f0f3_2 = f0_2 * cast(long) f3;
|
|
long f0f4_2 = f0_2 * cast(long) f4;
|
|
long f0f5_2 = f0_2 * cast(long) f5;
|
|
long f0f6_2 = f0_2 * cast(long) f6;
|
|
long f0f7_2 = f0_2 * cast(long) f7;
|
|
long f0f8_2 = f0_2 * cast(long) f8;
|
|
long f0f9_2 = f0_2 * cast(long) f9;
|
|
long f1f1_2 = f1_2 * cast(long) f1;
|
|
long f1f2_2 = f1_2 * cast(long) f2;
|
|
long f1f3_4 = f1_2 * cast(long) f3_2;
|
|
long f1f4_2 = f1_2 * cast(long) f4;
|
|
long f1f5_4 = f1_2 * cast(long) f5_2;
|
|
long f1f6_2 = f1_2 * cast(long) f6;
|
|
long f1f7_4 = f1_2 * cast(long) f7_2;
|
|
long f1f8_2 = f1_2 * cast(long) f8;
|
|
long f1f9_76 = f1_2 * cast(long) f9_38;
|
|
long f2f2 = f2 * cast(long) f2;
|
|
long f2f3_2 = f2_2 * cast(long) f3;
|
|
long f2f4_2 = f2_2 * cast(long) f4;
|
|
long f2f5_2 = f2_2 * cast(long) f5;
|
|
long f2f6_2 = f2_2 * cast(long) f6;
|
|
long f2f7_2 = f2_2 * cast(long) f7;
|
|
long f2f8_38 = f2_2 * cast(long) f8_19;
|
|
long f2f9_38 = f2 * cast(long) f9_38;
|
|
long f3f3_2 = f3_2 * cast(long) f3;
|
|
long f3f4_2 = f3_2 * cast(long) f4;
|
|
long f3f5_4 = f3_2 * cast(long) f5_2;
|
|
long f3f6_2 = f3_2 * cast(long) f6;
|
|
long f3f7_76 = f3_2 * cast(long) f7_38;
|
|
long f3f8_38 = f3_2 * cast(long) f8_19;
|
|
long f3f9_76 = f3_2 * cast(long) f9_38;
|
|
long f4f4 = f4 * cast(long) f4;
|
|
long f4f5_2 = f4_2 * cast(long) f5;
|
|
long f4f6_38 = f4_2 * cast(long) f6_19;
|
|
long f4f7_38 = f4 * cast(long) f7_38;
|
|
long f4f8_38 = f4_2 * cast(long) f8_19;
|
|
long f4f9_38 = f4 * cast(long) f9_38;
|
|
long f5f5_38 = f5 * cast(long) f5_38;
|
|
long f5f6_38 = f5_2 * cast(long) f6_19;
|
|
long f5f7_76 = f5_2 * cast(long) f7_38;
|
|
long f5f8_38 = f5_2 * cast(long) f8_19;
|
|
long f5f9_76 = f5_2 * cast(long) f9_38;
|
|
long f6f6_19 = f6 * cast(long) f6_19;
|
|
long f6f7_38 = f6 * cast(long) f7_38;
|
|
long f6f8_38 = f6_2 * cast(long) f8_19;
|
|
long f6f9_38 = f6 * cast(long) f9_38;
|
|
long f7f7_38 = f7 * cast(long) f7_38;
|
|
long f7f8_38 = f7_2 * cast(long) f8_19;
|
|
long f7f9_76 = f7_2 * cast(long) f9_38;
|
|
long f8f8_19 = f8 * cast(long) f8_19;
|
|
long f8f9_38 = f8 * cast(long) f9_38;
|
|
long f9f9_38 = f9 * cast(long) f9_38;
|
|
long h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
|
|
long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
|
|
long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
|
|
long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
|
|
long h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
|
|
long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
|
|
long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
|
|
long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
|
|
long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
|
|
long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
|
|
long carry0;
|
|
long carry1;
|
|
long carry2;
|
|
long carry3;
|
|
long carry4;
|
|
long carry5;
|
|
long carry6;
|
|
long carry7;
|
|
long carry8;
|
|
long carry9;
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
|
|
carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25);
|
|
carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25);
|
|
|
|
carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26);
|
|
carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26);
|
|
|
|
carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25);
|
|
carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25);
|
|
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26);
|
|
|
|
carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25);
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
|
|
fe h;
|
|
h[0] = cast(int) h0;
|
|
h[1] = cast(int) h1;
|
|
h[2] = cast(int) h2;
|
|
h[3] = cast(int) h3;
|
|
h[4] = cast(int) h4;
|
|
h[5] = cast(int) h5;
|
|
h[6] = cast(int) h6;
|
|
h[7] = cast(int) h7;
|
|
h[8] = cast(int) h8;
|
|
h[9] = cast(int) h9;
|
|
return h;
|
|
}
|
|
|
|
/**
|
|
Returns: h = 2 * f * f
|
|
Can overlap h with f.
|
|
|
|
Preconditions:
|
|
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
|
|
|
Postconditions:
|
|
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
|
|
|
Note:
|
|
See fe_mul.c for discussion of implementation strategy.
|
|
*/
|
|
private fe fe_sq2(in ref fe f) pure
|
|
{
|
|
int f0 = f[0];
|
|
int f1 = f[1];
|
|
int f2 = f[2];
|
|
int f3 = f[3];
|
|
int f4 = f[4];
|
|
int f5 = f[5];
|
|
int f6 = f[6];
|
|
int f7 = f[7];
|
|
int f8 = f[8];
|
|
int f9 = f[9];
|
|
int f0_2 = 2 * f0;
|
|
int f1_2 = 2 * f1;
|
|
int f2_2 = 2 * f2;
|
|
int f3_2 = 2 * f3;
|
|
int f4_2 = 2 * f4;
|
|
int f5_2 = 2 * f5;
|
|
int f6_2 = 2 * f6;
|
|
int f7_2 = 2 * f7;
|
|
int f5_38 = 38 * f5; /* 1.959375*2^30 */
|
|
int f6_19 = 19 * f6; /* 1.959375*2^30 */
|
|
int f7_38 = 38 * f7; /* 1.959375*2^30 */
|
|
int f8_19 = 19 * f8; /* 1.959375*2^30 */
|
|
int f9_38 = 38 * f9; /* 1.959375*2^30 */
|
|
long f0f0 = f0 * cast(long) f0;
|
|
long f0f1_2 = f0_2 * cast(long) f1;
|
|
long f0f2_2 = f0_2 * cast(long) f2;
|
|
long f0f3_2 = f0_2 * cast(long) f3;
|
|
long f0f4_2 = f0_2 * cast(long) f4;
|
|
long f0f5_2 = f0_2 * cast(long) f5;
|
|
long f0f6_2 = f0_2 * cast(long) f6;
|
|
long f0f7_2 = f0_2 * cast(long) f7;
|
|
long f0f8_2 = f0_2 * cast(long) f8;
|
|
long f0f9_2 = f0_2 * cast(long) f9;
|
|
long f1f1_2 = f1_2 * cast(long) f1;
|
|
long f1f2_2 = f1_2 * cast(long) f2;
|
|
long f1f3_4 = f1_2 * cast(long) f3_2;
|
|
long f1f4_2 = f1_2 * cast(long) f4;
|
|
long f1f5_4 = f1_2 * cast(long) f5_2;
|
|
long f1f6_2 = f1_2 * cast(long) f6;
|
|
long f1f7_4 = f1_2 * cast(long) f7_2;
|
|
long f1f8_2 = f1_2 * cast(long) f8;
|
|
long f1f9_76 = f1_2 * cast(long) f9_38;
|
|
long f2f2 = f2 * cast(long) f2;
|
|
long f2f3_2 = f2_2 * cast(long) f3;
|
|
long f2f4_2 = f2_2 * cast(long) f4;
|
|
long f2f5_2 = f2_2 * cast(long) f5;
|
|
long f2f6_2 = f2_2 * cast(long) f6;
|
|
long f2f7_2 = f2_2 * cast(long) f7;
|
|
long f2f8_38 = f2_2 * cast(long) f8_19;
|
|
long f2f9_38 = f2 * cast(long) f9_38;
|
|
long f3f3_2 = f3_2 * cast(long) f3;
|
|
long f3f4_2 = f3_2 * cast(long) f4;
|
|
long f3f5_4 = f3_2 * cast(long) f5_2;
|
|
long f3f6_2 = f3_2 * cast(long) f6;
|
|
long f3f7_76 = f3_2 * cast(long) f7_38;
|
|
long f3f8_38 = f3_2 * cast(long) f8_19;
|
|
long f3f9_76 = f3_2 * cast(long) f9_38;
|
|
long f4f4 = f4 * cast(long) f4;
|
|
long f4f5_2 = f4_2 * cast(long) f5;
|
|
long f4f6_38 = f4_2 * cast(long) f6_19;
|
|
long f4f7_38 = f4 * cast(long) f7_38;
|
|
long f4f8_38 = f4_2 * cast(long) f8_19;
|
|
long f4f9_38 = f4 * cast(long) f9_38;
|
|
long f5f5_38 = f5 * cast(long) f5_38;
|
|
long f5f6_38 = f5_2 * cast(long) f6_19;
|
|
long f5f7_76 = f5_2 * cast(long) f7_38;
|
|
long f5f8_38 = f5_2 * cast(long) f8_19;
|
|
long f5f9_76 = f5_2 * cast(long) f9_38;
|
|
long f6f6_19 = f6 * cast(long) f6_19;
|
|
long f6f7_38 = f6 * cast(long) f7_38;
|
|
long f6f8_38 = f6_2 * cast(long) f8_19;
|
|
long f6f9_38 = f6 * cast(long) f9_38;
|
|
long f7f7_38 = f7 * cast(long) f7_38;
|
|
long f7f8_38 = f7_2 * cast(long) f8_19;
|
|
long f7f9_76 = f7_2 * cast(long) f9_38;
|
|
long f8f8_19 = f8 * cast(long) f8_19;
|
|
long f8f9_38 = f8 * cast(long) f9_38;
|
|
long f9f9_38 = f9 * cast(long) f9_38;
|
|
long h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
|
|
long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
|
|
long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
|
|
long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
|
|
long h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
|
|
long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
|
|
long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
|
|
long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
|
|
long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
|
|
long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
|
|
long carry0;
|
|
long carry1;
|
|
long carry2;
|
|
long carry3;
|
|
long carry4;
|
|
long carry5;
|
|
long carry6;
|
|
long carry7;
|
|
long carry8;
|
|
long carry9;
|
|
|
|
h0 += h0;
|
|
h1 += h1;
|
|
h2 += h2;
|
|
h3 += h3;
|
|
h4 += h4;
|
|
h5 += h5;
|
|
h6 += h6;
|
|
h7 += h7;
|
|
h8 += h8;
|
|
h9 += h9;
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
|
|
carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25);
|
|
carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25);
|
|
|
|
carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26);
|
|
carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26);
|
|
|
|
carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25);
|
|
carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25);
|
|
|
|
carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
|
|
carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26);
|
|
|
|
carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25);
|
|
|
|
carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
|
|
|
|
fe h;
|
|
h[0] = cast(int) h0;
|
|
h[1] = cast(int) h1;
|
|
h[2] = cast(int) h2;
|
|
h[3] = cast(int) h3;
|
|
h[4] = cast(int) h4;
|
|
h[5] = cast(int) h5;
|
|
h[6] = cast(int) h6;
|
|
h[7] = cast(int) h7;
|
|
h[8] = cast(int) h8;
|
|
h[9] = cast(int) h9;
|
|
return h;
|
|
}
|
|
|
|
///**
|
|
// Returns: h = f - g
|
|
// Can overlap h with f or g.
|
|
//
|
|
// Preconditions:
|
|
// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
|
// |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
|
//
|
|
// Postconditions:
|
|
// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
// */
|
|
//void fe_sub(ref fe h, in ref fe f, in ref fe g)
|
|
//{
|
|
// h[] = f[] - g[];
|
|
//}
|
|
|
|
|
|
/**
|
|
*
|
|
* Params:
|
|
* s = 32 byte buffer
|
|
*
|
|
Preconditions:
|
|
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
|
|
|
Write p=2^255-19; q=floor(h/p).
|
|
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
|
|
|
|
Proof:
|
|
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
|
|
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
|
|
|
|
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
|
|
Then 0<y<1.
|
|
|
|
Write r=h-pq.
|
|
Have 0<=r<=p-1=2^255-20.
|
|
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
|
|
|
|
Write x=r+19(2^-255)r+y.
|
|
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
|
|
|
|
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
|
|
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
|
|
*/
|
|
private ubyte[32] fe_tobytes(in ref fe h) pure
|
|
{
|
|
int h0 = h[0];
|
|
int h1 = h[1];
|
|
int h2 = h[2];
|
|
int h3 = h[3];
|
|
int h4 = h[4];
|
|
int h5 = h[5];
|
|
int h6 = h[6];
|
|
int h7 = h[7];
|
|
int h8 = h[8];
|
|
int h9 = h[9];
|
|
int q;
|
|
int carry0;
|
|
int carry1;
|
|
int carry2;
|
|
int carry3;
|
|
int carry4;
|
|
int carry5;
|
|
int carry6;
|
|
int carry7;
|
|
int carry8;
|
|
int carry9;
|
|
|
|
q = (19 * h9 + ((cast(int) 1) << 24)) >> 25;
|
|
q = (h0 + q) >> 26;
|
|
q = (h1 + q) >> 25;
|
|
q = (h2 + q) >> 26;
|
|
q = (h3 + q) >> 25;
|
|
q = (h4 + q) >> 26;
|
|
q = (h5 + q) >> 25;
|
|
q = (h6 + q) >> 26;
|
|
q = (h7 + q) >> 25;
|
|
q = (h8 + q) >> 26;
|
|
q = (h9 + q) >> 25;
|
|
|
|
/* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
|
|
h0 += 19 * q;
|
|
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
|
|
|
|
carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
|
|
carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
|
|
carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
|
|
carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
|
|
carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
|
|
carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
|
|
carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
|
|
carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
|
|
carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
|
|
carry9 = h9 >> 25; h9 -= carry9 << 25;
|
|
/* h10 = carry9 */
|
|
|
|
/*
|
|
Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
|
|
Have h0+...+2^230 h9 between 0 and 2^255-1;
|
|
evidently 2^255 h10-2^255 q = 0.
|
|
Goal: Output h0+...+2^230 h9.
|
|
*/
|
|
|
|
ubyte[32] s;
|
|
s[0] = cast(ubyte) (h0 >> 0);
|
|
s[1] = cast(ubyte) (h0 >> 8);
|
|
s[2] = cast(ubyte) (h0 >> 16);
|
|
s[3] = cast(ubyte) (((h0 >> 24) | (h1 << 2)));
|
|
s[4] = cast(ubyte) (h1 >> 6);
|
|
s[5] = cast(ubyte) (h1 >> 14);
|
|
s[6] = cast(ubyte) (((h1 >> 22) | (h2 << 3)));
|
|
s[7] = cast(ubyte) (h2 >> 5);
|
|
s[8] = cast(ubyte) (h2 >> 13);
|
|
s[9] = cast(ubyte) (((h2 >> 21) | (h3 << 5)));
|
|
s[10] = cast(ubyte) (h3 >> 3);
|
|
s[11] = cast(ubyte) (h3 >> 11);
|
|
s[12] = cast(ubyte) (((h3 >> 19) | (h4 << 6)));
|
|
s[13] = cast(ubyte) (h4 >> 2);
|
|
s[14] = cast(ubyte) (h4 >> 10);
|
|
s[15] = cast(ubyte) (h4 >> 18);
|
|
s[16] = cast(ubyte) (h5 >> 0);
|
|
s[17] = cast(ubyte) (h5 >> 8);
|
|
s[18] = cast(ubyte) (h5 >> 16);
|
|
s[19] = cast(ubyte) (((h5 >> 24) | (h6 << 1)));
|
|
s[20] = cast(ubyte) (h6 >> 7);
|
|
s[21] = cast(ubyte) (h6 >> 15);
|
|
s[22] = cast(ubyte) (((h6 >> 23) | (h7 << 3)));
|
|
s[23] = cast(ubyte) (h7 >> 5);
|
|
s[24] = cast(ubyte) (h7 >> 13);
|
|
s[25] = cast(ubyte) (((h7 >> 21) | (h8 << 4)));
|
|
s[26] = cast(ubyte) (h8 >> 4);
|
|
s[27] = cast(ubyte) (h8 >> 12);
|
|
s[28] = cast(ubyte) (((h8 >> 20) | (h9 << 6)));
|
|
s[29] = cast(ubyte) (h9 >> 2);
|
|
s[30] = cast(ubyte) (h9 >> 10);
|
|
s[31] = cast(ubyte) (h9 >> 18);
|
|
|
|
return s;
|
|
}
|
|
|
|
/// Conditional swap.
|
|
/// Replace (f,g) with (g,f) if b == 1;
|
|
/// replace (f,g) with (f,g) if b == 0.
|
|
/// Params:
|
|
/// b = 0 or 1
|
|
void fe_cswap(ref fe f, ref fe g, in uint b)
|
|
in {
|
|
assert(b == 0 || b == 1);
|
|
} body
|
|
{
|
|
// TODO refactor
|
|
immutable uint mask = -b;
|
|
|
|
assert(mask == 0 || mask == 0xFFFFFFFF);
|
|
|
|
f[] ^= mask & g[];
|
|
g[] ^= mask & f[];
|
|
f[] ^= mask & g[];
|
|
}
|
|
|
|
unittest {
|
|
fe a;
|
|
fe b;
|
|
a[] = 1;
|
|
b[] = 2;
|
|
fe A = 1;
|
|
fe B = 2;
|
|
|
|
fe_cswap(A, B, 0);
|
|
assert(A == a);
|
|
assert(B == b);
|
|
fe_cswap(A, B, 1);
|
|
assert(A == b);
|
|
assert(B == a);
|
|
} |