@@ -33,7 +33,7 @@ namespace Nethermind.Int256
33
33
34
34
public UInt256 ( uint r0 , uint r1 , uint r2 , uint r3 , uint r4 , uint r5 , uint r6 , uint r7 )
35
35
{
36
- if ( Avx2 . IsSupported )
36
+ if ( Vector256 < uint > . IsSupported )
37
37
{
38
38
Unsafe . SkipInit ( out this . u0 ) ;
39
39
Unsafe . SkipInit ( out this . u1 ) ;
@@ -52,7 +52,7 @@ public UInt256(uint r0, uint r1, uint r2, uint r3, uint r4, uint r5, uint r6, ui
52
52
53
53
public UInt256 ( ulong u0 = 0 , ulong u1 = 0 , ulong u2 = 0 , ulong u3 = 0 )
54
54
{
55
- if ( Avx2 . IsSupported )
55
+ if ( Vector256 < ulong > . IsSupported )
56
56
{
57
57
Unsafe . SkipInit ( out this . u0 ) ;
58
58
Unsafe . SkipInit ( out this . u1 ) ;
@@ -82,7 +82,7 @@ public UInt256(in ReadOnlySpan<byte> bytes, bool isBigEndian = false)
82
82
}
83
83
else
84
84
{
85
- if ( Avx2 . IsSupported )
85
+ if ( Vector256 < byte > . IsSupported )
86
86
{
87
87
Unsafe . SkipInit ( out this . u0 ) ;
88
88
Unsafe . SkipInit ( out this . u1 ) ;
@@ -187,7 +187,7 @@ public UInt256(in ReadOnlySpan<ulong> data, bool isBigEndian = false)
187
187
}
188
188
else
189
189
{
190
- if ( Avx2 . IsSupported )
190
+ if ( Vector256 < ulong > . IsSupported )
191
191
{
192
192
Unsafe . SkipInit ( out this . u0 ) ;
193
193
Unsafe . SkipInit ( out this . u1 ) ;
@@ -404,6 +404,21 @@ public static void Add(in UInt256 a, in UInt256 b, out UInt256 res)
404
404
405
405
public static bool AddImpl ( in UInt256 a , in UInt256 b , out UInt256 res )
406
406
{
407
+ if ( ( a . u1 | a . u2 | a . u3 | b . u1 | b . u2 | b . u3 ) == 0 )
408
+ {
409
+ // Fast add for numbers less than 2^64 (18,446,744,073,709,551,615)
410
+ ulong u0 = a . u0 + b . u0 ;
411
+ // Assignment to res after in case is used as input for a or b (by ref aliasing)
412
+ res = default ;
413
+ Unsafe . AsRef ( in res . u0 ) = u0;
414
+ if ( u0 < a . u0 )
415
+ {
416
+ Unsafe . AsRef ( in res . u1 ) = 1 ;
417
+ }
418
+ // Never overflows UInt256
419
+ return false ;
420
+ }
421
+
407
422
if ( Avx2 . IsSupported )
408
423
{
409
424
Vector256 < ulong > av = Unsafe . As < UInt256 , Vector256 < ulong > > ( ref Unsafe . AsRef ( in a ) ) ;
@@ -982,6 +997,17 @@ private static void SubtractWithBorrow(ulong a, ulong b, ref ulong borrow, out u
982
997
// Multiply sets res to the product x*y
983
998
public static void Multiply ( in UInt256 x , in UInt256 y , out UInt256 res )
984
999
{
1000
+ if ( ( x . u1 | x . u2 | x . u3 | y . u1 | y . u2 | y . u3 ) == 0 )
1001
+ {
1002
+ // Fast multiply for numbers less than 2^64 (18,446,744,073,709,551,615)
1003
+ ulong high = Math . BigMul ( x . u0 , y . u0 , out ulong low ) ;
1004
+ // Assignment to res after multiply in case is used as input for x or y (by ref aliasing)
1005
+ res = default ;
1006
+ Unsafe . AsRef ( in res . u0 ) = low;
1007
+ Unsafe . AsRef ( in res . u1 ) = high;
1008
+ return ;
1009
+ }
1010
+
985
1011
ref ulong rx = ref Unsafe . As < UInt256 , ulong > ( ref Unsafe . AsRef ( in x ) ) ;
986
1012
ref ulong ry = ref Unsafe . As < UInt256 , ulong > ( ref Unsafe . AsRef ( in y ) ) ;
987
1013
@@ -1019,23 +1045,24 @@ public static bool MultiplyOverflow(in UInt256 x, in UInt256 y, out UInt256 res)
1019
1045
? 64 + Len64 ( u1 )
1020
1046
: Len64 ( u0 ) ;
1021
1047
1048
+ [ SkipLocalsInit ]
1022
1049
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
1023
1050
private void Squared ( out UInt256 result )
1024
1051
{
1025
- UInt256 z = this ;
1026
- Span < ulong > res = stackalloc ulong [ 4 ] ;
1052
+ ( ulong carry0 , ulong res0 ) = Multiply64 ( u0 , u0 ) ;
1053
+ ( carry0 , ulong temp1 ) = UmulHopi ( carry0 , u0 , u1 ) ;
1054
+ ( carry0 , ulong temp2 ) = UmulHopi ( carry0 , u0 , u2 ) ;
1027
1055
1028
- ( ulong carry0 , res [ 0 ] ) = Multiply64 ( z . u0 , z . u0 ) ;
1029
- ( carry0 , ulong res1 ) = UmulHopi ( carry0 , z . u0 , z . u1 ) ;
1030
- ( carry0 , ulong res2 ) = UmulHopi ( carry0 , z . u0 , z . u2 ) ;
1056
+ ( ulong carry1 , ulong res1 ) = UmulHopi ( temp1 , u0 , u1 ) ;
1057
+ ( carry1 , temp2 ) = UmulStepi ( temp2 , u1 , u1 , carry1 ) ;
1031
1058
1032
- ( ulong carry1 , res [ 1 ] ) = UmulHopi ( res1 , z . u0 , z . u1 ) ;
1033
- ( carry1 , res2 ) = UmulStepi ( res2 , z . u1 , z . u1 , carry1 ) ;
1059
+ ( ulong carry2 , ulong res2 ) = UmulHopi ( temp2 , u0 , u2 ) ;
1034
1060
1035
- ( ulong carry2 , res [ 2 ] ) = UmulHopi ( res2 , z . u0 , z . u2 ) ;
1061
+ // Don't care about carry here
1062
+ ulong res3 = 2 * ( u0 * u3 + u1 * u2 ) + carry0 + carry1 + carry2 ;
1036
1063
1037
- res [ 3 ] = 2 * ( z . u0 * z . u3 + z . u1 * z . u2 ) + carry0 + carry1 + carry2 ;
1038
- result = new UInt256 ( res ) ;
1064
+ Unsafe . SkipInit ( out result ) ;
1065
+ Unsafe . As < ulong , Vector256 < ulong > > ( ref Unsafe . AsRef ( in result . u0 ) ) = Vector256 . Create ( res0 , res1 , res2 , res3 ) ;
1039
1066
}
1040
1067
1041
1068
public static void Exp ( in UInt256 b , in UInt256 e , out UInt256 result )
@@ -1114,6 +1141,18 @@ public static void MultiplyMod(in UInt256 x, in UInt256 y, in UInt256 m, out UIn
1114
1141
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
1115
1142
private static void Umul ( in UInt256 x , in UInt256 y , out UInt256 low , out UInt256 high )
1116
1143
{
1144
+ if ( ( x . u1 | x . u2 | x . u3 | y . u1 | y . u2 | y . u3 ) == 0 )
1145
+ {
1146
+ // Fast multiply for numbers less than 2^64 (18,446,744,073,709,551,615)
1147
+ ulong highUL = Math . BigMul ( x . u0 , y . u0 , out ulong lowUL ) ;
1148
+ // Assignment to high, low after multiply in case either is used as input for x or y (by ref aliasing)
1149
+ high = default ;
1150
+ low = default ;
1151
+ Unsafe . AsRef ( in low . u0 ) = lowUL;
1152
+ Unsafe . AsRef ( in low . u1 ) = highUL;
1153
+ return ;
1154
+ }
1155
+
1117
1156
( ulong carry , ulong l0 ) = Multiply64 ( x . u0 , y . u0 ) ;
1118
1157
( carry , ulong res1 ) = UmulHopi ( carry , x . u1 , y . u0 ) ;
1119
1158
( carry , ulong res2 ) = UmulHopi ( carry , x . u2 , y . u0 ) ;
0 commit comments