17template <
class Fq,
class Fr,
class T>
24template <
class Fq,
class Fr,
class T>
31template <
class Fq,
class Fr,
class T>
38template <
class Fq,
class Fr,
class T>
45template <
class Fq,
class Fr,
class T>
57template <
class Fq,
class Fr,
class T>
68 if (is_point_at_infinity()) {
76 Fq zz_inv = z_inv.
sqr();
77 Fq zzz_inv = zz_inv * z_inv;
78 affine_element<Fq, Fr, T> result(x * zz_inv, y * zzz_inv);
84 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
85 if (is_point_at_infinity()) {
89 if (x.is_msb_set_word()) {
121 if constexpr (T::has_a) {
122 T3 += (T::a * z.sqr().sqr());
158template <
class Fq,
class Fr,
class T>
161 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
162 if (is_point_at_infinity()) {
163 *
this = { other.
x, other.y,
Fq::one() };
167 const bool edge_case_trigger = x.
is_msb_set() || other.x.is_msb_set();
168 if (edge_case_trigger) {
169 if (x.is_msb_set()) {
170 *
this = { other.x, other.y,
Fq::one() };
180 Fq T1 = other.x * T0;
189 if (__builtin_expect(T1.
is_zero(), 0)) {
247template <
class Fq,
class Fr,
class T>
251 return (result += other);
254template <
class Fq,
class Fr,
class T>
257 const affine_element<Fq, Fr, T> to_add{ other.
x, -other.y };
258 return operator+=(to_add);
261template <
class Fq,
class Fr,
class T>
265 return (result -= other);
268template <
class Fq,
class Fr,
class T>
271 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
272 bool p1_zero = is_point_at_infinity();
273 bool p2_zero = other.is_point_at_infinity();
274 if (__builtin_expect((p1_zero || p2_zero), 0)) {
275 if (p1_zero && !p2_zero) {
279 if (p2_zero && !p1_zero) {
286 bool p1_zero = x.is_msb_set();
287 bool p2_zero = other.x.is_msb_set();
288 if (__builtin_expect((p1_zero || p2_zero), 0)) {
289 if (p1_zero && !p2_zero) {
293 if (p2_zero && !p1_zero) {
301 Fq Z2Z2(other.z.sqr());
303 Fq U2(Z1Z1 * other.x);
306 Fq S1(Z2Z2 * other.z);
313 if (__builtin_expect(H.is_zero(), 0)) {
357template <
class Fq,
class Fr,
class T>
361 return (result += other);
364template <
class Fq,
class Fr,
class T>
367 const element to_add{ other.
x, -other.y, other.z };
368 return operator+=(to_add);
371template <
class Fq,
class Fr,
class T>
375 return (result -= other);
383template <
class Fq,
class Fr,
class T>
386 if constexpr (T::USE_ENDOMORPHISM) {
387 return mul_with_endomorphism(exponent);
389 return mul_without_endomorphism(exponent);
420 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
436 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
441 return (x.is_msb_set());
447 if (is_point_at_infinity()) {
456 Fq bz_6 = zzzz * zz * T::b;
457 if constexpr (T::has_a) {
458 bz_6 += (x * T::a) * zzzz;
460 Fq xxx = x.
sqr() * x + bz_6;
465template <
class Fq,
class Fr,
class T>
469 if ((!on_curve()) || (!other.on_curve())) {
472 bool am_infinity = is_point_at_infinity();
473 bool is_infinity = other.is_point_at_infinity();
474 bool both_infinity = am_infinity && is_infinity;
476 if ((!both_infinity) && (am_infinity || is_infinity)) {
479 const Fq lhs_zz = z.sqr();
480 const Fq lhs_zzz = lhs_zz * z;
481 const Fq rhs_zz = other.z.sqr();
482 const Fq rhs_zzz = rhs_zz * other.z;
484 const Fq lhs_x = x * rhs_zz;
485 const Fq lhs_y = y * rhs_zzz;
487 const Fq rhs_x = other.x * lhs_zz;
488 const Fq rhs_y = other.y * lhs_zzz;
489 return both_infinity || ((lhs_x == rhs_x) && (lhs_y == rhs_y));
492template <
class Fq,
class Fr,
class T>
495 if constexpr (T::can_hash_to_curve) {
498 Fq zz = result.
z.sqr();
499 Fq zzz = zz * result.
z;
509template <
class Fq,
class Fr,
class T>
512 const uint256_t converted_scalar(scalar);
514 if (converted_scalar == 0) {
519 const uint64_t maximum_set_bit = converted_scalar.
get_msb();
522 for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
524 if (converted_scalar.
get_bit(i)) {
525 accumulator += *
this;
547 std::array<uint64_t, NUM_ROUNDS * 2>
table;
564template <
class Fq,
class Fr,
class T>
568 if (is_point_at_infinity()) {
571 constexpr size_t NUM_ROUNDS = 32;
574 if (converted_scalar.
is_zero()) {
577 static constexpr size_t LOOKUP_SIZE = 8;
581 lookup_table[0] =
element(*
this);
582 for (
size_t i = 1; i < LOOKUP_SIZE; ++i) {
583 lookup_table[i] = lookup_table[i - 1] + d2;
589 accumulator.self_set_infinity();
592 for (
size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
593 uint64_t wnaf_entry = wnaf.table[i];
594 uint64_t
index = wnaf_entry & 0x0fffffffU;
595 bool sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
596 const bool is_odd = ((i & 1) == 1);
597 auto to_add = lookup_table[
static_cast<size_t>(
index)];
598 to_add.y.self_conditional_negate(sign ^ is_odd);
602 accumulator += to_add;
604 if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
605 for (
size_t j = 0; j < 4; ++j) {
606 accumulator.self_dbl();
612 accumulator += -lookup_table[0];
614 if (wnaf.endo_skew) {
615 accumulator +=
element{ lookup_table[0].
x * beta, lookup_table[0].y, lookup_table[0].z };
635template <
typename AffineElement,
typename Fq>
636__attribute__((always_inline))
inline void batch_affine_add_impl(
const AffineElement* lhs,
639 Fq* scratch_space)
noexcept
645 scratch_space[i] = lhs[i].x +
rhs[i].x;
646 rhs[i].x -= lhs[i].x;
647 rhs[i].y -= lhs[i].y;
653 throw_or_abort(
"attempted to invert zero in batch_affine_add_impl");
662 rhs[i].x =
rhs[i].y.sqr();
663 rhs[i].x -= scratch_space[i];
666 Fq temp = lhs[i].x -
rhs[i].x;
668 rhs[i].y = temp - lhs[i].y;
683template <
typename AffineElement,
typename Fq>
684__attribute__((always_inline))
inline void batch_affine_add_interleaved(AffineElement* points,
686 Fq* scratch_space)
noexcept
692 scratch_space[i >> 1] = points[i].x + points[i + 1].x;
693 points[i + 1].x -= points[i].x;
694 points[i + 1].y -= points[i].y;
700 throw_or_abort(
"attempted to invert zero in batch_affine_add_interleaved");
709 points[i + 1].x = points[i + 1].y.sqr();
711 points[(i +
num_points) >> 1].x = points[i + 1].x - scratch_space[i >> 1];
714 __builtin_prefetch(points + i - 2);
715 __builtin_prefetch(points + i - 1);
716 __builtin_prefetch(points + ((i +
num_points - 2) >> 1));
717 __builtin_prefetch(scratch_space + ((i - 2) >> 1));
721 points[i].x -= points[(i +
num_points) >> 1].x;
722 points[i].x *= points[i + 1].y;
723 points[(i +
num_points) >> 1].y = points[i].x - points[i].y;
740template <
typename AffineElement,
typename Fq>
741__attribute__((always_inline))
inline void batch_affine_double_impl(AffineElement* points,
743 Fq* scratch_space)
noexcept
749 scratch_space[i] = points[i].x.sqr();
750 scratch_space[i] = scratch_space[i] + scratch_space[i] + scratch_space[i];
756 throw_or_abort(
"attempted to invert zero in batch_affine_double_impl");
762 for (
size_t i_plus_1 =
num_points; i_plus_1 > 0; --i_plus_1) {
763 size_t i = i_plus_1 - 1;
769 points[i].x = scratch_space[i].
sqr() - (points[i].x + points[i].x);
770 points[i].y = scratch_space[i] * (
temp_x - points[i].x) - points[i].y;
785template <
class Fq,
class Fr,
class T>
803 [&](
size_t start,
size_t end,
BB_UNUSED size_t chunk_index) {
804 batch_affine_add_impl<affine_element, Fq>(
805 &second_group[start], &results[start], end - start, &scratch_space[start]);
820template <
class Fq,
class Fr,
class T>
846 if (converted_scalar.
is_zero()) {
854 constexpr size_t LOOKUP_SIZE = 8;
855 constexpr size_t NUM_ROUNDS = 32;
862 for (
auto& table : lookup_table) {
867 auto execute_range = [&](
size_t start,
size_t end) {
870 batch_affine_add_impl<affine_element, Fq>(&lhs[start], &
rhs[start], end - start, &scratch_space[start]);
875 batch_affine_double_impl<affine_element, Fq>(&lhs[start], end - start, &scratch_space[start]);
879 for (
size_t i = start; i < end; ++i) {
880 if (points[i].is_point_at_infinity()) {
884 temp_point_vector[i] = points[i];
885 lookup_table[0][i] = points[i];
889 double_chunked(&temp_point_vector[0]);
890 for (
size_t j = 1; j < LOOKUP_SIZE; ++j) {
891 for (
size_t i = start; i < end; ++i) {
892 lookup_table[j][i] = lookup_table[j - 1][i];
894 add_chunked(&temp_point_vector[0], &lookup_table[j][0]);
898 uint64_t wnaf_entry = 0;
902 for (
size_t j = 0; j < 2; ++j) {
903 wnaf_entry = wnaf.table[j];
904 index = wnaf_entry & 0x0fffffffU;
905 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
906 const bool is_odd = ((j & 1) == 1);
907 for (
size_t i = start; i < end; ++i) {
908 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
909 to_add.y.self_conditional_negate(sign ^ is_odd);
914 work_elements[i] = to_add;
916 temp_point_vector[i] = to_add;
920 add_chunked(&temp_point_vector[0], &work_elements[0]);
922 for (
size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
923 wnaf_entry = wnaf.table[j];
924 index = wnaf_entry & 0x0fffffffU;
925 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
926 const bool is_odd = ((j & 1) == 1);
928 for (
size_t k = 0; k < 4; ++k) {
929 double_chunked(&work_elements[0]);
932 for (
size_t i = start; i < end; ++i) {
933 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
934 to_add.y.self_conditional_negate(sign ^ is_odd);
938 temp_point_vector[i] = to_add;
940 add_chunked(&temp_point_vector[0], &work_elements[0]);
944 for (
size_t i = start; i < end; ++i) {
945 temp_point_vector[i] = -lookup_table[0][i];
947 add_chunked(&temp_point_vector[0], &work_elements[0]);
950 if (wnaf.endo_skew) {
951 for (
size_t i = start; i < end; ++i) {
952 temp_point_vector[i] = lookup_table[0][i];
953 temp_point_vector[i].x *= beta;
955 add_chunked(&temp_point_vector[0], &work_elements[0]);
958 for (
size_t i = start; i < end; ++i) {
959 work_elements[i] = points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
964 return work_elements;
967template <
typename Fq,
typename Fr,
typename T>
971 temporaries.reserve(num_elements * 2);
976 for (
size_t i = 0; i < num_elements; ++i) {
977 temporaries.emplace_back(accumulator);
978 if (!elements[i].is_point_at_infinity()) {
979 accumulator *= elements[i].z;
984 accumulator = accumulator.invert();
1008 for (
size_t i = num_elements - 1; i < num_elements; --i) {
1009 if (!elements[i].is_point_at_infinity()) {
1010 Fq z_inv = accumulator * temporaries[i];
1011 Fq zz_inv = z_inv.
sqr();
1012 elements[i].x *= zz_inv;
1013 elements[i].y *= (zz_inv * z_inv);
1014 accumulator *= elements[i].z;
1020template <
typename Fq,
typename Fr,
typename T>
1024 bool found_one =
false;
1028 while (!found_one) {
1030 yy = x.sqr() * x + T::b;
1031 if constexpr (T::has_a) {
1034 auto [found_root, y1] = yy.sqrt();
1036 found_one = found_root;
#define BB_ASSERT_EQ(actual, expected,...)
constexpr void self_set_infinity() noexcept
static constexpr affine_element one() noexcept
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
element operator*=(const Fr &exponent) noexcept
BB_INLINE constexpr element set_infinity() const noexcept
element mul_with_endomorphism(const Fr &scalar) const noexcept
static element infinity()
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
constexpr element operator-=(const element &other) noexcept
constexpr element operator-() const noexcept
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
constexpr element dbl() const noexcept
constexpr element normalize() const noexcept
constexpr void self_dbl() noexcept
static element random_element(numeric::RNG *engine=nullptr) noexcept
static void batch_normalize(element *elements, size_t num_elements) noexcept
constexpr element operator+=(const element &other) noexcept
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
BB_INLINE constexpr bool on_curve() const noexcept
BB_INLINE constexpr bool operator==(const element &other) const noexcept
element operator*(const Fr &exponent) const noexcept
element() noexcept=default
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept
element mul_without_endomorphism(const Fr &scalar) const noexcept
constexpr element & operator=(const element &other) noexcept
BB_INLINE constexpr void self_set_infinity() noexcept
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
constexpr bool get_bit(uint64_t bit_index) const
constexpr uint64_t get_msb() const
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement *lhs
Batch affine addition for parallel arrays: (lhs[i], rhs[i]) → rhs[i].
AffineElement const size_t num_pairs
AffineElement const size_t Fq *scratch_space noexcept
batch_inversion_accumulator
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
constexpr size_t FF_COPY_COST
constexpr size_t FF_ADDITION_COST
constexpr size_t FF_MULTIPLICATION_COST
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static constexpr field cube_root_of_unity()
static constexpr field one()
static constexpr uint256_t modulus
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). Modifies the field elements k1 and k2.
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
BB_INLINE constexpr bool is_msb_set() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept
BB_INLINE constexpr bool is_zero() const noexcept
BB_INLINE constexpr field from_montgomery_form() const noexcept
static constexpr field zero()
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
EndomorphismWnaf(const EndoScalars &scalars)
std::array< uint64_t, NUM_ROUNDS *2 > table
static constexpr size_t NUM_WNAF_BITS
void throw_or_abort(std::string const &err)
curve::BN254::BaseField Fq