Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
field_impl.hpp
Go to the documentation of this file.
1// === AUDIT STATUS ===
2// internal: { status: Planned, auditors: [Raju], commit: }
3// external_1: { status: not started, auditors: [], commit: }
4// external_2: { status: not started, auditors: [], commit: }
5// =====================
6
7#pragma once
#include <array>
#include <cstring>
#include <memory>
#include <span>
#include <type_traits>
#include <vector>
17
20
21namespace bb {
22
23// clang-format off
24// disable the following style guides:
25// cppcoreguidelines-avoid-c-arrays : we make heavy use of c-style arrays here to prevent default-initialization of memory when constructing `field` objects.
26// The intention is for field to act like a primitive numeric type with the performance/complexity trade-offs expected from this.
27// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays)
28// clang-format on
34template <class T> constexpr field<T> field<T>::operator*(const field& other) const noexcept
35{
36 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
37 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
38 // >= 255-bits or <= 64-bits.
39 return montgomery_mul(other);
40 } else {
42 return montgomery_mul(other);
43 }
44 return asm_mul_with_coarse_reduction(*this, other);
45 }
46}
47
48template <class T> constexpr field<T>& field<T>::operator*=(const field& other) & noexcept
49{
50 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
51 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
52 // >= 255-bits or <= 64-bits.
53 *this = operator*(other);
54 } else {
56 *this = operator*(other);
57 } else {
58 asm_self_mul_with_coarse_reduction(*this, other);
59 }
60 }
61 return *this;
62}
63
69template <class T> constexpr field<T> field<T>::sqr() const noexcept
70{
71 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
72 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
73 return montgomery_square();
74 } else {
76 return montgomery_square();
77 }
78 return asm_sqr_with_coarse_reduction(*this);
79 }
80}
81
82template <class T> constexpr void field<T>::self_sqr() & noexcept
83{
84 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
85 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
86 *this = montgomery_square();
87 } else {
89 *this = montgomery_square();
90 } else {
91 asm_self_sqr_with_coarse_reduction(*this);
92 }
93 }
94}
95
101template <class T> constexpr field<T> field<T>::operator+(const field& other) const noexcept
102{
103 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
104 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
105 return add(other);
106 } else {
108 return add(other);
109 }
110 return asm_add_with_coarse_reduction(*this, other);
111 }
112}
113
114template <class T> constexpr field<T>& field<T>::operator+=(const field& other) & noexcept
115{
116 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
117 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
118 (*this) = operator+(other);
119 } else {
121 (*this) = operator+(other);
122 } else {
123 asm_self_add_with_coarse_reduction(*this, other);
124 }
125 }
126 return *this;
127}
128
129template <class T> constexpr field<T> field<T>::operator++() noexcept
130{
131 return *this += 1;
132}
133
134// NOLINTNEXTLINE(cert-dcl21-cpp) circular linting errors. If const is added, linter suggests removing
135template <class T> constexpr field<T> field<T>::operator++(int) noexcept
136{
137 field<T> value_before_incrementing = *this;
138 *this += 1;
139 return value_before_incrementing;
140}
141
147template <class T> constexpr field<T> field<T>::operator-(const field& other) const noexcept
148{
149 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
150 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
151 return subtract(other);
152 } else {
154 return subtract(other);
155 }
156 return asm_sub_with_coarse_reduction(*this, other);
157 }
158}
159
// Unary negation: returns p - *this (or 2p - *this followed by a reduction, for the
// standard 254-bit-modulus path, since elements are kept in the coarse range [0, 2p)).
template <class T> constexpr field<T> field<T>::operator-() const noexcept
{
    // >= 255-bit or <= 64-bit moduli: representation allows subtracting from p directly.
    if constexpr ((T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
                  (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
        constexpr field p{ modulus.data[0], modulus.data[1], modulus.data[2], modulus.data[3] };
        return p - *this;
    }

    // TODO(@zac-williamson): there are 3 ways we can make this more efficient
    // 1: we subtract `p` from `*this` instead of `2p`
    // 2: instead of `p - *this`, we use an asm block that does `p - *this` without the assembly reduction step
    // 3: we replace `(p - *this).reduce_once()` with an assembly block that is equivalent to `p - *this`,
    //    but we call `REDUCE_FIELD_ELEMENT` with `not_twice_modulus` instead of `twice_modulus`
    // not sure which is faster and whether any of the above might break something!
    //
    // More context below:
    // the operator-(a, b) method's asm implementation has a sneaky way to check underflow.
    // if `a - b` underflows we need to add in `2p`. Instead of conditional branching which would cause pipeline
    // flushes, we add `2p` into the result of `a - b`. If the result triggers the overflow flag, then we know we are
    // correcting an *underflow* produced from computing `a - b`. Finally...we use the overflow flag to conditionally
    // move data into registers such that we end up with either `a - b` or `2p + (a - b)` (this is branchless). OK! So
    // what's the problem? Well we assume that every field element lies between 0 and 2p - 1. But we are computing `2p -
    // *this`! If *this = 0 then we exceed this bound hence the need for the extra reduction step. HOWEVER, we also know
    // that 2p - *this won't underflow so we could skip the underflow check present in the assembly code
    constexpr field p{ twice_modulus.data[0], twice_modulus.data[1], twice_modulus.data[2], twice_modulus.data[3] };
    return (p - *this).reduce_once();
}
187
188template <class T> constexpr field<T>& field<T>::operator-=(const field& other) & noexcept
189{
190 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
191 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
192 *this = subtract(other);
193 } else {
195 *this = subtract(other);
196 } else {
197 asm_self_sub_with_coarse_reduction(*this, other);
198 }
199 }
200 return *this;
201}
202
203template <class T> constexpr void field<T>::self_neg() & noexcept
204{
205 if constexpr ((T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
206 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
207 constexpr field p{ modulus.data[0], modulus.data[1], modulus.data[2], modulus.data[3] };
208 *this = p - *this;
209 } else {
210 constexpr field p{ twice_modulus.data[0], twice_modulus.data[1], twice_modulus.data[2], twice_modulus.data[3] };
211 *this = (p - *this).reduce_once();
212 }
213}
214
215template <class T> constexpr void field<T>::self_conditional_negate(const uint64_t predicate) & noexcept
216{
217 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
218 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
219 *this = predicate ? -(*this) : *this; // NOLINT
220 } else {
222 *this = predicate ? -(*this) : *this; // NOLINT
223 } else {
224 asm_conditional_negate(*this, predicate);
225 }
226 }
227}
228
240template <class T> constexpr bool field<T>::operator>(const field& other) const noexcept
241{
242 const field left = reduce_once();
243 const field right = other.reduce_once();
244 const bool t0 = left.data[3] > right.data[3];
245 const bool t1 = (left.data[3] == right.data[3]) && (left.data[2] > right.data[2]);
246 const bool t2 =
247 (left.data[3] == right.data[3]) && (left.data[2] == right.data[2]) && (left.data[1] > right.data[1]);
248 const bool t3 = (left.data[3] == right.data[3]) && (left.data[2] == right.data[2]) &&
249 (left.data[1] == right.data[1]) && (left.data[0] > right.data[0]);
250 return (t0 || t1 || t2 || t3);
251}
252
264template <class T> constexpr bool field<T>::operator<(const field& other) const noexcept
265{
266 return (other > *this);
267}
268
269template <class T> constexpr bool field<T>::operator==(const field& other) const noexcept
270{
271 // for both 254-bit fields and 256-bit fields, there are at most two representatives for each element of the prime
272 // field. This is because for 254-bit fields, the internal representation is in [0, 2*p) and for 256-bit feilds, the
273 // internal representation is an arbitrary `uint256_t`.
274 const field left = reduce_once();
275 const field right = other.reduce_once();
276 return (left.data[0] == right.data[0]) && (left.data[1] == right.data[1]) && (left.data[2] == right.data[2]) &&
277 (left.data[3] == right.data[3]);
278}
279
280template <class T> constexpr bool field<T>::operator!=(const field& other) const noexcept
281{
282 return (!operator==(other));
283}
284// to/from montgomery form methods.
285// We note that we do not need to perform extra reductions to run the from/to montgomery form algorithms for the
286// non-WASM builds. In the case of 254-bit fields, one way of saying this is: by the analysis in the field
287// documentation, as the constant we are multiplying by (aR) is less than p (r_squared, one_raw), for any 256-bit
288// number, the Montgomery multiplication algorithm will yield something in the range [0, 2p), i.e., in coarse form, as
289// desired. For 256-bit fields, that this is true again follows from the fact that the constant we are multiplying by is
290// less than p; hence the output of Montgomery multiplication of aR with a field element whose internal representation
// is 256-bits will again be 256-bits. For more details, please see the field documentation.
292//
293// Note: For WASM builds, we do need the extra reduce_once() to ensure correctness with the 29-bit limb Montgomery
294// multiplication implementation.
295
296template <class T> constexpr field<T> field<T>::to_montgomery_form() const noexcept
297{
298 constexpr field r_squared =
299 field{ r_squared_uint.data[0], r_squared_uint.data[1], r_squared_uint.data[2], r_squared_uint.data[3] };
300 return *this * r_squared;
301}
303template <class T> constexpr field<T> field<T>::from_montgomery_form() const noexcept
305 constexpr field one_raw{ 1, 0, 0, 0 };
306 return operator*(one_raw);
307}
308
309template <class T> constexpr void field<T>::self_to_montgomery_form() & noexcept
310{
311 constexpr field r_squared =
312 field{ r_squared_uint.data[0], r_squared_uint.data[1], r_squared_uint.data[2], r_squared_uint.data[3] };
313 *this *= r_squared;
316template <class T> constexpr void field<T>::self_from_montgomery_form() & noexcept
318 constexpr field one_raw{ 1, 0, 0, 0 };
319 *this *= one_raw;
320}
321
322// Reduced versions - guarantee canonical form [0, p)
323template <class T> constexpr field<T> field<T>::to_montgomery_form_reduced() const noexcept
325 return to_montgomery_form().reduce_once();
326}
328template <class T> constexpr field<T> field<T>::from_montgomery_form_reduced() const noexcept
329{
330 return from_montgomery_form().reduce_once();
332
333template <class T> constexpr void field<T>::self_to_montgomery_form_reduced() & noexcept
335 self_to_montgomery_form();
336 self_reduce_once();
338
339template <class T> constexpr void field<T>::self_from_montgomery_form_reduced() & noexcept
340{
341 self_from_montgomery_form();
342 self_reduce_once();
343}
344
345template <class T> constexpr field<T> field<T>::reduce_once() const noexcept
346{
347 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
348 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
349 return reduce();
350 } else {
352 return reduce();
353 }
354 return asm_reduce_once(*this);
355 }
356}
358template <class T> constexpr void field<T>::self_reduce_once() & noexcept
359{
360 if constexpr (BBERG_NO_ASM || (T::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) ||
361 (T::modulus_1 == 0 && T::modulus_2 == 0 && T::modulus_3 == 0)) {
362 *this = reduce();
363 } else {
365 *this = reduce();
366 } else {
367 asm_self_reduce_once(*this);
368 }
370}
372template <class T> constexpr field<T> field<T>::pow(const uint256_t& exponent) const noexcept
373{
374 field accumulator{ data[0], data[1], data[2], data[3] };
375 field to_mul{ data[0], data[1], data[2], data[3] };
376 const uint64_t maximum_set_bit = exponent.get_msb();
377
378 for (int i = static_cast<int>(maximum_set_bit) - 1; i >= 0; --i) {
379 accumulator.self_sqr();
380 if (exponent.get_bit(static_cast<uint64_t>(i))) {
381 accumulator *= to_mul;
382 }
383 }
384 if (exponent == uint256_t(0)) {
385 accumulator = one();
386 } else if (*this == zero()) {
387 accumulator = zero();
388 }
389 return accumulator;
390}
391
392template <class T> constexpr field<T> field<T>::pow(const uint64_t exponent) const noexcept
393{
394 return pow({ exponent, 0, 0, 0 });
395}
396
397template <class T> constexpr field<T> field<T>::invert() const noexcept
398{
399 if (*this == zero()) {
400 bb::assert_failure("Trying to invert zero in the field");
401 }
402 return pow(modulus_minus_two);
403}
404
405template <class T> void field<T>::batch_invert(field* coeffs, const size_t n) noexcept
406{
407 batch_invert(std::span{ coeffs, n });
408}
409
410template <class T> void field<T>::batch_invert(std::span<field> coeffs) noexcept
411{
412 batch_invert<decltype(coeffs)>(coeffs);
413}
414
423template <class T>
424template <typename C>
425 requires requires(C& c) {
426 { c.size() } -> std::convertible_to<size_t>;
427 { c[0] };
428 }
429void field<T>::batch_invert(C& coeffs) noexcept
430{
431 const size_t n = coeffs.size();
432
433 std::vector<field> temporaries;
434 std::vector<bool> skipped;
435 temporaries.reserve(n);
436 skipped.reserve(n);
437
438 field accumulator = one();
439 for (size_t i = 0; i < n; ++i) {
440 temporaries[i] = accumulator;
441 if (coeffs[i].is_zero()) {
442 skipped[i] = true;
443 } else {
444 skipped[i] = false;
445 accumulator *= coeffs[i];
446 }
447 }
448 accumulator = accumulator.invert();
449
450 field T0;
451 for (size_t i = n - 1; i < n; --i) {
452 if (!skipped[i]) {
453 T0 = accumulator * temporaries[i];
454 accumulator *= coeffs[i];
455 coeffs[i] = T0;
456 }
457 }
458}
459
471template <class T> constexpr field<T> field<T>::tonelli_shanks_sqrt() const noexcept
472{
473 if (is_zero()) {
474 return field::zero();
475 }
476 if (*this == field::one()) {
477 return field::one();
478 }
479 // Tonelli-shanks algorithm begins by finding integers Q, S, with Q odd, such that (p - 1) = Q.2^{S}.
480 // We can determine s by counting the least significant set bit of `p - 1`. We pick elements `r, g` such that g =
481 // r^Q and r is not a square. (the coset generators are all nonresidues and satisfy this condition). This forces g
482 // to have order exactly 2^{S}.
483 //
484 // To find the square root of `u`, consider `v = u^((Q - 1)/2)`
485 // There exists an integer `e` where uv^2 = g^e (see Theorem 3.1 in paper -- the point is that uv^2 has 2-primary
486 // order). If `u` is a square, `e` is even and (uvg^{−e/2})^2 = u^2v^2g^e = u^{Q+1}g^{-e} = u
487 //
488 // The goal of the algorithm is two fold:
489 // 1. find `e` given `u`. (Discrete log is easy for 2-primary groups; we used an optimized chunking strategy.)
490 // 2. compute `sqrt(u) = uvg^{−e/2}`
491
492 // -----------------------------------------------------------------------------------------
493 // STEP 1: Compute the initial values v, uv, and uvv
494 // -----------------------------------------------------------------------------------------
495 // Q is the odd part of (p - 1), i.e., (p - 1) = Q * 2^S where S = primitive_root_log_size().
496 constexpr uint256_t Q = (modulus - 1) >> static_cast<uint64_t>(primitive_root_log_size());
497 constexpr uint256_t Q_minus_one_over_two = (Q - 1) >> 1;
498
499 field v = pow(Q_minus_one_over_two);
500 field uv = operator*(v);
501 // uvv = uv * v = u^{(Q+1)/2} * u^{(Q-1)/2} = u^Q
502 // By Theorem 3.1, uvv lies in the 2-primary subgroup generated by g, so uvv = g^e for some integer e.
503 field uvv = uv * v;
504
505 // -----------------------------------------------------------------------------------------
506 // STEP 2: Check if u is a quadratic residue
507 // -----------------------------------------------------------------------------------------
508 // u is a quadratic residue iff u^{(p-1)/2} = 1.
509 // Since uv^2 = u^Q and (p-1)/2 = Q * 2^{S-1}, we have u^{(p-1)/2} = (uv^2)^{2^{S-1}}.
510 // So we square uv^2 exactly (S-1) times and check if the result is 1.
511 field check = uvv;
512 for (size_t i = 0; i < primitive_root_log_size() - 1; ++i) {
513 check.self_sqr();
514 }
515 if (check != field::one()) {
516 // u is not a quadratic residue; return 0 to indicate no square root exists.
517 return field::zero();
518 }
519
520 // -----------------------------------------------------------------------------------------
521 // STEP 3: Set up precomputed lookup tables for the discrete log computation
522 // -----------------------------------------------------------------------------------------
523 // g = r^Q where r is a quadratic non-residue (coset_generator).
524 // Since r has order (p-1) and Q is the odd part, g has order exactly 2^S.
525 constexpr field g = coset_generator().pow(Q);
526
527 // g_inv = g^{-1} = r^{-Q} = r^{p-1-Q}
528 constexpr field g_inv = coset_generator().pow(modulus - 1 - Q);
529
530 // S = primitive_root_log_size() is the 2-adic valuation of (p-1), i.e., the largest power of 2 dividing (p-1).
531 constexpr size_t root_bits = primitive_root_log_size();
532
533 // table_bits (called 'w' in Bernstein's paper) determines the chunk size for the discrete log.
534 // We process the exponent e in chunks of table_bits bits at a time.
535 // Using 6 bits means tables of size 64, balancing memory usage vs. number of iterations.
536 constexpr size_t table_bits = 6;
537
538 // num_tables = ceil(S / table_bits)
539 // WARNING: this will have to be slightly changed if root_bits is exactly divisible by table_bits.
540 constexpr size_t num_tables = root_bits / table_bits + (root_bits % table_bits != 0 ? 1 : 0);
541 constexpr size_t num_offset_tables = num_tables - 1;
542
543 // table_size = 2^table_bits = 64 entries per table.
544 constexpr size_t table_size = static_cast<size_t>(1UL) << table_bits;
545
546 using GTable = std::array<field, table_size>;
547
548 // get_g_table(h) returns [h^0, h^1, h^2, ..., h^{table_size-1}].
549 // This allows O(1) lookup of h^k for any k in [0, table_size).
550 constexpr auto get_g_table = [&](const field& h) {
551 GTable result;
552 result[0] = 1;
553 for (size_t i = 1; i < table_size; ++i) {
554 result[i] = result[i - 1] * h;
555 }
556 return result;
557 };
558
559 // g_tables[i] contains powers of g_inv^{2^{table_bits * i}}.
560 // This allows us to compute g_inv^{e} efficiently by decomposing e into table_bits-sized chunks.
561 // g_tables[i][k] = g_inv^{k * 2^{table_bits * i}}
562 constexpr std::array<GTable, num_tables> g_tables = [&]() {
563 field working_base = g_inv;
565 for (size_t i = 0; i < num_tables; ++i) {
566 result[i] = get_g_table(working_base);
567 // Square table_bits times to get g_inv^{2^{table_bits * (i+1)}}
568 for (size_t j = 0; j < table_bits; ++j) {
569 working_base.self_sqr();
570 }
571 }
572 return result;
573 }();
574
575 // offset_g_tables handle the case where root_bits is not a multiple of table_bits.
576 // The first chunk may have fewer than table_bits bits, so we need offset tables
577 // that start from g_inv^{2^{root_bits % table_bits}} instead of g_inv.
578 constexpr std::array<GTable, num_offset_tables> offset_g_tables = [&]() {
579 field working_base = g_inv;
580 // Skip ahead by (root_bits % table_bits) squarings to align with the chunk boundaries.
581 for (size_t i = 0; i < root_bits % table_bits; ++i) {
582 working_base.self_sqr();
583 }
585 for (size_t i = 0; i < num_offset_tables; ++i) {
586 result[i] = get_g_table(working_base);
587 for (size_t j = 0; j < table_bits; ++j) {
588 working_base.self_sqr();
589 }
590 }
591 return result;
592 }();
593
594 // root_table_a and root_table_b are used to find the discrete log in each chunk.
595 // They contain powers of g (not g_inv) so we can match against uvv raised to appropriate powers.
596 // root_table_a: powers of g^{2^{(num_tables-1) * table_bits}} - used for the first (most significant) chunk.
597 constexpr GTable root_table_a = get_g_table(g.pow(1UL << ((num_tables - 1) * table_bits)));
598 // root_table_b: powers of g^{2^{root_bits - table_bits}} - used for subsequent chunks.
599 constexpr GTable root_table_b = get_g_table(g.pow(1UL << (root_bits - table_bits)));
600
601 // -----------------------------------------------------------------------------------------
602 // STEP 4: Compute powers of uvv for the chunked discrete log
603 // -----------------------------------------------------------------------------------------
604 // uvv_powers[i] = (uv^2)^{2^{table_bits * i}}
605 // These are the values we'll use to extract each chunk of the exponent e.
607 field base = uvv;
608 for (size_t i = 0; i < num_tables - 1; ++i) {
609 uvv_powers[i] = base;
610 for (size_t j = 0; j < table_bits; ++j) {
611 base.self_sqr();
612 }
613 }
614 uvv_powers[num_tables - 1] = base;
615
616 // -----------------------------------------------------------------------------------------
617 // STEP 5: Extract the chunks of e
618 // -----------------------------------------------------------------------------------------
619 // We find e such that uv^2 = g^e by determining e chunk by chunk, from most significant to least.
620 // e_slices[i] will hold the i-th chunk of e (each chunk is table_bits bits).
622 for (size_t i = 0; i < num_tables; ++i) {
623 // Process chunks from most significant (table_index = num_tables - 1) to least significant.
624 size_t table_index = num_tables - 1 - i;
625
626 // Start with (uv^2)^{2^{table_bits * table_index}}.
627 field target = uvv_powers[table_index];
628
629 // Correct target using previously discovered chunks.
630 // This removes the contribution of higher-order chunks so we can isolate this chunk.
631 for (size_t j = 0; j < i; ++j) {
632 size_t e_idx = num_tables - 1 - (i - 1) + j;
633 size_t g_idx = num_tables - 2 - j;
634
635 field g_lookup;
636 if (j != i - 1) {
637 g_lookup = offset_g_tables[g_idx - 1][e_slices[e_idx]];
638 } else {
639 g_lookup = g_tables[g_idx][e_slices[e_idx]];
640 }
641 target *= g_lookup;
642 }
643
644 // Search for target in the appropriate root table.
645 // target should equal g^{e_slice * 2^{...}} for some e_slice in [0, table_size).
646 size_t count = 0;
647
648 if (i == 0) {
649 // First iteration: use root_table_a for the most significant chunk.
650 for (auto& x : root_table_a) {
651 if (x == target) {
652 break;
653 }
654 count += 1;
655 }
656 } else {
657 // Subsequent iterations: use root_table_b.
658 for (auto& x : root_table_b) {
659 if (x == target) {
660 break;
661 }
662 count += 1;
663 }
664 }
665
666 if (count == table_size) {
667 // This should never happen if u is a valid quadratic residue.
668 bb::assert_failure("Tonelli-Shanks: count == table_size");
669 }
670 e_slices[table_index] = count;
671 }
672
673 // -----------------------------------------------------------------------------------------
674 // STEP 6: Compute e/2 from the slice representation
675 // -----------------------------------------------------------------------------------------
676 // We need g^{-e/2}, so we must divide e by 2.
677 // Since e is even (guaranteed by Theorem 3.1 for quadratic residues), this is exact.
678 // We perform the division on the slice representation by right-shifting each slice
679 // and propagating any "borrow" (the shifted-out bit) to the next slice.
680 for (size_t i = 0; i < num_tables; ++i) {
681 auto& e_slice = e_slices[num_tables - 1 - i];
682 // e_slices[num_tables - 1] (the most significant slice) is always even by Theorem 3.1.
683 // If a slice is odd, the bit that gets shifted out must be added to the previous slice
684 // (which represents higher powers of 2).
685 if ((e_slice & 1UL) == 1UL) {
686 // borrow_value is 2^{table_bits - 1} normally, but for the boundary between
687 // the first chunk (which may be smaller) and second chunk, we use the remainder size.
688 size_t borrow_value = (i == 1) ? 1UL << ((root_bits % table_bits) - 1) : (1UL << (table_bits - 1));
689 e_slices[num_tables - i] += borrow_value;
690 }
691 e_slice >>= 1;
692 }
693
694 // -----------------------------------------------------------------------------------------
695 // STEP 7: Compute g^{-e/2} from the slices and return the final square root
696 // -----------------------------------------------------------------------------------------
697 // g^{-e/2} = product of g_inv^{slice[i] * 2^{table_bits * i}} for all chunks i.
698 // We look up each term in the appropriate precomputed table.
699 field g_pow_minus_e_over_2 = 1;
700 for (size_t i = 0; i < num_tables; ++i) {
701 if (i == 0) {
702 g_pow_minus_e_over_2 *= g_tables[i][e_slices[num_tables - 1 - i]];
703 } else {
704 g_pow_minus_e_over_2 *= offset_g_tables[i - 1][e_slices[num_tables - 1 - i]];
705 }
706 }
707 // Final result: sqrt(u) = uv * g^{-e/2} = u^{(Q+1)/2} * g^{-e/2}
708 auto result = uv * g_pow_minus_e_over_2;
709 if (result * result != *this) {
710 bb::assert_failure("Tonelli-Shanks sqrt verification failed");
711 }
712 return result;
713}
714
715template <class T>
716constexpr std::pair<bool, field<T>> field<T>::sqrt() const noexcept
717 requires((T::modulus_0 & 0x3UL) == 0x3UL)
718{
719 constexpr uint256_t sqrt_exponent = (modulus + uint256_t(1)) >> 2;
720 field root = pow(sqrt_exponent);
721 if ((root * root) == (*this)) {
722 return std::pair<bool, field>(true, root);
723 }
724 return std::pair<bool, field>(false, field::zero());
725}
726
727template <class T>
728constexpr std::pair<bool, field<T>> field<T>::sqrt() const noexcept
729 requires((T::modulus_0 & 0x3UL) != 0x3UL)
730{
731 field root = tonelli_shanks_sqrt();
732 if ((root * root) == (*this)) {
733 return std::pair<bool, field>(true, root);
734 }
735 return std::pair<bool, field>(false, field::zero());
736}
737
738template <class T> constexpr field<T> field<T>::operator/(const field& other) const noexcept
739{
740 return operator*(other.invert());
741}
742
743template <class T> constexpr field<T>& field<T>::operator/=(const field& other) & noexcept
744{
745 *this = operator/(other);
746 return *this;
747}
748
749template <class T> constexpr void field<T>::self_set_msb() & noexcept
750{
751 data[3] = 0ULL | (1ULL << 63ULL);
752}
753
754template <class T> constexpr bool field<T>::is_msb_set() const noexcept
755{
756 return (data[3] >> 63ULL) == 1ULL;
757}
758
759template <class T> constexpr uint64_t field<T>::is_msb_set_word() const noexcept
760{
761 return (data[3] >> 63ULL);
762}
763
764template <class T> constexpr bool field<T>::is_zero() const noexcept
765{
766 return ((data[0] | data[1] | data[2] | data[3]) == 0) ||
767 (data[0] == T::modulus_0 && data[1] == T::modulus_1 && data[2] == T::modulus_2 && data[3] == T::modulus_3);
768}
769
770template <class T> constexpr field<T> field<T>::get_root_of_unity(size_t subgroup_size) noexcept
771{
772#if defined(__SIZEOF_INT128__) && !defined(__wasm__)
773 field r{ T::primitive_root_0, T::primitive_root_1, T::primitive_root_2, T::primitive_root_3 };
774#else
775 field r{ T::primitive_root_wasm_0, T::primitive_root_wasm_1, T::primitive_root_wasm_2, T::primitive_root_wasm_3 };
776#endif
777 for (size_t i = primitive_root_log_size(); i > subgroup_size; --i) {
778 r.self_sqr();
779 }
780 return r;
781}
782
784{
785 if (engine == nullptr) {
787 }
788 constexpr field pow_2_256 = field(uint256_t(1) << 128).sqr();
789 field lo;
790 field hi;
793 return lo + (pow_2_256 * hi);
794}
795
796template <class T> constexpr size_t field<T>::primitive_root_log_size() noexcept
797{
798 uint256_t target = modulus - 1;
799 size_t result = 0;
800 while (!target.get_bit(result)) {
801 ++result;
802 }
803 return result;
804}
805
806// This function is used to serialize a field. It matches the old serialization format by first
807// converting the field from Montgomery form, which is a special representation used for efficient
808// modular arithmetic.
809template <class Params> void field<Params>::msgpack_pack(auto& packer) const
810{
811 // The field is first converted from Montgomery form to canonical [0, p) representation.
812 auto adjusted = from_montgomery_form_reduced();
813
814 // The data is then converted to big endian format using htonll, which stands for "host to network long
815 // long". This is necessary because the data will be written to a raw msgpack buffer, which requires big
816 // endian format.
817 uint64_t bin_data[4] = {
818 htonll(adjusted.data[3]), htonll(adjusted.data[2]), htonll(adjusted.data[1]), htonll(adjusted.data[0])
819 };
820
821 // The packer is then used to write the binary data to the buffer, just like in the old format.
822 packer.pack_bin(sizeof(bin_data));
823 packer.pack_bin_body((const char*)bin_data, sizeof(bin_data)); // NOLINT
824}
825
826// This function is used to deserialize a field. It also matches the old deserialization format by
827// reading the binary data as big endian uint64_t's, correcting them to the host endianness, and
828// then converting the field back to Montgomery form.
829template <class Params> void field<Params>::msgpack_unpack(auto o)
830{
831 // The binary data is first extracted from the msgpack object.
832 std::array<uint8_t, sizeof(data)> raw_data = o;
833
834 // The binary data is then read as big endian uint64_t's. This is done by casting the raw data to uint64_t*
835 // and then using ntohll ("network to host long long") to correct the endianness to the host's endianness.
836 uint64_t* cast_data = (uint64_t*)&raw_data[0]; // NOLINT
837 uint64_t reversed[] = { ntohll(cast_data[3]), ntohll(cast_data[2]), ntohll(cast_data[1]), ntohll(cast_data[0]) };
838
839 // The corrected data is then copied back into the field's data array.
840 for (int i = 0; i < 4; i++) {
841 data[i] = reversed[i];
842 }
843
844 // Finally, the field is converted back to Montgomery form, just like in the old format.
845 *this = to_montgomery_form_reduced();
846}
847
848} // namespace bb
849
850// clang-format off
851// NOLINTEND(cppcoreguidelines-avoid-c-arrays)
852// clang-format on
virtual uint256_t get_random_uint256()=0
constexpr bool get_bit(uint64_t bit_index) const
const std::vector< MemoryValue > data
numeric::RNG & engine
#define BBERG_NO_ASM
RNG & get_randomness()
Definition engine.cpp:230
Entry point for Barretenberg command-line interface.
Definition api.hpp:5
Univariate< Fr, domain_end > operator+(const Fr &ff, const Univariate< Fr, domain_end > &uv)
void assert_failure(std::string const &err)
Definition assert.cpp:11
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
constexpr void g(state_array &state, size_t a, size_t b, size_t c, size_t d, uint32_t x, uint32_t y)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
General class for prime fields see Prime field documentation["field documentation"] for general imple...
BB_INLINE constexpr field from_montgomery_form_reduced() const noexcept
static constexpr field get_root_of_unity(size_t subgroup_size) noexcept
BB_INLINE constexpr field & operator+=(const field &other) &noexcept
BB_INLINE constexpr void self_to_montgomery_form_reduced() &noexcept
static constexpr field one()
BB_INLINE constexpr void self_reduce_once() &noexcept
BB_INLINE constexpr bool operator!=(const field &other) const noexcept
BB_INLINE constexpr field operator*(const field &other) const noexcept
BB_INLINE constexpr field operator+(const field &other) const noexcept
constexpr field tonelli_shanks_sqrt() const noexcept
Implements an optimized variant of Tonelli-Shanks via lookup tables. Algorithm taken from https://cr....
BB_INLINE constexpr void self_from_montgomery_form_reduced() &noexcept
BB_INLINE constexpr field to_montgomery_form() const noexcept
BB_INLINE constexpr void self_conditional_negate(uint64_t predicate) &noexcept
BB_INLINE constexpr field pow(const uint256_t &exponent) const noexcept
BB_INLINE constexpr field operator++() noexcept
constexpr field operator/(const field &other) const noexcept
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
BB_INLINE constexpr void self_neg() &noexcept
BB_INLINE constexpr bool operator==(const field &other) const noexcept
BB_INLINE constexpr bool is_msb_set() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept
BB_INLINE constexpr bool operator>(const field &other) const noexcept
Greater-than operator.
BB_INLINE constexpr field to_montgomery_form_reduced() const noexcept
BB_INLINE constexpr field & operator-=(const field &other) &noexcept
void msgpack_pack(auto &packer) const
constexpr std::pair< bool, field > sqrt() const noexcept
Compute square root of the field element.
BB_INLINE constexpr void self_from_montgomery_form() &noexcept
BB_INLINE constexpr field & operator*=(const field &other) &noexcept
BB_INLINE constexpr bool is_zero() const noexcept
static void batch_invert(C &coeffs) noexcept
Batch invert a collection of field elements using Montgomery's trick.
BB_INLINE constexpr void self_to_montgomery_form() &noexcept
BB_INLINE constexpr field from_montgomery_form() const noexcept
BB_INLINE constexpr field operator-() const noexcept
BB_INLINE constexpr bool operator<(const field &other) const noexcept
Less-than operator.
BB_INLINE constexpr void self_set_msb() &noexcept
void msgpack_unpack(auto o)
constexpr field & operator/=(const field &other) &noexcept
static constexpr size_t primitive_root_log_size() noexcept
BB_INLINE constexpr field reduce_once() const noexcept
static constexpr field zero()
BB_INLINE constexpr uint64_t is_msb_set_word() const noexcept