Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
scalar_multiplication.hpp
Go to the documentation of this file.
1// === AUDIT STATUS ===
2// internal: { status: Planned, auditors: [Sergei], commit: }
3// external_1: { status: not started, auditors: [], commit: }
4// external_2: { status: not started, auditors: [], commit: }
5// =====================
6
7#pragma once
10
14
15#include "./bitvector.hpp"
16#include "./process_buckets.hpp"
18
19template <typename Curve> class MSM {
20 public:
21 using Element = typename Curve::Element;
23 using BaseField = typename Curve::BaseField;
25
26 static constexpr size_t NUM_BITS_IN_FIELD = ScalarField::modulus.get_msb() + 1;
27
28 // ======================= Algorithm Tuning Constants =======================
29 //
30 // These constants control the behavior of the Pippenger MSM algorithm.
31 // They are empirically tuned for performance on typical hardware.
32
33 // Below this threshold, use naive scalar multiplication instead of Pippenger
34 static constexpr size_t PIPPENGER_THRESHOLD = 16;
35
36 // Below this threshold, the affine batch inversion trick is not beneficial
37 // (cost of inversions exceeds savings from cheaper affine additions)
38 static constexpr size_t AFFINE_TRICK_THRESHOLD = 128;
39
40 // Maximum bits per scalar slice (2^20 = 1M buckets, far beyond practical use)
41 static constexpr size_t MAX_SLICE_BITS = 20;
42
43 // Number of points to look ahead for memory prefetching
44 static constexpr size_t PREFETCH_LOOKAHEAD = 32;
45
46 // Prefetch every N iterations (must be power of 2); mask is N-1 for efficient modulo
47 static constexpr size_t PREFETCH_INTERVAL = 16;
48 static constexpr size_t PREFETCH_INTERVAL_MASK = PREFETCH_INTERVAL - 1;
49
50 // ======================= Cost Model Constants =======================
51 //
52 // These constants define the relative costs of various operations,
53 // used to decide between algorithm variants.
54
55 // Cost of bucket accumulation relative to a single point addition
56 // (2 Jacobian adds per bucket, each ~2.5x cost of affine add)
57 static constexpr size_t BUCKET_ACCUMULATION_COST = 5;
58
59 // Field multiplications saved per group operation when using affine trick
60 static constexpr size_t AFFINE_TRICK_SAVINGS_PER_OP = 5;
61
62 // Extra cost of Jacobian group operation when Z coordinate != 1
63 static constexpr size_t JACOBIAN_Z_NOT_ONE_PENALTY = 5;
64
65 // Cost of computing 4-bit lookup table for modular exponentiation (14 muls)
66 static constexpr size_t INVERSION_TABLE_COST = 14;
67 // ===========================================================================
68
69 // Offset generator used in bucket reduction to probabilistically avoid incomplete-addition
70 // edge cases in the accumulator. Derived from domain-separated precomputed generators.
72 {
73 static const AffineElement offset_generator = []() {
75 return get_precomputed_generators<typename Curve::Group, "ECCVM_OFFSET_GENERATOR", 1>()[0];
76 } else {
77 return get_precomputed_generators<typename Curve::Group, "DEFAULT_DOMAIN_SEPARATOR", 8>()[0];
78 }
79 }();
80 return offset_generator;
81 }
82
91 struct MSMWorkUnit {
92 size_t batch_msm_index = 0;
93 size_t start_index = 0;
94 size_t size = 0;
95 };
97
102 struct MSMData {
103 std::span<const ScalarField> scalars; // Scalars (non-Montgomery form)
104 std::span<const AffineElement> points; // Input points
105 std::span<const uint32_t> scalar_indices; // Indices of nonzero scalars
106 std::span<uint64_t> point_schedule; // Scratch space for point scheduling
107
112 static MSMData from_work_unit(std::span<std::span<ScalarField>> all_scalars,
113 std::span<std::span<const AffineElement>> all_points,
114 const std::vector<std::vector<uint32_t>>& all_indices,
115 std::span<uint64_t> point_schedule_buffer,
116 const MSMWorkUnit& work_unit) noexcept
117 {
118 return MSMData{
119 .scalars = all_scalars[work_unit.batch_msm_index],
120 .points = all_points[work_unit.batch_msm_index],
121 .scalar_indices =
122 std::span<const uint32_t>{ &all_indices[work_unit.batch_msm_index][work_unit.start_index],
123 work_unit.size },
124 .point_schedule = point_schedule_buffer,
125 };
126 }
127 };
128
137 std::vector<AffineElement> buckets;
139
140 BucketAccumulators(size_t num_buckets) noexcept
141 : buckets(num_buckets)
142 , bucket_exists(num_buckets)
143 {}
144 };
145
154 std::vector<Element> buckets;
156
158 : buckets(num_buckets)
159 , bucket_exists(num_buckets)
160 {}
161 };
166 static constexpr size_t BATCH_SIZE = 2048;
167 // when adding affine points, we have an edge case where the number of points in the batch can overflow by 2
168 static constexpr size_t BATCH_OVERFLOW_SIZE = 2;
169 std::vector<AffineElement> points_to_add;
170 std::vector<BaseField> inversion_scratch_space; // Used for Montgomery batch inversion denominators
172 AffineElement null_location{}; // Dummy write target for branchless conditional moves
173
179 };
180
186 uint64_t data;
187
188 [[nodiscard]] static constexpr PointScheduleEntry create(uint32_t point_index, uint32_t bucket_index) noexcept
189 {
190 return { (static_cast<uint64_t>(point_index) << 32) | bucket_index };
191 }
192 [[nodiscard]] constexpr uint32_t point_index() const noexcept { return static_cast<uint32_t>(data >> 32); }
193 [[nodiscard]] constexpr uint32_t bucket_index() const noexcept { return static_cast<uint32_t>(data); }
194 };
195
196 // ======================= Public Methods =======================
197 // See README.md for algorithm details and mathematical derivations.
198
204 static AffineElement msm(std::span<const AffineElement> points,
206 bool handle_edge_cases = false) noexcept;
207
213 static std::vector<AffineElement> batch_multi_scalar_mul(std::span<std::span<const AffineElement>> points,
214 std::span<std::span<ScalarField>> scalars,
215 bool handle_edge_cases = true) noexcept;
216
217 // ======================= Test-Visible Methods =======================
218 // Exposed for unit testing; not part of the public API.
219
220 static uint32_t get_num_rounds(size_t num_points) noexcept
221 {
222 const uint32_t bits_per_slice = get_optimal_log_num_buckets(num_points);
223 return static_cast<uint32_t>((NUM_BITS_IN_FIELD + bits_per_slice - 1) / bits_per_slice);
224 }
225
227 static void add_affine_points(AffineElement* points,
228 const size_t num_points,
229 typename Curve::BaseField* scratch_space) noexcept;
230
232 static uint32_t get_scalar_slice(const ScalarField& scalar, size_t round, size_t slice_size) noexcept;
233
235 static uint32_t get_optimal_log_num_buckets(size_t num_points) noexcept;
236
239 std::span<const AffineElement> points,
240 AffineAdditionData& affine_data,
241 BucketAccumulators& bucket_data) noexcept;
242
244 template <typename BucketType> static Element accumulate_buckets(BucketType& bucket_accumulators) noexcept
245 {
246 auto& buckets = bucket_accumulators.buckets;
247 BB_ASSERT_DEBUG(buckets.size() > static_cast<size_t>(0));
248 int starting_index = static_cast<int>(buckets.size() - 1);
249 Element running_sum;
250 bool found_start = false;
251 while (!found_start && starting_index > 0) {
252 const size_t idx = static_cast<size_t>(starting_index);
253 if (bucket_accumulators.bucket_exists.get(idx)) {
254
255 running_sum = buckets[idx];
256 found_start = true;
257 } else {
258 starting_index -= 1;
259 }
260 }
261 if (!found_start) {
262 return Curve::Group::point_at_infinity;
263 }
264 BB_ASSERT_DEBUG(starting_index > 0);
265 const auto& offset_generator = get_offset_generator();
266 Element sum = running_sum + offset_generator;
267 for (int i = starting_index - 1; i > 0; --i) {
268 size_t idx = static_cast<size_t>(i);
269 BB_ASSERT_DEBUG(idx < bucket_accumulators.bucket_exists.size());
270 if (bucket_accumulators.bucket_exists.get(idx)) {
271 running_sum += buckets[idx];
272 }
273 sum += running_sum;
274 }
275 return sum - offset_generator;
276 }
277
278 private:
279 // ======================= Private Implementation =======================
280
283 std::vector<uint32_t>& nonzero_scalar_indices) noexcept;
284
287 std::vector<std::vector<uint32_t>>& msm_scalar_indices) noexcept;
288
290 static bool use_affine_trick(size_t num_points, size_t num_buckets) noexcept;
291
293 static Element jacobian_pippenger_with_transformed_scalars(MSMData& msm_data) noexcept;
294
296 static Element affine_pippenger_with_transformed_scalars(MSMData& msm_data) noexcept;
297
298 // Helpers for batch_accumulate_points_into_buckets. Inlined for performance.
299
300 // Process single point: if bucket has accumulator, pair them for addition; else cache in bucket.
301 __attribute__((always_inline)) static void process_single_point(size_t bucket,
305 size_t& scratch_it,
306 size_t& point_it) noexcept
307 {
308 bool has_accumulator = bucket_data.bucket_exists.get(bucket);
309 if (has_accumulator) {
311 affine_data.points_to_add[scratch_it + 1] = bucket_data.buckets[bucket];
312 bucket_data.bucket_exists.set(bucket, false);
313 affine_data.addition_result_bucket_destinations[scratch_it >> 1] = static_cast<uint32_t>(bucket);
314 scratch_it += 2;
315 } else {
316 bucket_data.buckets[bucket] = *point_source;
317 bucket_data.bucket_exists.set(bucket, true);
318 }
320 }
321
322 // Branchless bucket pair processing. Updates point_it (by 2 if same bucket, else 1) and scratch_it.
323 // See README.md "batch_accumulate_points_into_buckets Algorithm" for case analysis.
324 __attribute__((always_inline)) static void process_bucket_pair(size_t lhs_bucket,
330 size_t& scratch_it,
331 size_t& point_it) noexcept
332 {
333 bool has_bucket_accumulator = bucket_data.bucket_exists.get(lhs_bucket);
334 bool buckets_match = lhs_bucket == rhs_bucket;
335 bool do_affine_add = buckets_match || has_bucket_accumulator;
336
338
343
345 dest_bucket = do_affine_add ? static_cast<uint32_t>(lhs_bucket) : dest_bucket;
346
349
350 bucket_data.bucket_exists.set(lhs_bucket, (has_bucket_accumulator && buckets_match) || !do_affine_add);
352 point_it += (do_affine_add && buckets_match) ? 2 : 1;
353 }
354};
355
357template <typename Curve>
360 bool handle_edge_cases = true) noexcept;
361
363template <typename Curve>
364typename Curve::Element pippenger_unsafe(PolynomialSpan<const typename Curve::ScalarField> scalars,
365 std::span<const typename Curve::AffineElement> points) noexcept;
366
367extern template class MSM<curve::Grumpkin>;
368extern template class MSM<curve::BN254>;
369
370} // namespace bb::scalar_multiplication
#define BB_ASSERT_DEBUG(expression,...)
Definition assert.hpp:55
Custom class to handle packed vectors of bits.
Definition bitvector.hpp:21
typename Group::element Element
Definition grumpkin.hpp:64
typename grumpkin::g1 Group
Definition grumpkin.hpp:63
typename Group::affine_element AffineElement
Definition grumpkin.hpp:65
const AffineElement AffineAdditionData BucketAccumulators size_t & scratch_it
typename Curve::BaseField BaseField
static constexpr size_t BUCKET_ACCUMULATION_COST
static bool use_affine_trick(size_t num_points, size_t num_buckets) noexcept
Decide if batch inversion saves work vs Jacobian additions.
static Element jacobian_pippenger_with_transformed_scalars(MSMData &msm_data) noexcept
Pippenger using Jacobian buckets (handles edge cases: doubling, infinity)
__attribute__((always_inline)) static void process_single_point(size_t bucket
static Element accumulate_buckets(BucketType &bucket_accumulators) noexcept
Reduce buckets to single point using running (suffix) sum from high to low: R = sum(k * B_k)
static constexpr size_t INVERSION_TABLE_COST
static uint32_t get_scalar_slice(const ScalarField &scalar, size_t round, size_t slice_size) noexcept
Extract c-bit slice from scalar for bucket index computation.
static constexpr size_t PREFETCH_INTERVAL
static constexpr size_t PREFETCH_INTERVAL_MASK
const AffineElement AffineAdditionData & affine_data
static const AffineElement & get_offset_generator() noexcept
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false) noexcept
Main entry point for single MSM computation.
static Element affine_pippenger_with_transformed_scalars(MSMData &msm_data) noexcept
Pippenger using affine buckets with batch inversion (faster, no edge case handling)
static constexpr size_t AFFINE_TRICK_SAVINGS_PER_OP
size_t const AffineElement const AffineElement * rhs_source_if_match
static void add_affine_points(AffineElement *points, const size_t num_points, typename Curve::BaseField *scratch_space) noexcept
Batch add n/2 independent point pairs using Montgomery's trick.
const AffineElement AffineAdditionData BucketAccumulators & bucket_data
static constexpr size_t PREFETCH_LOOKAHEAD
static constexpr size_t PIPPENGER_THRESHOLD
static uint32_t get_num_rounds(size_t num_points) noexcept
static constexpr size_t JACOBIAN_Z_NOT_ONE_PENALTY
__attribute__((always_inline)) static void process_bucket_pair(size_t lhs_bucket
std::vector< MSMWorkUnit > ThreadWorkUnits
static std::vector< ThreadWorkUnits > get_work_units(std::span< std::span< ScalarField > > scalars, std::vector< std::vector< uint32_t > > &msm_scalar_indices) noexcept
Distribute multiple MSMs across threads with balanced point counts.
size_t const AffineElement * lhs_source
static constexpr size_t NUM_BITS_IN_FIELD
static uint32_t get_optimal_log_num_buckets(size_t num_points) noexcept
Compute optimal bits per slice by minimizing cost over c in [1, MAX_SLICE_BITS)
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
static constexpr size_t AFFINE_TRICK_THRESHOLD
static void batch_accumulate_points_into_buckets(std::span< const uint64_t > point_schedule, std::span< const AffineElement > points, AffineAdditionData &affine_data, BucketAccumulators &bucket_data) noexcept
Process sorted point schedule into bucket accumulators using batched affine additions.
typename Curve::ScalarField ScalarField
const AffineElement AffineAdditionData BucketAccumulators size_t size_t &point_it noexcept
typename Curve::AffineElement AffineElement
static void transform_scalar_and_get_nonzero_scalar_indices(std::span< ScalarField > scalars, std::vector< uint32_t > &nonzero_scalar_indices) noexcept
Convert scalars from Montgomery form and collect indices of nonzero scalars.
bb::curve::BN254::Element Element
Curve::Element pippenger(PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases) noexcept
Safe MSM wrapper (defaults to handle_edge_cases=true)
Curve::Element pippenger_unsafe(PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points) noexcept
Fast MSM wrapper for linearly independent points (no edge case handling)
Inner sum(Cont< Inner, Args... > const &in)
Definition container.hpp:70
constexpr std::span< const typename Group::affine_element > get_precomputed_generators()
@ BN254
Definition types.hpp:10
STL namespace.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
Scratch space for batched affine point additions (one per thread)
Affine bucket accumulators for the fast affine-trick Pippenger variant.
Jacobian bucket accumulators for the safe Pippenger variant.
Container for MSM input data passed between algorithm stages.
static MSMData from_work_unit(std::span< std::span< ScalarField > > all_scalars, std::span< std::span< const AffineElement > > all_points, const std::vector< std::vector< uint32_t > > &all_indices, std::span< uint64_t > point_schedule_buffer, const MSMWorkUnit &work_unit) noexcept
Factory method to construct MSMData from a work unit.
MSMWorkUnit describes an MSM that may be part of a larger MSM.
Packed point schedule entry: (point_index << 32) | bucket_index.
constexpr uint32_t point_index() const noexcept
uint64_t data
constexpr uint32_t bucket_index() const noexcept
static constexpr PointScheduleEntry create(uint32_t point_index, uint32_t bucket_index) noexcept