8#ifndef NO_MULTITHREADING
12uint32_t& get_num_cores_ref()
14 static thread_local const char* val =
std::getenv(
"HARDWARE_CONCURRENCY");
15 static thread_local uint32_t cores =
25#ifdef NO_MULTITHREADING
26 throw_or_abort(
"Cannot set hardware concurrency when multithreading is disabled.");
29 get_num_cores_ref() =
static_cast<uint32_t
>(num_cores);
35#ifdef NO_MULTITHREADING
38 return static_cast<size_t>(get_num_cores_ref());
111void parallel_for(
size_t num_iterations,
const std::function<
void(
size_t)>& func)
113#ifdef NO_MULTITHREADING
114 for (
size_t i = 0; i < num_iterations; ++i) {
118#ifdef OMP_MULTITHREADING
142 const std::function<
void(
size_t,
size_t)>& func,
143 size_t no_multhreading_if_less_or_equal)
145 if (num_points <= no_multhreading_if_less_or_equal) {
153 const size_t chunk_size = (num_points / num_cpus) + (num_points % num_cpus == 0 ? 0 : 1);
155 parallel_for(num_cpus, [num_points, chunk_size, &func](
size_t chunk_index) {
157 if (chunk_size * chunk_index > num_points) {
161 size_t current_chunk_size = std::min(num_points - (chunk_size * chunk_index), chunk_size);
162 if (current_chunk_size == 0) {
165 size_t start = chunk_index * chunk_size;
166 size_t end = chunk_index * chunk_size + current_chunk_size;
172 const std::function<
void(
size_t,
size_t,
size_t)>& func,
173 size_t heuristic_cost)
175 using namespace thread_heuristics;
180 const size_t chunk_size = (num_points / num_cpus) + (num_points % num_cpus == 0 ? 0 : 1);
183 const size_t offset_cost = (num_points - chunk_size) * heuristic_cost;
186 if (offset_cost < PARALLEL_FOR_COST) {
187 func(0, num_points, 0);
191 parallel_for(num_cpus, [num_points, chunk_size, &func](
size_t chunk_index) {
193 if (chunk_size * chunk_index > num_points) {
197 size_t current_chunk_size = std::min(num_points - (chunk_size * chunk_index), chunk_size);
198 if (current_chunk_size == 0) {
201 size_t start = chunk_index * chunk_size;
202 size_t end = chunk_index * chunk_size + current_chunk_size;
204 func(start, end, chunk_index);
211 const size_t thread_size = num_iterations / num_threads;
214 std::vector<size_t> start(num_threads);
215 std::vector<size_t> end(num_threads);
216 for (
size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
217 start[thread_idx] = thread_idx * thread_size;
218 end[thread_idx] = (thread_idx == num_threads - 1) ? num_iterations : (thread_idx + 1) * thread_size;
236 size_t desired_num_threads = num_iterations / min_iterations_per_thread;
237 size_t num_threads = std::min(desired_num_threads, max_num_threads);
238 num_threads = num_threads > 0 ? num_threads : 1;
uint32_t env_hardware_concurrency()
Entry point for the Barretenberg command-line interface.
void parallel_for_mutex_pool(size_t num_iterations, const std::function< void(size_t)> &func)
MultithreadData calculate_thread_data(size_t num_iterations, size_t min_iterations_per_thread)
Calculates the number of threads and the index bounds for each thread.
void parallel_for_queued(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_moody(size_t num_iterations, const std::function< void(size_t)> &func)
size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread)
Calculates the number of threads to create based on the minimum number of iterations per thread.
void parallel_for_atomic_pool(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel, based on the operations in one iteration.
void parallel_for_spawning(size_t num_iterations, const std::function< void(size_t)> &func)
void set_parallel_for_concurrency(size_t num_cores)
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_omp(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
void throw_or_abort(std::string const &err)