diff --git a/components/suggest/benches/benchmark_all.rs b/components/suggest/benches/benchmark_all.rs index 26e401013..03b09ff61 100644 --- a/components/suggest/benches/benchmark_all.rs +++ b/components/suggest/benches/benchmark_all.rs @@ -24,10 +24,11 @@ fn run_benchmarks( benchmarks: Vec<(&'static str, B)>, ) { for (name, benchmark) in benchmarks { + let g_input = benchmark.global_input(); group.bench_function(name.to_string(), |b| { b.iter_batched( - || benchmark.generate_input(), - |input| benchmark.benchmarked_code(input), + || benchmark.iteration_input(), + |i_input| benchmark.benchmarked_code(&g_input, i_input), // See https://docs.rs/criterion/latest/criterion/enum.BatchSize.html#variants for // a discussion of this. PerIteration is chosen for these benchmarks because the // input holds a database file handle diff --git a/components/suggest/src/benchmarks/ingest.rs b/components/suggest/src/benchmarks/ingest.rs index 9a04c2dc2..99ad6cac6 100644 --- a/components/suggest/src/benchmarks/ingest.rs +++ b/components/suggest/src/benchmarks/ingest.rs @@ -48,9 +48,12 @@ impl IngestBenchmark { pub struct InputType(SuggestStoreInner); impl BenchmarkWithInput for IngestBenchmark { - type Input = InputType; + type GlobalInput = (); + type IterationInput = InputType; - fn generate_input(&self) -> Self::Input { + fn global_input(&self) -> Self::GlobalInput {} + + fn iteration_input(&self) -> Self::IterationInput { let data_path = self.temp_dir.path().join(unique_db_filename()); let store = SuggestStoreInner::new(data_path, vec![], self.client.clone()); store.ensure_db_initialized(); @@ -61,7 +64,7 @@ impl BenchmarkWithInput for IngestBenchmark { InputType(store) } - fn benchmarked_code(&self, input: Self::Input) { + fn benchmarked_code(&self, _: &Self::GlobalInput, input: Self::IterationInput) { let InputType(store) = input; store.ingest_records_by_type(self.record_type); } diff --git a/components/suggest/src/benchmarks/mod.rs b/components/suggest/src/benchmarks/mod.rs 
index 52efa276c..1a7245e28 100644 --- a/components/suggest/src/benchmarks/mod.rs +++ b/components/suggest/src/benchmarks/mod.rs @@ -10,7 +10,16 @@ //! //! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput] -use std::sync::atomic::{AtomicU32, Ordering}; +use std::{ + path::PathBuf, + sync::{ + atomic::{AtomicU32, Ordering}, + OnceLock, + }, +}; +use tempfile::TempDir; + +use crate::{SuggestIngestionConstraints, SuggestStore}; pub mod client; pub mod ingest; @@ -28,21 +37,52 @@ pub trait Benchmark { /// Trait for benchmarks that require input /// /// This will run using Criterion's `iter_batched` function. Criterion will create a batch of -/// inputs, then pass each one to benchmark. +/// inputs, then pass each one to the benchmark's iterations. /// -/// This supports simple benchmarks that don't require any input. Note: global setup can be done -/// in the `new()` method for the struct. +/// This supports simple benchmarks that don't require any input. pub trait BenchmarkWithInput { - type Input; + /// Input that will be created once and then passed by reference to each + /// of the benchmark's iterations. + type GlobalInput; - /// Generate the input (this is not included in the benchmark time) - fn generate_input(&self) -> Self::Input; + /// Input that will be created for each of the benchmark's iterations. + type IterationInput; + + /// Generate the global input (not included in the benchmark time) + fn global_input(&self) -> Self::GlobalInput; + + /// Generate the per-iteration input (not included in the benchmark time) + fn iteration_input(&self) -> Self::IterationInput; /// Perform the operations that we're benchmarking. 
- fn benchmarked_code(&self, input: Self::Input); + fn benchmarked_code(&self, g_input: &Self::GlobalInput, i_input: Self::IterationInput); } fn unique_db_filename() -> String { static COUNTER: AtomicU32 = AtomicU32::new(0); format!("db{}.sqlite", COUNTER.fetch_add(1, Ordering::Relaxed)) } + +/// Creates a new store that will contain all provider data currently in remote +/// settings. +fn new_store() -> SuggestStore { + // Create a "starter" store that will do an initial ingest, and then + // initialize every returned store with a copy of its DB so that each one + // doesn't need to reingest. + static STARTER: OnceLock<(TempDir, PathBuf)> = OnceLock::new(); + let (starter_dir, starter_db_path) = STARTER.get_or_init(|| { + let temp_dir = tempfile::tempdir().unwrap(); + let db_path = temp_dir.path().join(unique_db_filename()); + let store = + SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store"); + store + .ingest(SuggestIngestionConstraints::all_providers()) + .expect("Error during ingestion"); + store.checkpoint(); + (temp_dir, db_path) + }); + + let db_path = starter_dir.path().join(unique_db_filename()); + std::fs::copy(starter_db_path, &db_path).expect("Error copying starter DB file"); + SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store") +} diff --git a/components/suggest/src/benchmarks/query.rs b/components/suggest/src/benchmarks/query.rs index 30e41dc67..fc6b32d28 100644 --- a/components/suggest/src/benchmarks/query.rs +++ b/components/suggest/src/benchmarks/query.rs @@ -3,50 +3,39 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ use crate::{ - benchmarks::{unique_db_filename, BenchmarkWithInput}, - SuggestIngestionConstraints, SuggestStore, SuggestionProvider, SuggestionQuery, + benchmarks::{new_store, BenchmarkWithInput}, + SuggestStore, SuggestionProvider, SuggestionQuery, }; pub struct QueryBenchmark { - store: SuggestStore, provider: SuggestionProvider, query: &'static str, } impl QueryBenchmark { pub fn new(provider: SuggestionProvider, query: &'static str) -> Self { - let temp_dir = tempfile::tempdir().unwrap(); - let data_path = temp_dir.path().join(unique_db_filename()); - let store = - SuggestStore::new(&data_path.to_string_lossy(), None).expect("Error building store"); - store - .ingest(SuggestIngestionConstraints::all_providers()) - .expect("Error during ingestion"); - Self { - store, - provider, - query, - } + Self { provider, query } } } -// The input for each benchmark a query to pass to the store -pub struct InputType(SuggestionQuery); - impl BenchmarkWithInput for QueryBenchmark { - type Input = InputType; + type GlobalInput = SuggestStore; + type IterationInput = SuggestionQuery; - fn generate_input(&self) -> Self::Input { - InputType(SuggestionQuery { + fn global_input(&self) -> Self::GlobalInput { + new_store() + } + + fn iteration_input(&self) -> Self::IterationInput { + SuggestionQuery { providers: vec![self.provider], keyword: self.query.to_string(), ..SuggestionQuery::default() - }) + } } - fn benchmarked_code(&self, input: Self::Input) { - let InputType(query) = input; - self.store + fn benchmarked_code(&self, store: &Self::GlobalInput, query: Self::IterationInput) { + store .query(query) .unwrap_or_else(|e| panic!("Error querying store: {e}")); } diff --git a/components/suggest/src/store.rs b/components/suggest/src/store.rs index 6a2620cf7..262a3407e 100644 --- a/components/suggest/src/store.rs +++ b/components/suggest/src/store.rs @@ -266,6 +266,16 @@ impl SuggestStore { } } +#[cfg(feature = "benchmark_api")] +impl SuggestStore { + /// Creates a WAL checkpoint. 
This will cause changes in the write-ahead log + /// to be written to the DB. See: + /// <https://sqlite.org/pragma.html#pragma_wal_checkpoint> + pub fn checkpoint(&self) { + self.inner.checkpoint(); + } +} + /// Constraints limit which suggestions to ingest from Remote Settings. #[derive(Clone, Default, Debug, uniffi::Record)] pub struct SuggestIngestionConstraints { @@ -751,6 +761,12 @@ where self.dbs().unwrap(); } + fn checkpoint(&self) { + let conn = self.dbs().unwrap().writer.conn.lock(); + conn.pragma_update(None, "wal_checkpoint", "TRUNCATE") + .expect("Error performing checkpoint"); + } + pub fn ingest_records_by_type(&self, ingest_record_type: SuggestRecordType) { let writer = &self.dbs().unwrap().writer; let mut context = MetricsContext::default();