diff --git a/components/suggest/benches/benchmark_all.rs b/components/suggest/benches/benchmark_all.rs index 03b09ff61..2a9df68af 100644 --- a/components/suggest/benches/benchmark_all.rs +++ b/components/suggest/benches/benchmark_all.rs @@ -2,7 +2,13 @@ use criterion::{ criterion_group, criterion_main, measurement::Measurement, BatchSize, BenchmarkGroup, Criterion, }; use std::sync::Once; -use suggest::benchmarks::{ingest, query, BenchmarkWithInput}; +use suggest::benchmarks::{geoname, ingest, query, BenchmarkWithInput}; + +pub fn geoname(c: &mut Criterion) { + setup_viaduct(); + let group = c.benchmark_group("geoname"); + run_benchmarks(group, geoname::all_benchmarks()) +} pub fn ingest(c: &mut Criterion) { setup_viaduct(); @@ -44,5 +50,5 @@ fn setup_viaduct() { INIT.call_once(viaduct_reqwest::use_reqwest_backend); } -criterion_group!(benches, ingest, query); +criterion_group!(benches, geoname, ingest, query); criterion_main!(benches); diff --git a/components/suggest/src/benchmarks/geoname.rs b/components/suggest/src/benchmarks/geoname.rs new file mode 100644 index 000000000..428784b28 --- /dev/null +++ b/components/suggest/src/benchmarks/geoname.rs @@ -0,0 +1,449 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::{ + benchmarks::{new_store, BenchmarkWithInput}, + geoname::{Geoname, GeonameType}, + SuggestStore, +}; + +pub struct GeonameBenchmark { + args: FetchGeonamesArgs, + should_match: bool, +} + +#[derive(Clone, Debug)] +pub struct FetchGeonamesArgs { + query: &'static str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, +} + +pub struct IterationInput { + fetch_args: FetchGeonamesArgs, + should_match_message: String, +} + +impl BenchmarkWithInput for GeonameBenchmark { + type GlobalInput = SuggestStore; + type IterationInput = IterationInput; + + fn global_input(&self) -> Self::GlobalInput { + new_store() + } + + fn iteration_input(&self) -> Self::IterationInput { + let fetch_args = self.args.clone(); + // Format the message now so it doesn't take up time in the benchmark. + let should_match_message = format!("should_match for fetch: {:?}", fetch_args); + IterationInput { + fetch_args, + should_match_message, + } + } + + fn benchmarked_code(&self, store: &Self::GlobalInput, i_input: Self::IterationInput) { + let matches = store + .fetch_geonames( + i_input.fetch_args.query, + i_input.fetch_args.match_name_prefix, + i_input.fetch_args.geoname_type, + i_input.fetch_args.filter, + ) + .unwrap_or_else(|e| panic!("Error fetching geonames: {e}")); + + // Make sure matches were returned or not as expected. Otherwise the + // benchmark might not be testing what it's intended to test. + assert_eq!( + !matches.is_empty(), + self.should_match, + "{}", + i_input.should_match_message, + ); + } +} + +pub fn all_benchmarks() -> Vec<(&'static str, GeonameBenchmark)> { + static NY_STATE: std::sync::OnceLock = std::sync::OnceLock::new(); + let ny_state = NY_STATE.get_or_init(|| Geoname { + geoname_id: 8, + name: "New York".to_string(), + latitude: 43.00035, + longitude: -75.4999, + country_code: "US".to_string(), + admin1_code: "NY".to_string(), + population: 19274244, + }); + + vec![ + // no matches + ( + "geoname-fetch-no-match-1", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "nomatch", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-2", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "no match", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-3", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "no match either", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-long", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "this is a very long string that does not match anything in the geonames database but it sure is very long", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + + // no matches w/ prefix matching + ( + "geoname-fetch-no-match-1-prefix", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "nomatch", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-2-prefix", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "no match", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-3-prefix", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "no match either", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + ( + "geoname-fetch-no-match-long-prefix", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "this is a very long string that does not match anything in the geonames database but it sure is very long", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: false, + } + ), + + // abbreviations and airport codes + ( + "geoname-fetch-abbr-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-abbr-nyc", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "nyc", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-abbr-ca", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ca", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-airport-pdx", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "pdx", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-airport-roc", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "roc", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + + // abbreviations and airport codes w/ prefix matching + ( + "geoname-fetch-abbr-prefix-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-abbr-prefix-nyc", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "nyc", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-abbr-prefix-ca", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ca", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-airport-prefix-pdx", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "pdx", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-airport-prefix-roc", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "roc", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + + // full names + ( + "geoname-fetch-name-new-york", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "new york", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-name-rochester", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "rochester", + match_name_prefix: false, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + + // full names w/ prefix matching + ( + "geoname-fetch-name-prefix-new-york", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "new york", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-name-prefix-rochester", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "rochester", + match_name_prefix: true, + geoname_type: None, + filter: None, + }, + should_match: true, + } + ), + + // restricting to a geoname type + ( + "geoname-fetch-type-city-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: Some(GeonameType::City), + filter: None, + }, + should_match: true, + } + ), + ( + "geoname-fetch-type-region-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: Some(GeonameType::Region), + filter: None, + }, + should_match: true, + } + ), + + // filtering + ( + "geoname-fetch-filter-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: None, + filter: Some(vec![&ny_state]), + }, + should_match: true, + } + ), + + // restricting to a geoname type + filtering + ( + "geoname-fetch-type-filter-city-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: Some(GeonameType::City), + filter: Some(vec![&ny_state]), + }, + should_match: true, + } + ), + ( + "geoname-fetch-type-filter-region-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: false, + geoname_type: Some(GeonameType::Region), + filter: Some(vec![&ny_state]), + }, + should_match: true, + } + ), + + // restricting to a geoname type + filtering w/ prefix matching + ( + "geoname-fetch-type-filter-prefix-city-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: true, + geoname_type: Some(GeonameType::City), + filter: Some(vec![&ny_state]), + }, + should_match: true, + } + ), + ( + "geoname-fetch-type-filter-prefix-region-ny", + GeonameBenchmark { + args: FetchGeonamesArgs { + query: "ny", + match_name_prefix: true, + geoname_type: Some(GeonameType::Region), + filter: Some(vec![&ny_state]), + }, + should_match: true, + } + ), + ] +} diff --git a/components/suggest/src/benchmarks/mod.rs b/components/suggest/src/benchmarks/mod.rs index 1a7245e28..3285c35a9 100644 --- a/components/suggest/src/benchmarks/mod.rs +++ b/components/suggest/src/benchmarks/mod.rs @@ -22,6 +22,7 @@ use tempfile::TempDir; use crate::{SuggestIngestionConstraints, SuggestStore}; pub mod client; +pub mod geoname; pub mod ingest; pub mod query; diff --git a/components/suggest/src/benchmarks/query.rs b/components/suggest/src/benchmarks/query.rs index fc6b32d28..02f4d81f2 100644 --- a/components/suggest/src/benchmarks/query.rs +++ b/components/suggest/src/benchmarks/query.rs @@ -10,34 +10,49 @@ use crate::{ pub struct QueryBenchmark { provider: SuggestionProvider, query: &'static str, + should_match: bool, } -impl QueryBenchmark { - pub fn new(provider: SuggestionProvider, query: &'static str) -> Self { - Self { provider, query } - } +pub struct IterationInput { + query: SuggestionQuery, + should_match_message: String, } impl BenchmarkWithInput for QueryBenchmark { type GlobalInput = SuggestStore; - type IterationInput = SuggestionQuery; + type IterationInput = IterationInput; fn global_input(&self) -> Self::GlobalInput { new_store() } fn iteration_input(&self) -> Self::IterationInput { - SuggestionQuery { + let query = SuggestionQuery { providers: vec![self.provider], keyword: self.query.to_string(), ..SuggestionQuery::default() + }; + // Format the message now so it doesn't take up time in the benchmark. + let should_match_message = format!("should_match for query: {:?}", query); + IterationInput { + query, + should_match_message, } } - fn benchmarked_code(&self, store: &Self::GlobalInput, query: Self::IterationInput) { - store - .query(query) + fn benchmarked_code(&self, store: &Self::GlobalInput, i_input: Self::IterationInput) { + let suggestions = store + .query(i_input.query) .unwrap_or_else(|e| panic!("Error querying store: {e}")); + + // Make sure matches were returned or not as expected. Otherwise the + // benchmark might not be testing what it's intended to test. + assert_eq!( + !suggestions.is_empty(), + self.should_match, + "{}", + i_input.should_match_message, + ); } } @@ -50,27 +65,371 @@ pub fn all_benchmarks() -> Vec<(&'static str, QueryBenchmark)> { // Therefore, to test shorter prefixes we use 2-term queries. ( "query-fakespot-hand-s", - QueryBenchmark::new(SuggestionProvider::Fakespot, "hand s"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "hand s", + should_match: true, + } ), ( "query-fakespot-hand-sa", - QueryBenchmark::new(SuggestionProvider::Fakespot, "hand sa"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "hand sa", + should_match: true, + } ), ( "query-fakespot-hand-san", - QueryBenchmark::new(SuggestionProvider::Fakespot, "hand san"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "hand san", + should_match: true, + } ), ( "query-fakespot-sani", - QueryBenchmark::new(SuggestionProvider::Fakespot, "sani"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "sani", + should_match: true, + } ), ( "query-fakespot-sanit", - QueryBenchmark::new(SuggestionProvider::Fakespot, "sanit"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "sanit", + should_match: true, + } ), ( "query-fakespot-saniti", - QueryBenchmark::new(SuggestionProvider::Fakespot, "saniti"), + QueryBenchmark { + provider: SuggestionProvider::Fakespot, + query: "saniti", + should_match: false, + }, + ), + + // weather: no matches + ( + "query-weather-no-match-1", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "nomatch", + should_match: false, + }, + ), + ( + "query-weather-no-match-2", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "no match", + should_match: false, + }, + ), + ( + "query-weather-no-match-3", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "no match either", + should_match: false, + }, + ), + ( + "query-weather-no-match-long-1", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "city1 city2 state1 state2 keyword1 keyword2 keyword3", + should_match: false, + }, + ), + ( + "query-weather-no-match-long-2", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "this does not match anything especially not a weather suggestion but nevertheless it is a very long query which as previously mentioned doesn't match anything at all", + should_match: false, + }, + ), + ( + "query-weather-no-match-keyword-prefix", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "wea", + should_match: false, + }, + ), + ( + "query-weather-no-match-city-abbr", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "ny", + should_match: false, + }, + ), + ( + "query-weather-no-match-airport-code", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "pdx", + should_match: false, + }, + ), + ( + "query-weather-no-match-airport-code-region", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "pdx or", + should_match: false, + }, + ), + + // weather: keyword only + ( + "query-weather-keyword-only", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather", + should_match: true, + }, + ), + + // weather: city only + ( + "query-weather-city-only", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "new york", + should_match: true, + }, + ), + + // weather: city + region + ( + "query-weather-city-region-los-angeles-c", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles c", + should_match: true, + }, + ), + ( + "query-weather-city-region-los-angeles-ca", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles ca", + should_match: true, + }, + ), + ( + "query-weather-city-region-la-ca", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "la ca", + should_match: true, + }, + ), + ( + "query-weather-city-region-ny-ny", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "ny ny", + should_match: true, + }, + ), + + // weather: keyword + city + ( + "query-weather-keyword-city-n", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather n", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-ne", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather ne", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-new", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather new", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-new-york", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather new york", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-ny", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather ny", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-pdx", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather pdx", + should_match: true, + }, + ), + + // weather: keyword + city + region + ( + "query-weather-keyword-city-region-los-angeles-c", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather los angeles c", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-region-los-angeles-ca", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather los angeles ca", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-region-la-ca", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather la ca", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-region-ny-ny", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather ny ny", + should_match: true, + }, + ), + ( + "query-weather-keyword-city-region-pdx-or", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "weather pdx or", + should_match: true, + }, + ), + + // weather: city + keyword + ( + "query-weather-city-keyword-new-york-w", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "new york w", + should_match: true, + }, + ), + ( + "query-weather-city-keyword-new-york-we", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "new york we", + should_match: true, + }, + ), + ( + "query-weather-city-keyword-new-york-wea", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "new york wea", + should_match: true, + }, + ), + ( + "query-weather-city-keyword-new-york-weather", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "new york weather", + should_match: true, + }, + ), + ( + "query-weather-city-keyword-ny-w", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "ny w", + should_match: true, + }, + ), + ( + "query-weather-city-keyword-ny-weather", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "ny weather", + should_match: true, + }, + ), + + // weather: city + region + keyword + ( + "query-weather-city-region-keyword-los-angeles-w", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles ca w", + should_match: true, + }, + ), + ( + "query-weather-city-region-keyword-los-angeles-we", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles ca we", + should_match: true, + }, + ), + ( + "query-weather-city-region-keyword-los-angeles-wea", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles ca wea", + should_match: true, + }, + ), + ( + "query-weather-city-region-keyword-los-angeles-weather", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "los angeles ca weather", + should_match: true, + }, + ), + ( + "query-weather-city-region-keyword-la-ca-weather", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "la ca weather", + should_match: true, + }, + ), + ( + "query-weather-city-region-keyword-ny-ny-weather", + QueryBenchmark { + provider: SuggestionProvider::Weather, + query: "ny ny weather", + should_match: true, + }, ), ] } diff --git a/components/suggest/src/schema.rs b/components/suggest/src/schema.rs index de628f82d..8690ae152 100644 --- a/components/suggest/src/schema.rs +++ b/components/suggest/src/schema.rs @@ -23,7 +23,7 @@ use sql_support::{ /// `clear_database()` by adding their names to `conditional_tables`, unless /// they are cleared via a deletion trigger or there's some other good /// reason not to do so. -pub const VERSION: u32 = 30; +pub const VERSION: u32 = 31; /// The current Suggest database schema. pub const SQL: &str = " @@ -218,6 +218,7 @@ CREATE TABLE geonames_alternates( PRIMARY KEY (name, geoname_id), FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE ) WITHOUT ROWID; +CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id); CREATE TABLE geonames_metrics( record_id TEXT NOT NULL PRIMARY KEY, @@ -586,6 +587,16 @@ CREATE TABLE geonames_alternates( clear_database(tx)?; Ok(()) } + 30 => { + // Add the `geonames_alternates_geoname_id` index. + clear_database(tx)?; + tx.execute_batch( + " +CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id); + ", + )?; + Ok(()) + } _ => Err(open_database::Error::IncompatibleVersion(version)), } } diff --git a/components/suggest/src/store.rs b/components/suggest/src/store.rs index 262a3407e..a0f0b0ad5 100644 --- a/components/suggest/src/store.rs +++ b/components/suggest/src/store.rs @@ -30,6 +30,9 @@ use crate::{ QueryWithMetricsResult, Result, SuggestApiResult, Suggestion, SuggestionQuery, }; +#[cfg(feature = "benchmark_api")] +use crate::geoname::{Geoname, GeonameMatch, GeonameType}; + /// Builder for [SuggestStore] /// /// Using a builder is preferred to calling the constructor directly since it's harder to confuse @@ -274,6 +277,17 @@ impl SuggestStore { pub fn checkpoint(&self) { self.inner.checkpoint(); } + + pub fn fetch_geonames( + &self, + query: &str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + ) -> Result> { + self.inner + .fetch_geonames(query, match_name_prefix, geoname_type, filter) + } } /// Constraints limit which suggestions to ingest from Remote Settings. @@ -833,6 +847,18 @@ where conn.query_one("SELECT page_size * page_count FROM pragma_page_count(), pragma_page_size()") .unwrap() } + + pub fn fetch_geonames( + &self, + query: &str, + match_name_prefix: bool, + geoname_type: Option, + filter: Option>, + ) -> Result> { + self.dbs()? + .reader + .read(|dao| dao.fetch_geonames(query, match_name_prefix, geoname_type, filter)) + } } /// Holds a store's open connections to the Suggest database.