Improve matching logic for city abbreviations and airport codes
This commit is contained in:
Родитель
578e31c21d
Коммит
ea0654349e
|
@ -82,6 +82,33 @@ impl Hash for Geoname {
|
|||
}
|
||||
}
|
||||
|
||||
/// Value returned by `fetch_geonames()`: one geoname together with a
/// description of how the query matched it.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct GeonameMatch {
|
||||
/// The geoname that matched the query.
pub geoname: Geoname,
|
||||
/// Which kind of name the query matched: an abbreviation, an airport code,
/// or any other name.
pub match_type: GeonameMatchType,
|
||||
/// `true` when the matched name is not equal to the lowercased query (the
/// query matched as a prefix); `false` when the name equals the query.
pub prefix: bool,
|
||||
}
|
||||
|
||||
/// The kind of name through which a query matched a geoname.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GeonameMatchType {
    /// For U.S. states, abbreviations are the usual two-letter codes ("CA").
    Abbreviation,
    /// An airport code, i.e. an alternate name whose `iso_language` is
    /// `iata`, `icao`, or `faac`.
    AirportCode,
    /// This includes any names that aren't abbreviations or airport codes.
    Name,
}

impl GeonameMatchType {
    /// Returns `true` if the match was made on an abbreviation.
    pub fn is_abbreviation(&self) -> bool {
        matches!(self, Self::Abbreviation)
    }

    /// Returns `true` if the match was made on an airport code.
    ///
    /// Provided so that every variant has a predicate, alongside
    /// `is_abbreviation()` and `is_name()`.
    pub fn is_airport_code(&self) -> bool {
        matches!(self, Self::AirportCode)
    }

    /// Returns `true` if the match was made on a name that is neither an
    /// abbreviation nor an airport code.
    pub fn is_name(&self) -> bool {
        matches!(self, Self::Name)
    }
}
|
||||
|
||||
/// This data is used to service every query handled by the weather provider and
|
||||
/// potentially other providers, so we cache it from the DB.
|
||||
#[derive(Debug, Default)]
|
||||
|
@ -162,13 +189,11 @@ impl SuggestDao<'_> {
|
|||
/// Fetches geonames that have at least one name matching the `query`
|
||||
/// string.
|
||||
///
|
||||
/// `match_prefixes` determines whether prefix matching is performed. If
|
||||
/// `true`, returned geonames will have at least one name prefixed by
|
||||
/// `query`. If `false`, returned geonames will have at least one name equal
|
||||
/// to `query`.
|
||||
///
|
||||
/// `match_abbreviations` determines whether abbreviations and airport codes
|
||||
/// are matched.
|
||||
/// `match_name_prefix` determines whether prefix matching is performed on
|
||||
/// names that aren't abbreviations and airport codes. When `true`, names
|
||||
/// that start with `query` will match. When `false`, names that equal `query`
|
||||
/// will match. Prefix matching is never performed on abbreviations and
|
||||
/// airport codes because we don't currently have a use case for that.
|
||||
///
|
||||
/// `geoname_type` restricts returned geonames to the specified type. `None`
|
||||
/// restricts geonames to cities and regions. There's no way to return
|
||||
|
@ -182,14 +207,20 @@ impl SuggestDao<'_> {
|
|||
/// since city and region names are not unique. `filter` is disjunctive: If
|
||||
/// any item in `filter` matches a geoname, the geoname will be filtered in.
|
||||
/// If `filter` is empty, all geonames will be filtered out.
|
||||
///
|
||||
/// The returned matches will include all matching types for a geoname, one
|
||||
/// match per type per geoname. For example, if the query matches both a
|
||||
/// geoname's name and abbreviation, two matches for that geoname will be
|
||||
/// returned: one with a `match_type` of `GeonameMatchType::Name` and one
|
||||
/// with a `match_type` of `GeonameMatchType::Abbreviation`. `prefix` is
|
||||
/// `true` when the query matched only a prefix of the matched name and
/// `false` when the matched name equals the query.
|
||||
pub fn fetch_geonames(
|
||||
&self,
|
||||
query: &str,
|
||||
match_prefixes: bool,
|
||||
match_abbreviations: bool,
|
||||
match_name_prefix: bool,
|
||||
geoname_type: Option<GeonameType>,
|
||||
filter: Option<Vec<&Geoname>>,
|
||||
) -> Result<Vec<Geoname>> {
|
||||
) -> Result<Vec<GeonameMatch>> {
|
||||
let city_pred = "(g.feature_class = 'P')";
|
||||
let region_pred = "(g.feature_class = 'A' AND g.feature_code = 'ADM1')";
|
||||
let type_pred = match geoname_type {
|
||||
|
@ -210,56 +241,67 @@ impl SuggestDao<'_> {
|
|||
g.feature_class,
|
||||
g.country_code,
|
||||
g.admin1_code,
|
||||
g.population
|
||||
g.population,
|
||||
a.name != :name AS prefix,
|
||||
(SELECT CASE
|
||||
-- abbreviation
|
||||
WHEN a.iso_language = 'abbr' THEN 1
|
||||
-- airport code
|
||||
WHEN a.iso_language IN ('iata', 'icao', 'faac') THEN 2
|
||||
-- name
|
||||
ELSE 3
|
||||
END
|
||||
) AS match_type
|
||||
FROM
|
||||
geonames g
|
||||
JOIN
|
||||
geonames_alternates a ON g.id = a.geoname_id
|
||||
WHERE
|
||||
{}
|
||||
AND g.id IN (
|
||||
SELECT DISTINCT
|
||||
geoname_id
|
||||
FROM
|
||||
geonames_alternates
|
||||
WHERE
|
||||
CASE :abbr
|
||||
WHEN TRUE THEN 1
|
||||
ELSE (
|
||||
iso_language ISNULL
|
||||
OR iso_language NOT IN ('iata', 'icao', 'faac', 'abbr')
|
||||
) END
|
||||
AND CASE :prefix
|
||||
WHEN TRUE THEN (name BETWEEN :name AND :name || X'FFFF')
|
||||
ELSE name = :name
|
||||
END
|
||||
)
|
||||
AND CASE :prefix
|
||||
WHEN FALSE THEN a.name = :name
|
||||
ELSE (a.name = :name OR (
|
||||
(a.name BETWEEN :name AND :name || X'FFFF')
|
||||
AND match_type = 3
|
||||
))
|
||||
END
|
||||
GROUP BY
|
||||
g.id, match_type
|
||||
ORDER BY
|
||||
g.feature_class = 'P' DESC, g.population DESC, g.id ASC
|
||||
g.feature_class = 'P' DESC, g.population DESC, g.id ASC, a.iso_language ASC
|
||||
"#,
|
||||
type_pred
|
||||
),
|
||||
named_params! {
|
||||
":name": query.to_lowercase(),
|
||||
":abbr": match_abbreviations,
|
||||
":prefix": match_prefixes,
|
||||
":prefix": match_name_prefix,
|
||||
},
|
||||
|row| -> Result<Option<Geoname>> {
|
||||
let geoname = Geoname {
|
||||
geoname_id: row.get("id")?,
|
||||
name: row.get("name")?,
|
||||
latitude: row.get("latitude")?,
|
||||
longitude: row.get("longitude")?,
|
||||
country_code: row.get("country_code")?,
|
||||
admin1_code: row.get("admin1_code")?,
|
||||
population: row.get("population")?,
|
||||
|row| -> Result<Option<GeonameMatch>> {
|
||||
let g_match = GeonameMatch {
|
||||
geoname: Geoname {
|
||||
geoname_id: row.get("id")?,
|
||||
name: row.get("name")?,
|
||||
latitude: row.get("latitude")?,
|
||||
longitude: row.get("longitude")?,
|
||||
country_code: row.get("country_code")?,
|
||||
admin1_code: row.get("admin1_code")?,
|
||||
population: row.get("population")?,
|
||||
},
|
||||
prefix: row.get("prefix")?,
|
||||
match_type: match row.get::<_, i32>("match_type")? {
|
||||
1 => GeonameMatchType::Abbreviation,
|
||||
2 => GeonameMatchType::AirportCode,
|
||||
_ => GeonameMatchType::Name,
|
||||
},
|
||||
};
|
||||
if let Some(geonames) = &filter {
|
||||
geonames
|
||||
.iter()
|
||||
.find(|g| g.has_same_region(&geoname))
|
||||
.map(|_| Ok(Some(geoname)))
|
||||
.find(|g| g.has_same_region(&g_match.geoname))
|
||||
.map(|_| Ok(Some(g_match)))
|
||||
.unwrap_or(Ok(None))
|
||||
} else {
|
||||
Ok(Some(geoname))
|
||||
Ok(Some(g_match))
|
||||
}
|
||||
},
|
||||
)?
|
||||
|
@ -586,6 +628,39 @@ pub(crate) mod tests {
|
|||
{ "name": "ny", "iso_language": "abbr" },
|
||||
],
|
||||
},
|
||||
// Waco, TX: Has a surprising IATA airport code that's a
|
||||
// common English word and not a prefix of the city name
|
||||
{
|
||||
"id": 9,
|
||||
"name": "Waco",
|
||||
"latitude": "31.54933",
|
||||
"longitude": "-97.14667",
|
||||
"feature_class": "P",
|
||||
"feature_code": "PPLA2",
|
||||
"country_code": "US",
|
||||
"admin1_code": "TX",
|
||||
"population": 132356,
|
||||
"alternate_names_2": [
|
||||
{ "name": "waco" },
|
||||
{ "name": "act", "iso_language": "iata" },
|
||||
],
|
||||
},
|
||||
// TX
|
||||
{
|
||||
"id": 10,
|
||||
"name": "Texas",
|
||||
"latitude": "31.25044",
|
||||
"longitude": "-99.25061",
|
||||
"feature_class": "A",
|
||||
"feature_code": "ADM1",
|
||||
"country_code": "US",
|
||||
"admin1_code": "TX",
|
||||
"population": 22875689,
|
||||
"alternate_names_2": [
|
||||
{ "name": "texas" },
|
||||
{ "name": "tx", "iso_language": "abbr" },
|
||||
],
|
||||
},
|
||||
// Made-up city with a long name
|
||||
{
|
||||
"id": 999,
|
||||
|
@ -655,6 +730,18 @@ pub(crate) mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Test fixture: the expected `Geoname` for Waco, TX (geoname id 9).
pub(crate) fn waco() -> Geoname {
|
||||
Geoname {
|
||||
geoname_id: 9,
|
||||
name: "Waco".to_string(),
|
||||
latitude: 31.54933,
|
||||
longitude: -97.14667,
|
||||
country_code: "US".to_string(),
|
||||
admin1_code: "TX".to_string(),
|
||||
population: 132356,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn long_name_city() -> Geoname {
|
||||
Geoname {
|
||||
geoname_id: 999,
|
||||
|
@ -715,404 +802,455 @@ pub(crate) mod tests {
|
|||
..SuggestIngestionConstraints::all_providers()
|
||||
});
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Test {
|
||||
query: &'static str,
|
||||
match_prefixes: bool,
|
||||
match_abbreviations: bool,
|
||||
match_name_prefix: bool,
|
||||
geoname_type: Option<GeonameType>,
|
||||
filter: Option<Vec<Geoname>>,
|
||||
expected: Vec<Geoname>,
|
||||
expected: Vec<GeonameMatch>,
|
||||
}
|
||||
|
||||
let tests = [
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![waterloo_ia(), waterloo_al()]),
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![waterloo_ia()]),
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![waterloo_al()]),
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: Some(GeonameType::City),
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ia",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: Some(GeonameType::Region),
|
||||
filter: None,
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "iowa",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: false,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![ia()],
|
||||
},
|
||||
Test {
|
||||
query: "iowa",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ia(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "al",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![al()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: al(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
// "al" is both a name prefix and an abbreviation.
|
||||
Test {
|
||||
query: "al",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![al()],
|
||||
},
|
||||
Test {
|
||||
query: "al",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
// "al" is both an abbreviation and a prefix, so disabling
|
||||
// abbreviations but enabling prefixes should match it.
|
||||
Test {
|
||||
query: "al",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![al()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: al(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: al(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "waterloo",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![ia()]),
|
||||
expected: vec![waterloo_ia()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: waterloo_ia(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "waterloo",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![al()]),
|
||||
expected: vec![waterloo_al()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: waterloo_al(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "waterloo",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![ny_state()]),
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "waterloo",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
// Waterloo, IA should be first since it has a larger
|
||||
// population.
|
||||
expected: vec![waterloo_ia(), waterloo_al()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: waterloo_ia(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: waterloo_al(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "water",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![waterloo_ia(), waterloo_al()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: waterloo_ia(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: waterloo_al(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "water",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
// NYC should be first since cities are ordered before regions.
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![nyc()]),
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: Some(vec![ny_state()]),
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: Some(GeonameType::City),
|
||||
filter: None,
|
||||
expected: vec![nyc()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "ny",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: Some(GeonameType::Region),
|
||||
filter: None,
|
||||
expected: vec![ny_state()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: "NeW YoRk",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "NY",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Abbreviation,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "new",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "new",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![nyc(), ny_state()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: nyc(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: ny_state(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "new york foo",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "new york foo",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "new foo",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "foo new york",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "foo new york",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "foo new",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "roc",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![rochester()],
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: rochester(),
|
||||
match_type: GeonameMatchType::AirportCode,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
// "roc" is both a name prefix and an airport code.
|
||||
Test {
|
||||
query: "roc",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: false,
|
||||
match_name_prefix: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![],
|
||||
},
|
||||
Test {
|
||||
query: "roc",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: true,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![rochester()],
|
||||
},
|
||||
// "roc" is both an airport code and a prefix, so disabling
|
||||
// abbreviations but enabling prefixes should match it.
|
||||
Test {
|
||||
query: "roc",
|
||||
match_prefixes: true,
|
||||
match_abbreviations: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![rochester()],
|
||||
expected: vec![
|
||||
GeonameMatch {
|
||||
geoname: rochester(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: true,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: rochester(),
|
||||
match_type: GeonameMatchType::AirportCode,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
Test {
|
||||
query: "long name",
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![Geoname {
|
||||
geoname_id: 999,
|
||||
name: "Long Name".to_string(),
|
||||
latitude: 38.06084,
|
||||
longitude: -97.92977,
|
||||
country_code: "US".to_string(),
|
||||
admin1_code: "NY".to_string(),
|
||||
population: 2,
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: long_name_city(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
Test {
|
||||
query: LONG_NAME,
|
||||
match_prefixes: false,
|
||||
match_abbreviations: true,
|
||||
match_name_prefix: false,
|
||||
geoname_type: None,
|
||||
filter: None,
|
||||
expected: vec![Geoname {
|
||||
geoname_id: 999,
|
||||
name: "Long Name".to_string(),
|
||||
latitude: 38.06084,
|
||||
longitude: -97.92977,
|
||||
country_code: "US".to_string(),
|
||||
admin1_code: "NY".to_string(),
|
||||
population: 2,
|
||||
expected: vec![GeonameMatch {
|
||||
geoname: long_name_city(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
}],
|
||||
},
|
||||
];
|
||||
|
||||
store.read(|dao| {
|
||||
for t in tests {
|
||||
let gs = t.filter.unwrap_or_default();
|
||||
let gs = t.filter.clone().unwrap_or_default();
|
||||
let gs_refs: Vec<_> = gs.iter().collect();
|
||||
let filters = if gs_refs.is_empty() {
|
||||
None
|
||||
|
@ -1122,12 +1260,13 @@ pub(crate) mod tests {
|
|||
assert_eq!(
|
||||
dao.fetch_geonames(
|
||||
t.query,
|
||||
t.match_prefixes,
|
||||
t.match_abbreviations,
|
||||
t.geoname_type,
|
||||
t.match_name_prefix,
|
||||
t.geoname_type.clone(),
|
||||
filters
|
||||
)?,
|
||||
t.expected
|
||||
t.expected,
|
||||
"Test: {:?}",
|
||||
t
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
|
@ -1230,8 +1369,19 @@ pub(crate) mod tests {
|
|||
// Make sure we have a match.
|
||||
store.read(|dao| {
|
||||
assert_eq!(
|
||||
dao.fetch_geonames("waterloo", false, true, None, None)?,
|
||||
vec![waterloo_ia(), waterloo_al()],
|
||||
dao.fetch_geonames("waterloo", false, None, None)?,
|
||||
vec![
|
||||
GeonameMatch {
|
||||
geoname: waterloo_ia(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
GeonameMatch {
|
||||
geoname: waterloo_al(),
|
||||
match_type: GeonameMatchType::Name,
|
||||
prefix: false,
|
||||
},
|
||||
],
|
||||
);
|
||||
Ok(())
|
||||
})?;
|
||||
|
@ -1248,10 +1398,7 @@ pub(crate) mod tests {
|
|||
// The same query shouldn't match anymore and the tables should be
|
||||
// empty.
|
||||
store.read(|dao| {
|
||||
assert_eq!(
|
||||
dao.fetch_geonames("waterloo", false, true, None, None)?,
|
||||
vec![],
|
||||
);
|
||||
assert_eq!(dao.fetch_geonames("waterloo", false, None, None)?, vec![],);
|
||||
|
||||
let g_ids = dao.conn.query_rows_and_then(
|
||||
"SELECT id FROM geonames",
|
||||
|
|
|
@ -83,7 +83,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
|
|||
///
|
||||
/// ```
|
||||
/// # use suggest::util::filter_map_chunks;
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _| {
|
||||
/// Ok(Some(vec![chunk.to_uppercase()]))
|
||||
/// });
|
||||
/// assert_eq!(paths.unwrap(), vec![
|
||||
|
@ -99,7 +99,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
|
|||
///
|
||||
/// ```
|
||||
/// # use suggest::util::filter_map_chunks;
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, chunk_index, _, _| {
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, chunk_index, _| {
|
||||
/// if chunk_index > 0 || chunk == "a" {
|
||||
/// Ok(Some(vec![chunk.to_uppercase()]))
|
||||
/// } else {
|
||||
|
@ -117,7 +117,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
|
|||
///
|
||||
/// ```
|
||||
/// # use suggest::util::filter_map_chunks;
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, path| {
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, path| {
|
||||
/// if path.iter().any(|value| value == "A B") {
|
||||
/// Ok(None)
|
||||
/// } else {
|
||||
|
@ -135,7 +135,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
|
|||
///
|
||||
/// ```
|
||||
/// # use suggest::util::filter_map_chunks;
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
|
||||
/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _| {
|
||||
/// Ok(Some(vec![format!("{chunk}0"), format!("{chunk}1")]))
|
||||
/// });
|
||||
/// assert_eq!(paths.unwrap(), vec![
|
||||
|
@ -162,7 +162,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
|
|||
pub fn filter_map_chunks<T: Clone>(
|
||||
words: &[&str],
|
||||
max_chunk_size: usize,
|
||||
f: impl Fn(&str, usize, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
f: impl Fn(&str, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
) -> Result<Vec<Vec<T>>> {
|
||||
let normalized_query = words.join(" ");
|
||||
filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f)
|
||||
|
@ -182,7 +182,7 @@ fn filter_map_chunks_recurse<T: Clone>(
|
|||
path: &mut Vec<T>,
|
||||
chunk_index: usize,
|
||||
max_chunk_size: usize,
|
||||
f: &impl Fn(&str, usize, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
f: &impl Fn(&str, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
) -> Result<Vec<Vec<T>>> {
|
||||
// Filtered-in (non-pruned) paths that will be returned from this step of
|
||||
// the traversal.
|
||||
|
@ -206,7 +206,7 @@ fn filter_map_chunks_recurse<T: Clone>(
|
|||
let chunk = &remaining_query[..chunk_char_len];
|
||||
|
||||
// Call the mapper function.
|
||||
if let Some(mapped_values) = f(chunk, chunk_index, chunk_size, &path[..])? {
|
||||
if let Some(mapped_values) = f(chunk, chunk_index, &path[..])? {
|
||||
for value in mapped_values {
|
||||
if chunk_size == remaining_words.len() {
|
||||
// This is the final chunk in the path. Stop recursing.
|
||||
|
@ -302,7 +302,7 @@ mod tests {
|
|||
fn fmc<T: Clone>(
|
||||
query: &str,
|
||||
max_chunk_size: usize,
|
||||
f: impl Fn(&str, usize, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
f: impl Fn(&str, usize, &[T]) -> Result<Option<Vec<T>>>,
|
||||
) -> Result<Vec<Vec<T>>> {
|
||||
let words: Vec<_> = query.split_whitespace().collect();
|
||||
filter_map_chunks(&words, max_chunk_size, f)
|
||||
|
@ -323,7 +323,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
@ -335,7 +335,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
@ -356,7 +356,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
@ -382,7 +382,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
@ -410,7 +410,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_5() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc("a b c d e", 5, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
@ -439,7 +439,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_map_many() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c", 1, |chunk, _, _, _| {
|
||||
let paths = fmc("a b c", 1, |chunk, _, _| {
|
||||
Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
|
||||
})?;
|
||||
assert_eq!(
|
||||
|
@ -479,7 +479,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_map_many() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c", 2, |chunk, _, _, _| {
|
||||
let paths = fmc("a b c", 2, |chunk, _, _| {
|
||||
Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
|
||||
})?;
|
||||
assert_eq!(
|
||||
|
@ -535,35 +535,9 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_chunk_size() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, _, chunk_size, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_size)]))
|
||||
})?;
|
||||
check_paths(
|
||||
paths,
|
||||
vec![
|
||||
vec![("a", 1), ("b", 1), ("c", 1), ("d", 1), ("e", 1)],
|
||||
vec![("a", 1), ("b", 1), ("c", 1), ("d e", 2)],
|
||||
vec![("a", 1), ("b", 1), ("c d", 2), ("e", 1)],
|
||||
vec![("a", 1), ("b", 1), ("c d e", 3)],
|
||||
vec![("a", 1), ("b c", 2), ("d", 1), ("e", 1)],
|
||||
vec![("a", 1), ("b c", 2), ("d e", 2)],
|
||||
vec![("a", 1), ("b c d", 3), ("e", 1)],
|
||||
vec![("a b", 2), ("c", 1), ("d", 1), ("e", 1)],
|
||||
vec![("a b", 2), ("c", 1), ("d e", 2)],
|
||||
vec![("a b", 2), ("c d", 2), ("e", 1)],
|
||||
vec![("a b", 2), ("c d e", 3)],
|
||||
vec![("a b c", 3), ("d", 1), ("e", 1)],
|
||||
vec![("a b c", 3), ("d e", 2)],
|
||||
],
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| match chunk {
|
||||
"a" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -573,7 +547,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| match chunk {
|
||||
"b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -583,7 +557,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| match chunk {
|
||||
"c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -593,7 +567,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| match chunk {
|
||||
"d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -603,7 +577,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 1, |chunk, chunk_index, _| match chunk {
|
||||
"e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -613,7 +587,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"a" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -630,7 +604,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -649,7 +623,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -667,7 +641,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -686,7 +660,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -703,7 +677,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"a b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -722,7 +696,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -742,7 +716,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -762,7 +736,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -781,7 +755,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"a" | "b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -798,7 +772,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"a" | "c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -814,7 +788,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"b c" | "c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -832,7 +806,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 2, |chunk, chunk_index, _| match chunk {
|
||||
"b c" | "d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -850,7 +824,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"a" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -870,7 +844,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -893,7 +867,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -916,7 +890,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -939,7 +913,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -959,7 +933,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"a b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -982,7 +956,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1007,7 +981,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1032,7 +1006,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1055,7 +1029,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"a b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1080,7 +1054,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"b c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1106,7 +1080,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"c d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1131,7 +1105,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 3, |chunk, chunk_index, _| match chunk {
|
||||
"a" | "b c" | "c d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1150,7 +1124,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1171,7 +1145,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1196,7 +1170,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1221,7 +1195,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1246,7 +1220,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1267,7 +1241,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a b" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1292,7 +1266,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1319,7 +1293,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1346,7 +1320,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1371,7 +1345,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a b c" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1398,7 +1372,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"b c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1426,7 +1400,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"c d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1453,7 +1427,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a b c d" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1481,7 +1455,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"b c d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1509,7 +1483,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a" | "b c" | "d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1528,7 +1502,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
|
||||
let paths = fmc("a b c d e", 4, |chunk, chunk_index, _| match chunk {
|
||||
"a" | "b c" | "c d e" => Ok(None),
|
||||
_ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
|
||||
})?;
|
||||
|
@ -1548,7 +1522,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn filter_map_chunks_spaces() -> anyhow::Result<()> {
|
||||
let paths = fmc(" a b c d e ", 2, |chunk, chunk_index, _, _| {
|
||||
let paths = fmc(" a b c d e ", 2, |chunk, chunk_index, _| {
|
||||
Ok(Some(vec![(chunk.to_string(), chunk_index)]))
|
||||
})?;
|
||||
check_paths(
|
||||
|
|
|
@ -15,7 +15,7 @@ use crate::{
|
|||
KeywordInsertStatement, KeywordMetricsInsertStatement, SuggestDao,
|
||||
SuggestionInsertStatement, DEFAULT_SUGGESTION_SCORE,
|
||||
},
|
||||
geoname::{Geoname, GeonameType},
|
||||
geoname::{GeonameMatch, GeonameType},
|
||||
metrics::MetricsContext,
|
||||
provider::SuggestionProvider,
|
||||
rs::{Client, Record, SuggestRecordId},
|
||||
|
@ -101,7 +101,8 @@ impl SuggestDao<'_> {
|
|||
.collect();
|
||||
|
||||
let mut matches =
|
||||
filter_map_chunks::<Token>(&words, max_chunk_size, |chunk, c_i, c_size, path| {
|
||||
// Step 2: Parse the query words into a list of token paths.
|
||||
filter_map_chunks::<Token>(&words, max_chunk_size, |chunk, chunk_i, path| {
|
||||
// Match the chunk to token types that haven't already been matched
|
||||
// in this path. `all_tokens` will remain `None` until a token is
|
||||
// matched.
|
||||
|
@ -112,21 +113,7 @@ impl SuggestDao<'_> {
|
|||
TokenType::WeatherKeyword,
|
||||
] {
|
||||
if !path.iter().any(|t| t.token_type() == tt) {
|
||||
let is_first_chunk = c_i == 0;
|
||||
let is_last_chunk = c_i + c_size == words.len();
|
||||
// Match prefixes if the chunk isn't the first term in
|
||||
// the query.
|
||||
let match_prefixes = !is_first_chunk;
|
||||
// Match abbreviations if the chunk isn't the only term
|
||||
// in the query.
|
||||
let match_abbreviations = !is_first_chunk || !is_last_chunk;
|
||||
let mut tokens = self.match_weather_tokens(
|
||||
tt,
|
||||
path,
|
||||
chunk,
|
||||
match_prefixes,
|
||||
match_abbreviations,
|
||||
)?;
|
||||
let mut tokens = self.match_weather_tokens(tt, path, chunk, chunk_i == 0)?;
|
||||
if !tokens.is_empty() {
|
||||
let mut ts = all_tokens.take().unwrap_or_default();
|
||||
ts.append(&mut tokens);
|
||||
|
@ -138,8 +125,8 @@ impl SuggestDao<'_> {
|
|||
Ok(all_tokens)
|
||||
})?
|
||||
.into_iter()
|
||||
// Map each token path to a tuple that represents a matched city,
|
||||
// region, and keyword (each optional). Since paths are vecs,
|
||||
// Step 3: Map each token path to a tuple that represents a matched
|
||||
// city, region, and keyword (each optional). Since paths are vecs,
|
||||
// they're ordered, so we may end up with duplicate tuples after
|
||||
// this step. e.g., the paths `[<Waterloo IA>, <IA>]` and `[<IA>,
|
||||
// <Waterloo IA>]` map to the same match.
|
||||
|
@ -160,17 +147,34 @@ impl SuggestDao<'_> {
|
|||
match_tuple
|
||||
})
|
||||
})
|
||||
// Dedupe the matches by collecting them into a set.
|
||||
// Step 4: Discard matches that don't have the right combination of
|
||||
// tokens or that are otherwise invalid. Along with step 1, this is
|
||||
// the core of the matching logic. In general, allow a match if it
|
||||
// has (a) a city name typed in full or (b) a weather keyword at
|
||||
// least as long as the config's min keyword length, since that
|
||||
// indicates a weather intent.
|
||||
.filter(|(city_match, region_match, kw_match)| {
|
||||
match (city_match, region_match, kw_match) {
|
||||
(None, None, Some(_)) => true,
|
||||
(None, _, None) | (None, Some(_), Some(_)) => false,
|
||||
(Some(city), region, kw) => {
|
||||
(city.match_type.is_name() && !city.prefix)
|
||||
// Allow city abbreviations without a weather
|
||||
// keyword but only if the region was typed in full.
|
||||
|| (city.match_type.is_abbreviation()
|
||||
&& !city.prefix
|
||||
&& region.as_ref().map(|r| !r.prefix).unwrap_or(false))
|
||||
|| kw.as_ref().map(|k| k.is_min_keyword_length).unwrap_or(false)
|
||||
}
|
||||
}
|
||||
})
|
||||
// Step 5: Map the match objects to their underlying values.
|
||||
.map(|(city, region, kw)| {
|
||||
(city.map(|c| c.geoname), region.map(|r| r.geoname), kw.map(|k| k.keyword))
|
||||
})
|
||||
// Step 6: Dedupe the values by collecting them into a set.
|
||||
.collect::<HashSet<_>>()
|
||||
.into_iter()
|
||||
// Filter out matches that don't have the right combination of
|
||||
// tokens.
|
||||
.filter(|(city, region, kw)| {
|
||||
!matches!(
|
||||
(city, region, kw),
|
||||
(None, _, None) | (None, Some(_), Some(_))
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Sort the matches so cities with larger populations are first.
|
||||
|
@ -207,19 +211,20 @@ impl SuggestDao<'_> {
|
|||
token_type: TokenType,
|
||||
path: &[Token],
|
||||
candidate: &str,
|
||||
match_prefixes: bool,
|
||||
match_abbreviations: bool,
|
||||
is_first_chunk: bool,
|
||||
) -> Result<Vec<Token>> {
|
||||
match token_type {
|
||||
TokenType::City => {
|
||||
// Fetch matching cities, and filter them to regions we've
|
||||
// already matched in this path.
|
||||
let regions: Vec<_> = path.iter().filter_map(|t| t.region()).collect();
|
||||
let regions: Vec<_> = path
|
||||
.iter()
|
||||
.filter_map(|t| t.region().map(|m| &m.geoname))
|
||||
.collect();
|
||||
Ok(self
|
||||
.fetch_geonames(
|
||||
candidate,
|
||||
match_prefixes,
|
||||
match_abbreviations,
|
||||
!is_first_chunk,
|
||||
Some(GeonameType::City),
|
||||
if regions.is_empty() {
|
||||
None
|
||||
|
@ -234,12 +239,14 @@ impl SuggestDao<'_> {
|
|||
TokenType::Region => {
|
||||
// Fetch matching regions, and filter them to cities we've
|
||||
// already matched in this path.
|
||||
let cities: Vec<_> = path.iter().filter_map(|t| t.city()).collect();
|
||||
let cities: Vec<_> = path
|
||||
.iter()
|
||||
.filter_map(|t| t.city().map(|m| &m.geoname))
|
||||
.collect();
|
||||
Ok(self
|
||||
.fetch_geonames(
|
||||
candidate,
|
||||
match_prefixes,
|
||||
match_abbreviations,
|
||||
!is_first_chunk,
|
||||
Some(GeonameType::Region),
|
||||
if cities.is_empty() {
|
||||
None
|
||||
|
@ -255,11 +262,10 @@ impl SuggestDao<'_> {
|
|||
// Fetch matching keywords. `min_keyword_length == 0` in the
|
||||
// config means that the config doesn't allow prefix matching.
|
||||
// `min_keyword_length > 0` means that the keyword must be at
|
||||
// least that long when it's the first term in the query. We
|
||||
// assume that `match_prefixes == false` means the candidate is
|
||||
// the first term in the query.
|
||||
// least that long when there's not already a city name present
|
||||
// in the query.
|
||||
let len = self.weather_cache().min_keyword_length;
|
||||
if !match_prefixes && (candidate.len() as i32) < len {
|
||||
if is_first_chunk && (candidate.len() as i32) < len {
|
||||
// The candidate is the first term in the query and it's too
|
||||
// short.
|
||||
Ok(vec![])
|
||||
|
@ -268,9 +274,14 @@ impl SuggestDao<'_> {
|
|||
// first term in the query or if the config allows prefix
|
||||
// matching.
|
||||
Ok(self
|
||||
.match_weather_keywords(candidate, match_prefixes || len > 0)?
|
||||
.match_weather_keywords(candidate, !is_first_chunk || len > 0)?
|
||||
.into_iter()
|
||||
.map(Token::WeatherKeyword)
|
||||
.map(|keyword| {
|
||||
Token::WeatherKeyword(WeatherKeywordMatch {
|
||||
keyword,
|
||||
is_min_keyword_length: (len as usize) <= candidate.len(),
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
@ -282,7 +293,8 @@ impl SuggestDao<'_> {
|
|||
r#"
|
||||
SELECT
|
||||
k.keyword,
|
||||
s.score
|
||||
s.score,
|
||||
k.keyword != :keyword AS matched_prefix
|
||||
FROM
|
||||
suggestions s
|
||||
JOIN
|
||||
|
@ -420,20 +432,20 @@ enum TokenType {
|
|||
|
||||
#[derive(Clone, Debug)]
|
||||
enum Token {
|
||||
City(Geoname),
|
||||
Region(Geoname),
|
||||
WeatherKeyword(String),
|
||||
City(GeonameMatch),
|
||||
Region(GeonameMatch),
|
||||
WeatherKeyword(WeatherKeywordMatch),
|
||||
}
|
||||
|
||||
impl Token {
|
||||
fn city(&self) -> Option<&Geoname> {
|
||||
fn city(&self) -> Option<&GeonameMatch> {
|
||||
match self {
|
||||
Self::City(g) => Some(g),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn region(&self) -> Option<&Geoname> {
|
||||
fn region(&self) -> Option<&GeonameMatch> {
|
||||
match self {
|
||||
Self::Region(g) => Some(g),
|
||||
_ => None,
|
||||
|
@ -449,10 +461,18 @@ impl Token {
|
|||
}
|
||||
}
|
||||
|
||||
/// A weather keyword matched against one chunk of the query, plus whether
/// the typed chunk was long enough to count as a full-strength keyword match.
///
/// Wrapped by `Token::WeatherKeyword`. The match filter uses
/// `is_min_keyword_length` to decide whether a keyword alone is enough to
/// keep a city match; afterwards only `keyword` is carried forward.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
|
||||
struct WeatherKeywordMatch {
|
||||
/// The keyword returned by `match_weather_keywords` for the candidate.
keyword: String,
|
||||
/// `true` when the candidate's length is at least the weather config's
/// `min_keyword_length`.
is_min_keyword_length: bool,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{geoname, store::tests::TestStore, testing::*, SuggestIngestionConstraints};
|
||||
use crate::{
|
||||
geoname, geoname::Geoname, store::tests::TestStore, testing::*, SuggestIngestionConstraints,
|
||||
};
|
||||
|
||||
impl From<Geoname> for Suggestion {
|
||||
fn from(g: Geoname) -> Self {
|
||||
|
@ -627,7 +647,7 @@ mod tests {
|
|||
"weather-1",
|
||||
json!({
|
||||
"keywords": ["ab", "xyz", "weather"],
|
||||
"min_keyword_length": 3,
|
||||
"min_keyword_length": 5,
|
||||
"max_keyword_length": "weather".len(),
|
||||
"max_keyword_word_count": 1,
|
||||
"score": 0.24
|
||||
|
@ -640,6 +660,244 @@ mod tests {
|
|||
});
|
||||
|
||||
let tests: &[(&str, Vec<Suggestion>)] = &[
|
||||
(
|
||||
"act",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// `min_keyword_length` = 5, so there should be a match.
|
||||
"act weath",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act weathe",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act weather",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"weather a",
|
||||
// The made-up long-name city starts with A.
|
||||
vec![geoname::tests::long_name_city().into()],
|
||||
),
|
||||
(
|
||||
"weather ac",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"weather act",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act t",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tx",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tx w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tx we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tx wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tx weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// `min_keyword_length` = 5, so there should be a match.
|
||||
"act tx weath",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act tx weathe",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act tx weather",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"tx a",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx ac",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx act",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx act w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx act we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx act wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"tx act weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// `min_keyword_length` = 5, so there should be a match.
|
||||
"tx act weath",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"tx act weathe",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"tx act weather",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act te",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act tex",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texa",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texas",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texas w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texas we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texas wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"act texas weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// `min_keyword_length` = 5, so there should be a match.
|
||||
"act texas weath",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act texas weathe",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"act texas weather",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"texas a",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas ac",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas act",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas act w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas act we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas act wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"texas act weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// The keyword prefix "weath" is 5 characters long, which meets `min_keyword_length` = 5, so there should be a match.
|
||||
"texas act weath",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"texas act weathe",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"texas act weather",
|
||||
vec![geoname::tests::waco().into()],
|
||||
),
|
||||
(
|
||||
"ia w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia wa",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia wat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia wate",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia water",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia waterl",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ia waterlo",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"waterloo",
|
||||
vec![
|
||||
|
@ -649,10 +907,26 @@ mod tests {
|
|||
geoname::tests::waterloo_al().into(),
|
||||
],
|
||||
),
|
||||
(
|
||||
"waterloo i",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
),
|
||||
(
|
||||
"waterloo ia",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
),
|
||||
(
|
||||
"waterloo io",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
),
|
||||
(
|
||||
"waterloo iow",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
),
|
||||
(
|
||||
"waterloo iowa",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
),
|
||||
(
|
||||
"ia waterloo",
|
||||
vec![geoname::tests::waterloo_ia().into()],
|
||||
|
@ -696,6 +970,34 @@ mod tests {
|
|||
vec![geoname::tests::nyc().into()],
|
||||
),
|
||||
("ny ny ny", vec![]),
|
||||
(
|
||||
"ny n",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny ne",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new ",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new y",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new yo",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new yor",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny new york",
|
||||
vec![geoname::tests::nyc().into()],
|
||||
|
@ -712,6 +1014,31 @@ mod tests {
|
|||
"ny weather",
|
||||
vec![geoname::tests::nyc().into()],
|
||||
),
|
||||
(
|
||||
"ny w",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny we",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny wea",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny weat",
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
// The keyword prefix "weath" is 5 characters long, which meets `min_keyword_length` = 5, so there should be a match.
|
||||
"ny weath",
|
||||
vec![geoname::tests::nyc().into()],
|
||||
),
|
||||
(
|
||||
"ny weathe",
|
||||
vec![geoname::tests::nyc().into()],
|
||||
),
|
||||
(
|
||||
"weather ny ny",
|
||||
vec![geoname::tests::nyc().into()],
|
||||
|
@ -827,11 +1154,11 @@ mod tests {
|
|||
),
|
||||
(
|
||||
"roc ny",
|
||||
vec![geoname::tests::rochester().into()],
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"ny roc",
|
||||
vec![geoname::tests::rochester().into()],
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
"nyc weather",
|
||||
|
@ -1129,7 +1456,9 @@ mod tests {
|
|||
for (query, expected_suggestions) in tests {
|
||||
assert_eq!(
|
||||
&store.fetch_suggestions(SuggestionQuery::weather(query)),
|
||||
expected_suggestions
|
||||
expected_suggestions,
|
||||
"Query: {:?}",
|
||||
query
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче