Relevancy: implement ingestion
This commit is contained in:
Родитель
9fea1aea9c
Коммит
2e945f39a4
|
@ -136,6 +136,34 @@ pub struct InterestVector {
|
|||
pub travel: u32,
|
||||
}
|
||||
|
||||
impl std::ops::Add for InterestVector {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, other: Self) -> Self {
|
||||
Self {
|
||||
inconclusive: self.inconclusive + other.inconclusive,
|
||||
animals: self.animals + other.animals,
|
||||
arts: self.arts + other.arts,
|
||||
autos: self.autos + other.autos,
|
||||
business: self.business + other.business,
|
||||
career: self.career + other.career,
|
||||
education: self.education + other.education,
|
||||
fashion: self.fashion + other.fashion,
|
||||
finance: self.finance + other.finance,
|
||||
food: self.food + other.food,
|
||||
government: self.government + other.government,
|
||||
hobbies: self.hobbies + other.hobbies,
|
||||
home: self.home + other.home,
|
||||
news: self.news + other.news,
|
||||
real_estate: self.real_estate + other.real_estate,
|
||||
society: self.society + other.society,
|
||||
sports: self.sports + other.sports,
|
||||
tech: self.tech + other.tech,
|
||||
travel: self.travel + other.travel,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Index<Interest> for InterestVector {
|
||||
type Output = u32;
|
||||
|
||||
|
|
|
@ -55,8 +55,21 @@ impl RelevancyStore {
|
|||
///
|
||||
/// This method may execute for a long time and should only be called from a worker thread.
|
||||
#[handle_error(Error)]
|
||||
pub fn ingest(&self, _top_urls_by_frecency: Vec<String>) -> ApiResult<()> {
|
||||
ingest::ensure_interest_data_populated(&self.db)
|
||||
pub fn ingest(&self, top_urls_by_frecency: Vec<String>) -> ApiResult<InterestVector> {
|
||||
ingest::ensure_interest_data_populated(&self.db)?;
|
||||
self.classify(top_urls_by_frecency)
|
||||
}
|
||||
|
||||
pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
|
||||
// For experimentation purposes we are going to return an interest vector.
|
||||
// Eventually we would want to store this data in the DB and incrementally update it.
|
||||
let mut interest_vector = InterestVector::default();
|
||||
for url in top_urls_by_frecency {
|
||||
let interest_count = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
|
||||
interest_vector = interest_vector + interest_count;
|
||||
}
|
||||
|
||||
Ok(interest_vector)
|
||||
}
|
||||
|
||||
/// Calculate metrics for the validation phase
|
||||
|
@ -86,3 +99,45 @@ pub struct InterestMetrics {
|
|||
}
|
||||
|
||||
uniffi::include_scaffolding!("relevancy");
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::url_hash::hash_url;

    use super::*;

    /// Seeds the store with four URL -> interest rows, ingests the same four
    /// URLs, and checks the per-interest counts of the returned vector.
    #[test]
    fn test_ingest() {
        let relevancy_store =
            RelevancyStore::new("file:test_store_data?mode=memory&cache=shared".to_owned());

        // Register one interest per URL hash.
        relevancy_store
            .db
            .read_write(|dao| {
                let seeded = [
                    ("https://food.com", Interest::Food),
                    ("https://hello.com", Interest::Inconclusive),
                    ("https://pasta.com", Interest::Food),
                    ("https://dog.com", Interest::Animals),
                ];
                for (url, interest) in seeded {
                    dao.add_url_interest(hash_url(url).unwrap(), interest)?;
                }
                Ok(())
            })
            .expect("Insert should succeed");

        // Note: the first URL carries a trailing slash here but was seeded without
        // one -- presumably to exercise URL normalisation in the lookup; confirm
        // against `hash_url`.
        let top_urls: Vec<String> = [
            "https://food.com/",
            "https://hello.com",
            "https://pasta.com",
            "https://dog.com",
        ]
        .iter()
        .map(|url| url.to_string())
        .collect();

        // Two food URLs, one animals URL and one inconclusive URL; all other
        // interests keep their default value.
        let expected = InterestVector {
            inconclusive: 1,
            animals: 1,
            food: 2,
            ..InterestVector::default()
        };

        assert_eq!(relevancy_store.ingest(top_urls).unwrap(), expected);
    }
}
|
||||
|
|
|
@ -22,7 +22,7 @@ interface RelevancyStore {
|
|||
|
||||
// Ingest the top URLs by frecency to build up the user's interest vector
|
||||
[Throws=RelevancyApiError]
|
||||
void ingest(sequence<string> top_urls);
|
||||
InterestVector ingest(sequence<string> top_urls);
|
||||
|
||||
// Calculate metrics for the user's interest vector in order to measure how strongly we're
|
||||
// identifying interests. See the `InterestMetrics` struct for details.
|
||||
|
|
Загрузка…
Ссылка в новой задаче