Relevancy: implement ingestion

This commit is contained in:
Tif Tran 2024-05-08 12:03:09 -07:00
Родитель 9fea1aea9c
Коммит 2e945f39a4
3 изменённых файлов: 86 добавлений и 3 удалений

Просмотреть файл

@ -136,6 +136,34 @@ pub struct InterestVector {
pub travel: u32,
}
/// Field-wise addition of two interest vectors.
///
/// Each per-interest counter of the result is the counter of the left operand plus the
/// matching counter of the right operand; no field is combined with any other field.
impl std::ops::Add for InterestVector {
    type Output = Self;

    fn add(mut self, rhs: Self) -> Self::Output {
        // `self` is owned here, so accumulate into it in place and hand it back.
        self.inconclusive += rhs.inconclusive;
        self.animals += rhs.animals;
        self.arts += rhs.arts;
        self.autos += rhs.autos;
        self.business += rhs.business;
        self.career += rhs.career;
        self.education += rhs.education;
        self.fashion += rhs.fashion;
        self.finance += rhs.finance;
        self.food += rhs.food;
        self.government += rhs.government;
        self.hobbies += rhs.hobbies;
        self.home += rhs.home;
        self.news += rhs.news;
        self.real_estate += rhs.real_estate;
        self.society += rhs.society;
        self.sports += rhs.sports;
        self.tech += rhs.tech;
        self.travel += rhs.travel;
        self
    }
}
impl std::ops::Index<Interest> for InterestVector {
type Output = u32;

Просмотреть файл

@ -55,8 +55,21 @@ impl RelevancyStore {
///
/// This method may execute for a long time and should only be called from a worker thread.
#[handle_error(Error)]
pub fn ingest(&self, _top_urls_by_frecency: Vec<String>) -> ApiResult<()> {
ingest::ensure_interest_data_populated(&self.db)
pub fn ingest(&self, top_urls_by_frecency: Vec<String>) -> ApiResult<InterestVector> {
// Run the interest-data population step against the store's DB first; `?` returns early
// with the error if that step fails, so classification never runs on a failed setup.
ingest::ensure_interest_data_populated(&self.db)?;
// Hand the URLs to `classify`; its result (interest vector or error) is this call's result.
self.classify(top_urls_by_frecency)
}
/// Build a single interest vector for the given URLs.
///
/// For every URL, the per-URL interest vector is read through the DB's read DAO and added
/// onto a running total that starts at `InterestVector::default()`. An empty input therefore
/// yields the default vector. The first lookup that fails stops the iteration and its error
/// is returned.
pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
    // For experimentation purposes the summed vector is simply handed back to the caller.
    // Eventually this data should be stored in the DB and updated incrementally.
    top_urls_by_frecency
        .into_iter()
        .try_fold(InterestVector::default(), |running_total, url| {
            let url_interests = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
            Ok(running_total + url_interests)
        })
}
/// Calculate metrics for the validation phase
@ -86,3 +99,45 @@ pub struct InterestMetrics {
}
uniffi::include_scaffolding!("relevancy");
#[cfg(test)]
mod test {
    use super::*;
    use crate::url_hash::hash_url;

    /// Seeds the DB with URL-hash -> interest rows, ingests the matching URLs and checks
    /// that the returned vector counts one hit per seeded row.
    #[test]
    fn test_ingest() {
        let store =
            RelevancyStore::new("file:test_store_data?mode=memory&cache=shared".to_owned());

        // Register an interest for the hash of each URL the test is going to ingest.
        store
            .db
            .read_write(|dao| {
                let seeded = [
                    ("https://food.com", Interest::Food),
                    ("https://hello.com", Interest::Inconclusive),
                    ("https://pasta.com", Interest::Food),
                    ("https://dog.com", Interest::Animals),
                ];
                for (url, interest) in seeded {
                    dao.add_url_interest(hash_url(url).unwrap(), interest)?;
                }
                Ok(())
            })
            .expect("Insert should succeed");

        // Note the trailing slash on the first URL: it is seeded above without one.
        let urls: Vec<String> = [
            "https://food.com/",
            "https://hello.com",
            "https://pasta.com",
            "https://dog.com",
        ]
        .iter()
        .map(|url| url.to_string())
        .collect();

        // Two food URLs, one animal URL and one inconclusive URL were seeded.
        let expected = InterestVector {
            inconclusive: 1,
            animals: 1,
            food: 2,
            ..InterestVector::default()
        };
        assert_eq!(store.ingest(urls).unwrap(), expected);
    }
}

Просмотреть файл

@ -22,7 +22,7 @@ interface RelevancyStore {
// Ingest the top URLs by frecency to build up the user's interest vector
[Throws=RelevancyApiError]
void ingest(sequence<string> top_urls);
InterestVector ingest(sequence<string> top_urls);
// Calculate metrics for the user's interest vector in order to measure how strongly we're
// identifying interests. See the `InterestMetrics` struct for details.