Switch relevancy component from UDL to proc-macros

2024-09-18 09:56:53 -03:00 · 2024-09-18 09:56:53 -03:00 · 220af9af6c
--- a/components/relevancy/build.rs
+++ b/components/relevancy/build.rs
@ -1,8 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-fn main() {
-    uniffi::generate_scaffolding("./src/relevancy.udl").unwrap();
-}
--- a/components/relevancy/src/error.rs
+++ b/components/relevancy/src/error.rs
@ -6,7 +6,7 @@
 use error_support::{ErrorHandling, GetErrorHandling};

 /// Errors we return via the public interface.
-#[derive(Debug, thiserror::Error)]
+#[derive(Debug, thiserror::Error, uniffi::Error)]
 pub enum RelevancyApiError {
    #[error("Unexpected Error: {reason}")]
    Unexpected { reason: String },
--- a/components/relevancy/src/interest.rs
+++ b/components/relevancy/src/interest.rs
@ -34,7 +34,7 @@ impl ToSql for InterestVectorKind {
 /// List of possible interests for a domain.  Domains can have be associated with one or multiple
 /// interests.  `Inconclusive` is used for domains in the user's top sites that we can't classify
 /// because there's no corresponding entry in the interest database.
-#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, uniffi::Enum)]
 #[repr(u32)]
 pub enum Interest {
    // Note: if you change these codes, make sure to update the `TryFrom<u32>` implementation and
@ -149,7 +149,7 @@ impl ToSql for Interest {
 ///
 /// Here "vector" refers to the mathematical object, not a Rust `Vec`.  It always has a fixed
 /// number of elements.
-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Debug, Default, PartialEq, Eq, uniffi::Record)]
 pub struct InterestVector {
    pub inconclusive: u32,
    pub animals: u32,
--- a/components/relevancy/src/lib.rs
+++ b/components/relevancy/src/lib.rs
@ -25,33 +25,39 @@ pub use ranker::score;

 use error_support::handle_error;

+uniffi::setup_scaffolding!();
+
+#[derive(uniffi::Object)]
 pub struct RelevancyStore {
    db: RelevancyDb,
 }

 /// Top-level API for the Relevancy component
+// Impl block to be exported via `UniFFI`.
+#[uniffi::export]
 impl RelevancyStore {
+    /// Construct a new RelevancyStore
+    ///
+    /// This is non-blocking since databases and other resources are lazily opened.
+    #[uniffi::constructor]
    pub fn new(db_path: String) -> Self {
        Self {
            db: RelevancyDb::new(db_path),
        }
    }

+    /// Close any open resources (for example databases)
+    ///
+    /// Calling `close` will interrupt any in-progress queries on other threads.
    pub fn close(&self) {
        self.db.close()
    }

+    /// Interrupt any current database queries
    pub fn interrupt(&self) {
        self.db.interrupt()
    }

-    /// Download the interest data from remote settings if needed
-    #[handle_error(Error)]
-    pub fn ensure_interest_data_populated(&self) -> ApiResult<()> {
-        ingest::ensure_interest_data_populated(&self.db)?;
-        Ok(())
-    }
-
    /// Ingest top URLs to build the user's interest vector.
    ///
    /// Consumer should pass a list of the user's top URLs by frecency to this method.  It will
@ -72,17 +78,6 @@ impl RelevancyStore {
        Ok(interest_vec)
    }

-    pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
-        let mut interest_vector = InterestVector::default();
-        for url in top_urls_by_frecency {
-            let interest_count = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
-            log::trace!("classified: {url} {}", interest_count.summary());
-            interest_vector = interest_vector + interest_count;
-        }
-
-        Ok(interest_vector)
-    }
-
    /// Calculate metrics for the validation phase
    ///
    /// This runs after [Self::ingest].  It takes the interest vector that ingest created and
@ -102,14 +97,50 @@ impl RelevancyStore {
    }
 }

-/// Interest metric data.  See `relevancy.udl` for details.
-pub struct InterestMetrics {
-    pub top_single_interest_similarity: u32,
-    pub top_2interest_similarity: u32,
-    pub top_3interest_similarity: u32,
+impl RelevancyStore {
+    /// Download the interest data from remote settings if needed
+    #[handle_error(Error)]
+    pub fn ensure_interest_data_populated(&self) -> ApiResult<()> {
+        ingest::ensure_interest_data_populated(&self.db)?;
+        Ok(())
+    }
+
+    pub fn classify(&self, top_urls_by_frecency: Vec<String>) -> Result<InterestVector> {
+        let mut interest_vector = InterestVector::default();
+        for url in top_urls_by_frecency {
+            let interest_count = self.db.read(|dao| dao.get_url_interest_vector(&url))?;
+            log::trace!("classified: {url} {}", interest_count.summary());
+            interest_vector = interest_vector + interest_count;
+        }
+        Ok(interest_vector)
+    }
 }

-uniffi::include_scaffolding!("relevancy");
+/// Interest metrics that we want to send to Glean as part of the validation process.  These contain
+/// the cosine similarity when comparing the user's interest against various interest vectors that
+/// consumers may use.
+///
+/// Cosine similarity was chosen because it seems easy to calculate.  This was then matched against
+/// some semi-plausible real-world interest vectors that consumers might use.  This is all up for
+/// debate and we may decide to switch to some other metrics.
+///
+/// Similarity values are transformed to integers by multiplying the floating point value by 1000 and
+/// rounding.  This is to make them compatible with Glean's distribution metrics.
+#[derive(uniffi::Record)]
+pub struct InterestMetrics {
+    /// Similarity between the user's interest vector and an interest vector where the element for
+    /// the user's top interest is copied, but all other interests are set to zero.  This measures
+    /// the highest possible similarity with consumers that used interest vectors with a single
+    /// interest set.
+    pub top_single_interest_similarity: u32,
+    /// The same as before, but the top 2 interests are copied. This measures the highest possible
+    /// similarity with consumers that used interest vectors with two interests (note: this means
+    /// they would need to choose the user's top two interests and have the exact same proportion
+    /// between them as the user).
+    pub top_2interest_similarity: u32,
+    /// The same as before, but the top 3 interests are copied.
+    pub top_3interest_similarity: u32,
+}

 #[cfg(test)]
 mod test {
--- a/components/relevancy/src/ranker.rs
+++ b/components/relevancy/src/ranker.rs
@ -14,6 +14,7 @@ use crate::interest::{Interest, InterestVector};
 ///   - `content_categories`: a list of categories (interests) of the give content.
 /// Return:
 //   - A score ranges in [0, 1].
+#[uniffi::export]
 pub fn score(interest_vector: InterestVector, content_categories: Vec<Interest>) -> f64 {
    let n = content_categories
        .iter()
--- a/components/relevancy/src/relevancy.udl
+++ b/components/relevancy/src/relevancy.udl
@ -1,125 +0,0 @@
-namespace relevancy {
-  // Calculate score for a piece of categorized content based on a user interest vector.
-  //
-  // Params:
-  //   - `interest_vector`: a user interest vector that can be fetched via
-  //     `RelevancyStore::user_interest_vector()`.
-  //   - `content_categories`: a list of categories (interests) of the give content.
-  // Return:
-  //   - A score ranges in [0, 1].
-  double score(InterestVector interest_vector, sequence<Interest> content_categories);
-};
-
-[Error]
-interface RelevancyApiError {
-    Unexpected(string reason);
-};
-
-// Top-level class for the Relevancy component
-interface RelevancyStore {
-    /// Construct a new RelevancyStore
-    ///
-    /// This is non-blocking since databases and other resources are lazily opened.
-    constructor(string dbpath);
-
-    /// Close any open resources (for example databases)
-    ///
-    /// Calling `close` will interrupt any in-progress queries on other threads.
-    void close();
-
-    /// Interrupt any current database queries
-    void interrupt();
-
-    /// Ingest the top URLs by frequency to build up the user's interest vector
-    [Throws=RelevancyApiError]
-    InterestVector ingest(sequence<string> top_urls);
-
-    /// Calculate metrics for the user's interest vector in order to measure how strongly we're
-    /// identifying interests.  See the `InterestMetrics` struct for details.
-    [Throws=RelevancyApiError]
-    InterestMetrics calculate_metrics();
-
-    /// Get the interest vector for the user.
-    ///
-    /// This is intended to be show to the user in an `about:` page so that users can judge if it
-    /// feels correct.
-    [Throws=RelevancyApiError]
-    InterestVector user_interest_vector();
-};
-
-enum Interest {
-    "Animals",
-    "Arts",
-    "Autos",
-    "Business",
-    "Career",
-    "Education",
-    "Fashion",
-    "Finance",
-    "Food",
-    "Government",
-    // "Health",
-    "Hobbies",
-    "Home",
-    "News",
-    "RealEstate",
-    "Society",
-    "Sports",
-    "Tech",
-    "Travel",
-    "Inconclusive",
-};
-
-/// Interest metrics that we want to send to Glean as part of the validation process.  These contain
-/// the cosine similarity when comparing the user's interest against various interest vectors that
-/// consumers may use.
-///
-/// Cosine similarly was chosen because it seems easy to calculate.  This was then matched against
-/// some semi-plausible real-world interest vectors that consumers might use.  This is all up for
-/// debate and we may decide to switch to some other metrics.
-///
-/// Similarity values are transformed to integers by multiplying the floating point value by 1000 and
-/// rounding.  This is to make them compatible with Glean's distribution metrics.
-dictionary InterestMetrics {
-    /// Similarity between the user's interest vector and an interest vector where the element for
-    /// the user's top interest is copied, but all other interests are set to zero.  This measures
-    /// the highest possible similarity with consumers that used interest vectors with a single
-    /// interest set.
-    u32 top_single_interest_similarity;
-
-    /// The same as before, but the top 2 interests are copied. This measures the highest possible
-    /// similarity with consumers that used interest vectors with a two interests (note: this means
-    /// they would need to choose the user's top two interests and have the exact same proportion
-    /// between them as the user).
-    u32 top_2interest_similarity;
-
-    /// The same as before, but the top 3 interests are copied.
-    u32 top_3interest_similarity;
-};
-
-/// Vector storing a count value for each interest
-///
-/// Here "vector" refers to the mathematical object, not a Rust `Vec`.  It always has a fixed
-/// number of elements.
-dictionary InterestVector {
-    u32 animals;
-    u32 arts;
-    u32 autos;
-    u32 business;
-    u32 career;
-    u32 education;
-    u32 fashion;
-    u32 finance;
-    u32 food;
-    u32 government;
-    // u32 health;
-    u32 hobbies;
-    u32 home;
-    u32 news;
-    u32 real_estate;
-    u32 society;
-    u32 sports;
-    u32 tech;
-    u32 travel;
-    u32 inconclusive;
-};