Optimize places_get_visited and make it tolerate bad urls.

2019-01-16 12:21:13 -08:00 · 2019-01-16 12:21:13 -08:00 · 92ea7b68c1
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,12 @@

 [Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.3...master)

+## Places
+
+### What's Fixed
+
+- PlacesConnection.getVisited will now return that invalid URLs have not been visited, instead of throwing. ([#552](https://github.com/mozilla/application-services/issues/552))
+
 # 0.13.3 (_2019-01-11_)

 [Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.2...v0.13.3)
--- a/components/places/android/src/main/java/org/mozilla/places/LibPlacesFFI.kt
+++ b/components/places/android/src/main/java/org/mozilla/places/LibPlacesFFI.kt
@ -9,6 +9,7 @@ import com.sun.jna.Library
 import com.sun.jna.Native
 import com.sun.jna.Pointer
 import com.sun.jna.PointerType
+import com.sun.jna.StringArray
 import java.lang.reflect.Proxy

 internal interface LibPlacesFFI : Library {
@ -46,7 +47,6 @@ internal interface LibPlacesFFI : Library {
            out_err: RustError.ByReference
    ): RawPlacesConnection?

-    /** Returns JSON string, which you need to free with places_destroy_string */
    fun places_note_observation(
            conn: RawPlacesConnection,
            json_observation_data: String,
@ -61,11 +61,17 @@ internal interface LibPlacesFFI : Library {
            out_err: RustError.ByReference
    ): Pointer?

+    /** Note: urls_len and buffer_len must be the same length. The argument is somewhat redundant, but
+     * is provided for a slight additional amount of sanity checking. These lengths are the number
+     * of elements present (and not e.g. the number of bytes allocated). */
    fun places_get_visited(
            conn: RawPlacesConnection,
-            urls_json: String,
+            urls: StringArray,
+            urls_len: Int,
+            buffer: Pointer,
+            buf_len: Int,
            out_err: RustError.ByReference
-    ): Pointer?
+    )

    fun places_get_visited_urls_in_range(
            conn: RawPlacesConnection,
--- a/components/places/android/src/main/java/org/mozilla/places/PlacesConnection.kt
+++ b/components/places/android/src/main/java/org/mozilla/places/PlacesConnection.kt
@ -4,11 +4,15 @@

 package org.mozilla.places

+import com.sun.jna.Native
 import com.sun.jna.Pointer
+import com.sun.jna.StringArray
 import org.json.JSONArray
 import org.json.JSONException
 import org.json.JSONObject
 import java.io.File
+import java.nio.ByteBuffer
+import java.nio.ByteOrder

 /**
 * An implementation of a [PlacesAPI] backed by a Rust Places library.
@ -67,18 +71,29 @@ class PlacesConnection(path: String, encryption_key: String? = null) : PlacesAPI
    }

    override fun getVisited(urls: List<String>): List<Boolean> {
-        val urlsToJson = JSONArray()
-        for (url in urls) {
-            urlsToJson.put(url)
+        // Note urlStrings has a potential footgun in that StringArray has a `size()` method
+        // which returns the size *in bytes*. Hence us using urls.size (which is an element count)
+        // for the actual number of urls!
+        val urlStrings = StringArray(urls.toTypedArray(), "utf8")
+        val byteBuffer = ByteBuffer.allocateDirect(urls.size)
+        byteBuffer.order(ByteOrder.nativeOrder())
+        rustCall { error ->
+            val bufferPtr = Native.getDirectBufferPointer(byteBuffer)
+            LibPlacesFFI.INSTANCE.places_get_visited(
+                    this.db!!,
+                    urlStrings, urls.size,
+                    bufferPtr, urls.size,
+                    error
+            )
        }
-        val urlStr = urlsToJson.toString()
-        val visitedStr = rustCallForString { error ->
-            LibPlacesFFI.INSTANCE.places_get_visited(this.db!!, urlStr, error)
-        }
-        val visited = JSONArray(visitedStr)
-        val result = mutableListOf<Boolean>()
-        for (index in 0 until visited.length()) {
-            result.add(visited.getBoolean(index))
+        val result = ArrayList<Boolean>(urls.size)
+        for (index in 0 until urls.size) {
+            val wasVisited = byteBuffer.get(index)
+            if (wasVisited != 0.toByte() && wasVisited != 1.toByte()) {
+                throw java.lang.RuntimeException(
+                        "Places bug! Memory corruption possible! Report me!")
+            }
+            result.add(wasVisited == 1.toByte())
        }
        return result
    }
--- a/components/places/ffi/src/lib.rs
+++ b/components/places/ffi/src/lib.rs
@ -90,22 +90,33 @@ pub unsafe extern "C" fn places_query_autocomplete(
 #[no_mangle]
 pub unsafe extern "C" fn places_get_visited(
    conn: &PlacesDb,
-    urls_json: *const c_char,
+    urls: *const *const c_char,
+    urls_len: i32,
+    byte_buffer: *mut bool,
+    byte_buffer_len: i32,
    error: &mut ExternError,
-) -> *mut c_char {
+) {
    log::debug!("places_get_visited");
    // This function has a dumb amount of overhead and copying...
-    call_with_result(error, || -> places::Result<String> {
-        let json = ffi_support::rust_str_from_c(urls_json);
-        let url_strings: Vec<String> = serde_json::from_str(json)?;
-        let urls = url_strings
-            .into_iter()
-            .map(|url| url::Url::parse(&url))
-            .collect::<Result<Vec<_>, _>>()?;
-        // We need to call `to_string` manually because primitives (e.g. bool) don't implement
-        // `ffi_support::IntoFfiJsonTag` (Not clear if they should, needs more thought).
-        let visited = storage::history::get_visited(conn, &urls)?;
-        Ok(serde_json::to_string(&visited)?)
+    call_with_result(error, || -> places::Result<()> {
+        assert!(
+            urls_len >= 0,
+            "Negative array length provided to places_get_visited {}",
+            urls_len
+        );
+        assert_eq!(byte_buffer_len, urls_len);
+        let url_ptrs = std::slice::from_raw_parts(urls, urls_len as usize);
+        let output = std::slice::from_raw_parts_mut(byte_buffer, byte_buffer_len as usize);
+        let urls = url_ptrs
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, &p)| {
+                let s = ffi_support::rust_str_from_c(p);
+                url::Url::parse(s).ok().map(|url| (idx, url))
+            })
+            .collect::<Vec<_>>();
+        storage::history::get_visited_into(conn, &urls, output)?;
+        Ok(())
    })
 }

--- a/components/places/src/storage/history.rs
+++ b/components/places/src/storage/history.rs
@ -575,29 +575,43 @@ pub mod history_sync {
    }
 } // end of sync module.

-pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
-    let mut result = vec![false; urls.len()];
+pub fn get_visited<I>(db: &PlacesDb, urls: I) -> Result<Vec<bool>>
+where
+    I: IntoIterator<Item = Url>,
+    I::IntoIter: ExactSizeIterator,
+{
+    let iter = urls.into_iter();
+    let mut result = vec![false; iter.len()];
+    let url_idxs = iter.enumerate().collect::<Vec<_>>();
+    get_visited_into(db, &url_idxs, &mut result)?;
+    Ok(result)
+}
+
+/// Low level api used to implement both get_visited and the FFI get_visited call.
+/// Takes a slice where we should output the results, as well as a slice of
+/// index/url pairs.
+///
+/// This is done so that the FFI can more easily support returning
+/// false when asked if it's visited an invalid URL.
+pub fn get_visited_into(
+    db: &PlacesDb,
+    urls_idxs: &[(usize, Url)],
+    result: &mut [bool],
+) -> Result<()> {
    sql_support::each_chunk_mapped(
-        &urls,
-        |url| url.as_str(),
+        &urls_idxs,
+        |(_, url)| url.as_str(),
        |chunk, offset| -> Result<()> {
            let values_with_idx = sql_support::repeat_display(chunk.len(), ",", |i, f| {
-                write!(
-                    f,
-                    "({},{},?)",
-                    i + offset,
-                    hash::hash_url(urls[i + offset].as_str())
-                )
+                let (idx, url) = &urls_idxs[i + offset];
+                write!(f, "({},{},?)", *idx, hash::hash_url(url.as_str()))
            });
            let sql = format!(
-                "
-            WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
-            SELECT fetch_url_index
-            FROM moz_places h
-            JOIN to_fetch f
-            ON h.url_hash = f.url_hash
-              AND h.url = f.url
-        ",
+                "WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
+                 SELECT fetch_url_index
+                 FROM moz_places h
+                 JOIN to_fetch f ON h.url_hash = f.url_hash
+                   AND h.url = f.url",
                values_with_idx
            );
            let mut stmt = db.prepare(&sql)?;
@ -610,7 +624,7 @@ pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
            Ok(())
        },
    )?;
-    Ok(result)
+    Ok(())
 }

 /// Get the set of urls that were visited between `start` and `end`. Only considers local visits
@ -903,13 +917,23 @@ mod tests {
        let _ = env_logger::try_init();
        let mut conn = PlacesDb::open_in_memory(None)?;

+        let unicode_in_path = "http://www.example.com/tëst😀abc";
+        let escaped_unicode_in_path = "http://www.example.com/t%C3%ABst%F0%9F%98%80abc";
+
+        let unicode_in_domain = "http://www.exämple😀123.com";
+        let escaped_unicode_in_domain = "http://www.xn--exmple123-w2a24222l.com";
+
        let to_add = [
-            "https://www.example.com/1",
-            "https://www.example.com/12",
-            "https://www.example.com/123",
-            "https://www.example.com/1234",
-            "https://www.mozilla.com",
-            "https://www.firefox.com",
+            "https://www.example.com/1".to_string(),
+            "https://www.example.com/12".to_string(),
+            "https://www.example.com/123".to_string(),
+            "https://www.example.com/1234".to_string(),
+            "https://www.mozilla.com".to_string(),
+            "https://www.firefox.com".to_string(),
+            unicode_in_path.to_string() + "/1",
+            escaped_unicode_in_path.to_string() + "/2",
+            unicode_in_domain.to_string() + "/1",
+            escaped_unicode_in_domain.to_string() + "/2",
        ];

        for item in &to_add {
@ -921,26 +945,37 @@ mod tests {
        }

        let to_search = [
-            ("https://www.example.com", false),
-            ("https://www.example.com/1", true),
-            ("https://www.example.com/12", true),
-            ("https://www.example.com/123", true),
-            ("https://www.example.com/1234", true),
-            ("https://www.example.com/12345", false),
-            ("https://www.mozilla.com", true),
-            ("https://www.firefox.com", true),
-            ("https://www.mozilla.org", false),
+            ("https://www.example.com".to_string(), false),
+            ("https://www.example.com/1".to_string(), true),
+            ("https://www.example.com/12".to_string(), true),
+            ("https://www.example.com/123".to_string(), true),
+            ("https://www.example.com/1234".to_string(), true),
+            ("https://www.example.com/12345".to_string(), false),
+            ("https://www.mozilla.com".to_string(), true),
+            ("https://www.firefox.com".to_string(), true),
+            ("https://www.mozilla.org".to_string(), false),
            // dupes should still work!
-            ("https://www.example.com/1234", true),
-            ("https://www.example.com/12345", false),
+            ("https://www.example.com/1234".to_string(), true),
+            ("https://www.example.com/12345".to_string(), false),
+            // The unicode URLs should work when escaped the way we
+            // encountered them
+            (unicode_in_path.to_string() + "/1", true),
+            (escaped_unicode_in_path.to_string() + "/2", true),
+            (unicode_in_domain.to_string() + "/1", true),
+            (escaped_unicode_in_domain.to_string() + "/2", true),
+            // But also the other way.
+            (unicode_in_path.to_string() + "/2", true),
+            (escaped_unicode_in_path.to_string() + "/1", true),
+            (unicode_in_domain.to_string() + "/2", true),
+            (escaped_unicode_in_domain.to_string() + "/1", true),
        ];

        let urls = to_search
            .iter()
-            .map(|(url, _expect)| Url::parse(url).unwrap())
+            .map(|(url, _expect)| Url::parse(&url).unwrap())
            .collect::<Vec<_>>();

-        let visited = get_visited(&conn, &urls).unwrap();
+        let visited = get_visited(&conn, urls).unwrap();

        assert_eq!(visited.len(), to_search.len());

@ -958,6 +993,59 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn test_get_visited_into() {
+        let _ = env_logger::try_init();
+        let mut conn = PlacesDb::open_in_memory(None).unwrap();
+
+        let to_add = [
+            Url::parse("https://www.example.com/1").unwrap(),
+            Url::parse("https://www.example.com/12").unwrap(),
+            Url::parse("https://www.example.com/123").unwrap(),
+        ];
+
+        for item in &to_add {
+            apply_observation(
+                &mut conn,
+                VisitObservation::new(item.clone()).with_visit_type(VisitTransition::Link),
+            )
+            .unwrap();
+        }
+
+        let mut results = [false; 10];
+
+        let get_visited_request = [
+            // 0 blank
+            (2, to_add[1].clone()),
+            (1, to_add[0].clone()),
+            // 3 blank
+            (4, to_add[2].clone()),
+            // 5 blank
+            // Note: url for 6 is not visited.
+            (6, Url::parse("https://www.example.com/1234").unwrap()),
+            // 7 blank
+            // Note: dupe is allowed
+            (8, to_add[1].clone()),
+            // 9 is blank
+        ];
+
+        get_visited_into(&conn, &get_visited_request, &mut results).unwrap();
+        let expect = [
+            false, // 0
+            true,  // 1
+            true,  // 2
+            false, // 3
+            true,  // 4
+            false, // 5
+            false, // 6
+            false, // 7
+            true,  // 8
+            false, // 9
+        ];
+
+        assert_eq!(expect, results);
+    }
+
    #[test]
    fn test_change_counter() -> Result<()> {
        let _ = env_logger::try_init();