Optimize places_get_visited and make it tolerate bad urls.
This commit is contained in:
Родитель
7591d77270
Коммит
92ea7b68c1
|
@ -4,6 +4,12 @@
|
|||
|
||||
[Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.3...master)
|
||||
|
||||
## Places
|
||||
|
||||
### What's Fixed
|
||||
|
||||
- PlacesConnection.getVisited will now return that invalid URLs have not been visited, instead of throwing. ([#552](https://github.com/mozilla/application-services/issues/552))
|
||||
|
||||
# 0.13.3 (_2019-01-11_)
|
||||
|
||||
[Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.2...v0.13.3)
|
||||
|
|
|
@ -9,6 +9,7 @@ import com.sun.jna.Library
|
|||
import com.sun.jna.Native
|
||||
import com.sun.jna.Pointer
|
||||
import com.sun.jna.PointerType
|
||||
import com.sun.jna.StringArray
|
||||
import java.lang.reflect.Proxy
|
||||
|
||||
internal interface LibPlacesFFI : Library {
|
||||
|
@ -46,7 +47,6 @@ internal interface LibPlacesFFI : Library {
|
|||
out_err: RustError.ByReference
|
||||
): RawPlacesConnection?
|
||||
|
||||
/** Returns JSON string, which you need to free with places_destroy_string */
|
||||
fun places_note_observation(
|
||||
conn: RawPlacesConnection,
|
||||
json_observation_data: String,
|
||||
|
@ -61,11 +61,17 @@ internal interface LibPlacesFFI : Library {
|
|||
out_err: RustError.ByReference
|
||||
): Pointer?
|
||||
|
||||
/** Note: urls_len and buffer_len must be the same length. The argument is somewhat redundant, but
|
||||
* is provided for a slight additional amount of sanity checking. These lengths are the number
|
||||
* of elements present (and not e.g. the number of bytes allocated). */
|
||||
fun places_get_visited(
|
||||
conn: RawPlacesConnection,
|
||||
urls_json: String,
|
||||
urls: StringArray,
|
||||
urls_len: Int,
|
||||
buffer: Pointer,
|
||||
buf_len: Int,
|
||||
out_err: RustError.ByReference
|
||||
): Pointer?
|
||||
)
|
||||
|
||||
fun places_get_visited_urls_in_range(
|
||||
conn: RawPlacesConnection,
|
||||
|
|
|
@ -4,11 +4,15 @@
|
|||
|
||||
package org.mozilla.places
|
||||
|
||||
import com.sun.jna.Native
|
||||
import com.sun.jna.Pointer
|
||||
import com.sun.jna.StringArray
|
||||
import org.json.JSONArray
|
||||
import org.json.JSONException
|
||||
import org.json.JSONObject
|
||||
import java.io.File
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
|
||||
/**
|
||||
* An implementation of a [PlacesAPI] backed by a Rust Places library.
|
||||
|
@ -67,18 +71,29 @@ class PlacesConnection(path: String, encryption_key: String? = null) : PlacesAPI
|
|||
}
|
||||
|
||||
override fun getVisited(urls: List<String>): List<Boolean> {
|
||||
val urlsToJson = JSONArray()
|
||||
for (url in urls) {
|
||||
urlsToJson.put(url)
|
||||
// Note urlStrings has a potential footgun in that StringArray has a `size()` method
|
||||
// which returns the size *in bytes*. Hence us using urls.size (which is an element count)
|
||||
// for the actual number of urls!
|
||||
val urlStrings = StringArray(urls.toTypedArray(), "utf8")
|
||||
val byteBuffer = ByteBuffer.allocateDirect(urls.size)
|
||||
byteBuffer.order(ByteOrder.nativeOrder())
|
||||
rustCall { error ->
|
||||
val bufferPtr = Native.getDirectBufferPointer(byteBuffer)
|
||||
LibPlacesFFI.INSTANCE.places_get_visited(
|
||||
this.db!!,
|
||||
urlStrings, urls.size,
|
||||
bufferPtr, urls.size,
|
||||
error
|
||||
)
|
||||
}
|
||||
val urlStr = urlsToJson.toString()
|
||||
val visitedStr = rustCallForString { error ->
|
||||
LibPlacesFFI.INSTANCE.places_get_visited(this.db!!, urlStr, error)
|
||||
}
|
||||
val visited = JSONArray(visitedStr)
|
||||
val result = mutableListOf<Boolean>()
|
||||
for (index in 0 until visited.length()) {
|
||||
result.add(visited.getBoolean(index))
|
||||
val result = ArrayList<Boolean>(urls.size)
|
||||
for (index in 0 until urls.size) {
|
||||
val wasVisited = byteBuffer.get(index)
|
||||
if (wasVisited != 0.toByte() && wasVisited != 1.toByte()) {
|
||||
throw java.lang.RuntimeException(
|
||||
"Places bug! Memory corruption possible! Report me!")
|
||||
}
|
||||
result.add(wasVisited == 1.toByte())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -90,22 +90,33 @@ pub unsafe extern "C" fn places_query_autocomplete(
|
|||
#[no_mangle]
|
||||
pub unsafe extern "C" fn places_get_visited(
|
||||
conn: &PlacesDb,
|
||||
urls_json: *const c_char,
|
||||
urls: *const *const c_char,
|
||||
urls_len: i32,
|
||||
byte_buffer: *mut bool,
|
||||
byte_buffer_len: i32,
|
||||
error: &mut ExternError,
|
||||
) -> *mut c_char {
|
||||
) {
|
||||
log::debug!("places_get_visited");
|
||||
// This function has a dumb amount of overhead and copying...
|
||||
call_with_result(error, || -> places::Result<String> {
|
||||
let json = ffi_support::rust_str_from_c(urls_json);
|
||||
let url_strings: Vec<String> = serde_json::from_str(json)?;
|
||||
let urls = url_strings
|
||||
.into_iter()
|
||||
.map(|url| url::Url::parse(&url))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
// We need to call `to_string` manually because primitives (e.g. bool) don't implement
|
||||
// `ffi_support::IntoFfiJsonTag` (Not clear if they should, needs more thought).
|
||||
let visited = storage::history::get_visited(conn, &urls)?;
|
||||
Ok(serde_json::to_string(&visited)?)
|
||||
call_with_result(error, || -> places::Result<()> {
|
||||
assert!(
|
||||
urls_len >= 0,
|
||||
"Negative array length provided to places_get_visited {}",
|
||||
urls_len
|
||||
);
|
||||
assert_eq!(byte_buffer_len, urls_len);
|
||||
let url_ptrs = std::slice::from_raw_parts(urls, urls_len as usize);
|
||||
let output = std::slice::from_raw_parts_mut(byte_buffer, byte_buffer_len as usize);
|
||||
let urls = url_ptrs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &p)| {
|
||||
let s = ffi_support::rust_str_from_c(p);
|
||||
url::Url::parse(s).ok().map(|url| (idx, url))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
storage::history::get_visited_into(conn, &urls, output)?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -575,29 +575,43 @@ pub mod history_sync {
|
|||
}
|
||||
} // end of sync module.
|
||||
|
||||
pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
|
||||
let mut result = vec![false; urls.len()];
|
||||
pub fn get_visited<I>(db: &PlacesDb, urls: I) -> Result<Vec<bool>>
|
||||
where
|
||||
I: IntoIterator<Item = Url>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let iter = urls.into_iter();
|
||||
let mut result = vec![false; iter.len()];
|
||||
let url_idxs = iter.enumerate().collect::<Vec<_>>();
|
||||
get_visited_into(db, &url_idxs, &mut result)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Low level api used to implement both get_visited and the FFI get_visited call.
|
||||
/// Takes a slice where we should output the results, as well as a slice of
|
||||
/// index/url pairs.
|
||||
///
|
||||
/// This is done so that the FFI can more easily support returning
|
||||
/// false when asked if it's visited an invalid URL.
|
||||
pub fn get_visited_into(
|
||||
db: &PlacesDb,
|
||||
urls_idxs: &[(usize, Url)],
|
||||
result: &mut [bool],
|
||||
) -> Result<()> {
|
||||
sql_support::each_chunk_mapped(
|
||||
&urls,
|
||||
|url| url.as_str(),
|
||||
&urls_idxs,
|
||||
|(_, url)| url.as_str(),
|
||||
|chunk, offset| -> Result<()> {
|
||||
let values_with_idx = sql_support::repeat_display(chunk.len(), ",", |i, f| {
|
||||
write!(
|
||||
f,
|
||||
"({},{},?)",
|
||||
i + offset,
|
||||
hash::hash_url(urls[i + offset].as_str())
|
||||
)
|
||||
let (idx, url) = &urls_idxs[i + offset];
|
||||
write!(f, "({},{},?)", *idx, hash::hash_url(url.as_str()))
|
||||
});
|
||||
let sql = format!(
|
||||
"
|
||||
WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
|
||||
SELECT fetch_url_index
|
||||
FROM moz_places h
|
||||
JOIN to_fetch f
|
||||
ON h.url_hash = f.url_hash
|
||||
AND h.url = f.url
|
||||
",
|
||||
"WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
|
||||
SELECT fetch_url_index
|
||||
FROM moz_places h
|
||||
JOIN to_fetch f ON h.url_hash = f.url_hash
|
||||
AND h.url = f.url",
|
||||
values_with_idx
|
||||
);
|
||||
let mut stmt = db.prepare(&sql)?;
|
||||
|
@ -610,7 +624,7 @@ pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
|
|||
Ok(())
|
||||
},
|
||||
)?;
|
||||
Ok(result)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the set of urls that were visited between `start` and `end`. Only considers local visits
|
||||
|
@ -903,13 +917,23 @@ mod tests {
|
|||
let _ = env_logger::try_init();
|
||||
let mut conn = PlacesDb::open_in_memory(None)?;
|
||||
|
||||
let unicode_in_path = "http://www.example.com/tëst😀abc";
|
||||
let escaped_unicode_in_path = "http://www.example.com/t%C3%ABst%F0%9F%98%80abc";
|
||||
|
||||
let unicode_in_domain = "http://www.exämple😀123.com";
|
||||
let escaped_unicode_in_domain = "http://www.xn--exmple123-w2a24222l.com";
|
||||
|
||||
let to_add = [
|
||||
"https://www.example.com/1",
|
||||
"https://www.example.com/12",
|
||||
"https://www.example.com/123",
|
||||
"https://www.example.com/1234",
|
||||
"https://www.mozilla.com",
|
||||
"https://www.firefox.com",
|
||||
"https://www.example.com/1".to_string(),
|
||||
"https://www.example.com/12".to_string(),
|
||||
"https://www.example.com/123".to_string(),
|
||||
"https://www.example.com/1234".to_string(),
|
||||
"https://www.mozilla.com".to_string(),
|
||||
"https://www.firefox.com".to_string(),
|
||||
unicode_in_path.to_string() + "/1",
|
||||
escaped_unicode_in_path.to_string() + "/2",
|
||||
unicode_in_domain.to_string() + "/1",
|
||||
escaped_unicode_in_domain.to_string() + "/2",
|
||||
];
|
||||
|
||||
for item in &to_add {
|
||||
|
@ -921,26 +945,37 @@ mod tests {
|
|||
}
|
||||
|
||||
let to_search = [
|
||||
("https://www.example.com", false),
|
||||
("https://www.example.com/1", true),
|
||||
("https://www.example.com/12", true),
|
||||
("https://www.example.com/123", true),
|
||||
("https://www.example.com/1234", true),
|
||||
("https://www.example.com/12345", false),
|
||||
("https://www.mozilla.com", true),
|
||||
("https://www.firefox.com", true),
|
||||
("https://www.mozilla.org", false),
|
||||
("https://www.example.com".to_string(), false),
|
||||
("https://www.example.com/1".to_string(), true),
|
||||
("https://www.example.com/12".to_string(), true),
|
||||
("https://www.example.com/123".to_string(), true),
|
||||
("https://www.example.com/1234".to_string(), true),
|
||||
("https://www.example.com/12345".to_string(), false),
|
||||
("https://www.mozilla.com".to_string(), true),
|
||||
("https://www.firefox.com".to_string(), true),
|
||||
("https://www.mozilla.org".to_string(), false),
|
||||
// dupes should still work!
|
||||
("https://www.example.com/1234", true),
|
||||
("https://www.example.com/12345", false),
|
||||
("https://www.example.com/1234".to_string(), true),
|
||||
("https://www.example.com/12345".to_string(), false),
|
||||
// The unicode URLs should work when escaped the way we
|
||||
// encountered them
|
||||
(unicode_in_path.to_string() + "/1", true),
|
||||
(escaped_unicode_in_path.to_string() + "/2", true),
|
||||
(unicode_in_domain.to_string() + "/1", true),
|
||||
(escaped_unicode_in_domain.to_string() + "/2", true),
|
||||
// But also the other way.
|
||||
(unicode_in_path.to_string() + "/2", true),
|
||||
(escaped_unicode_in_path.to_string() + "/1", true),
|
||||
(unicode_in_domain.to_string() + "/2", true),
|
||||
(escaped_unicode_in_domain.to_string() + "/1", true),
|
||||
];
|
||||
|
||||
let urls = to_search
|
||||
.iter()
|
||||
.map(|(url, _expect)| Url::parse(url).unwrap())
|
||||
.map(|(url, _expect)| Url::parse(&url).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let visited = get_visited(&conn, &urls).unwrap();
|
||||
let visited = get_visited(&conn, urls).unwrap();
|
||||
|
||||
assert_eq!(visited.len(), to_search.len());
|
||||
|
||||
|
@ -958,6 +993,59 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_visited_into() {
|
||||
let _ = env_logger::try_init();
|
||||
let mut conn = PlacesDb::open_in_memory(None).unwrap();
|
||||
|
||||
let to_add = [
|
||||
Url::parse("https://www.example.com/1").unwrap(),
|
||||
Url::parse("https://www.example.com/12").unwrap(),
|
||||
Url::parse("https://www.example.com/123").unwrap(),
|
||||
];
|
||||
|
||||
for item in &to_add {
|
||||
apply_observation(
|
||||
&mut conn,
|
||||
VisitObservation::new(item.clone()).with_visit_type(VisitTransition::Link),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let mut results = [false; 10];
|
||||
|
||||
let get_visited_request = [
|
||||
// 0 blank
|
||||
(2, to_add[1].clone()),
|
||||
(1, to_add[0].clone()),
|
||||
// 3 blank
|
||||
(4, to_add[2].clone()),
|
||||
// 5 blank
|
||||
// Note: url for 6 is not visited.
|
||||
(6, Url::parse("https://www.example.com/1234").unwrap()),
|
||||
// 7 blank
|
||||
// Note: dupe is allowed
|
||||
(8, to_add[1].clone()),
|
||||
// 9 is blank
|
||||
];
|
||||
|
||||
get_visited_into(&conn, &get_visited_request, &mut results).unwrap();
|
||||
let expect = [
|
||||
false, // 0
|
||||
true, // 1
|
||||
true, // 2
|
||||
false, // 3
|
||||
true, // 4
|
||||
false, // 5
|
||||
false, // 6
|
||||
false, // 7
|
||||
true, // 8
|
||||
false, // 9
|
||||
];
|
||||
|
||||
assert_eq!(expect, results);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_change_counter() -> Result<()> {
|
||||
let _ = env_logger::try_init();
|
||||
|
|
Загрузка…
Ссылка в новой задаче