Optimize places_get_visited and make it tolerate bad urls.

This commit is contained in:
Thom Chiovoloni 2019-01-16 12:21:13 -08:00 коммит произвёл Thom
Родитель 7591d77270
Коммит 92ea7b68c1
5 изменённых файлов: 191 добавлений и 65 удалений

Просмотреть файл

@ -4,6 +4,12 @@
[Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.3...master)
## Places
### What's Fixed
- PlacesConnection.getVisited will now return that invalid URLs have not been visited, instead of throwing. ([#552](https://github.com/mozilla/application-services/issues/552))
# 0.13.3 (_2019-01-11_)
[Full Changelog](https://github.com/mozilla/application-services/compare/v0.13.2...v0.13.3)

Просмотреть файл

@ -9,6 +9,7 @@ import com.sun.jna.Library
import com.sun.jna.Native
import com.sun.jna.Pointer
import com.sun.jna.PointerType
import com.sun.jna.StringArray
import java.lang.reflect.Proxy
internal interface LibPlacesFFI : Library {
@ -46,7 +47,6 @@ internal interface LibPlacesFFI : Library {
out_err: RustError.ByReference
): RawPlacesConnection?
/** Returns JSON string, which you need to free with places_destroy_string */
fun places_note_observation(
conn: RawPlacesConnection,
json_observation_data: String,
@ -61,11 +61,17 @@ internal interface LibPlacesFFI : Library {
out_err: RustError.ByReference
): Pointer?
/** Note: urls_len and buffer_len must be the same length. The argument is somewhat redundant, but
* is provided for a slight additional amount of sanity checking. These lengths are the number
* of elements present (and not e.g. the number of bytes allocated). */
fun places_get_visited(
conn: RawPlacesConnection,
urls_json: String,
urls: StringArray,
urls_len: Int,
buffer: Pointer,
buf_len: Int,
out_err: RustError.ByReference
): Pointer?
)
fun places_get_visited_urls_in_range(
conn: RawPlacesConnection,

Просмотреть файл

@ -4,11 +4,15 @@
package org.mozilla.places
import com.sun.jna.Native
import com.sun.jna.Pointer
import com.sun.jna.StringArray
import org.json.JSONArray
import org.json.JSONException
import org.json.JSONObject
import java.io.File
import java.nio.ByteBuffer
import java.nio.ByteOrder
/**
* An implementation of a [PlacesAPI] backed by a Rust Places library.
@ -67,18 +71,29 @@ class PlacesConnection(path: String, encryption_key: String? = null) : PlacesAPI
}
override fun getVisited(urls: List<String>): List<Boolean> {
val urlsToJson = JSONArray()
for (url in urls) {
urlsToJson.put(url)
// Note urlStrings has a potential footgun in that StringArray has a `size()` method
// which returns the size *in bytes*. Hence us using urls.size (which is an element count)
// for the actual number of urls!
val urlStrings = StringArray(urls.toTypedArray(), "utf8")
val byteBuffer = ByteBuffer.allocateDirect(urls.size)
byteBuffer.order(ByteOrder.nativeOrder())
rustCall { error ->
val bufferPtr = Native.getDirectBufferPointer(byteBuffer)
LibPlacesFFI.INSTANCE.places_get_visited(
this.db!!,
urlStrings, urls.size,
bufferPtr, urls.size,
error
)
}
val urlStr = urlsToJson.toString()
val visitedStr = rustCallForString { error ->
LibPlacesFFI.INSTANCE.places_get_visited(this.db!!, urlStr, error)
}
val visited = JSONArray(visitedStr)
val result = mutableListOf<Boolean>()
for (index in 0 until visited.length()) {
result.add(visited.getBoolean(index))
val result = ArrayList<Boolean>(urls.size)
for (index in 0 until urls.size) {
val wasVisited = byteBuffer.get(index)
if (wasVisited != 0.toByte() && wasVisited != 1.toByte()) {
throw java.lang.RuntimeException(
"Places bug! Memory corruption possible! Report me!")
}
result.add(wasVisited == 1.toByte())
}
return result
}

Просмотреть файл

@ -90,22 +90,33 @@ pub unsafe extern "C" fn places_query_autocomplete(
#[no_mangle]
pub unsafe extern "C" fn places_get_visited(
conn: &PlacesDb,
urls_json: *const c_char,
urls: *const *const c_char,
urls_len: i32,
byte_buffer: *mut bool,
byte_buffer_len: i32,
error: &mut ExternError,
) -> *mut c_char {
) {
log::debug!("places_get_visited");
// This function has a dumb amount of overhead and copying...
call_with_result(error, || -> places::Result<String> {
let json = ffi_support::rust_str_from_c(urls_json);
let url_strings: Vec<String> = serde_json::from_str(json)?;
let urls = url_strings
.into_iter()
.map(|url| url::Url::parse(&url))
.collect::<Result<Vec<_>, _>>()?;
// We need to call `to_string` manually because primitives (e.g. bool) don't implement
// `ffi_support::IntoFfiJsonTag` (Not clear if they should, needs more thought).
let visited = storage::history::get_visited(conn, &urls)?;
Ok(serde_json::to_string(&visited)?)
call_with_result(error, || -> places::Result<()> {
assert!(
urls_len >= 0,
"Negative array length provided to places_get_visited {}",
urls_len
);
assert_eq!(byte_buffer_len, urls_len);
let url_ptrs = std::slice::from_raw_parts(urls, urls_len as usize);
let output = std::slice::from_raw_parts_mut(byte_buffer, byte_buffer_len as usize);
let urls = url_ptrs
.iter()
.enumerate()
.filter_map(|(idx, &p)| {
let s = ffi_support::rust_str_from_c(p);
url::Url::parse(s).ok().map(|url| (idx, url))
})
.collect::<Vec<_>>();
storage::history::get_visited_into(conn, &urls, output)?;
Ok(())
})
}

Просмотреть файл

@ -575,29 +575,43 @@ pub mod history_sync {
}
} // end of sync module.
pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
let mut result = vec![false; urls.len()];
pub fn get_visited<I>(db: &PlacesDb, urls: I) -> Result<Vec<bool>>
where
I: IntoIterator<Item = Url>,
I::IntoIter: ExactSizeIterator,
{
let iter = urls.into_iter();
let mut result = vec![false; iter.len()];
let url_idxs = iter.enumerate().collect::<Vec<_>>();
get_visited_into(db, &url_idxs, &mut result)?;
Ok(result)
}
/// Low level api used to implement both get_visited and the FFI get_visited call.
/// Takes a slice where we should output the results, as well as a slice of
/// index/url pairs.
///
/// This is done so that the FFI can more easily support returning
/// false when asked if it's visited an invalid URL.
pub fn get_visited_into(
db: &PlacesDb,
urls_idxs: &[(usize, Url)],
result: &mut [bool],
) -> Result<()> {
sql_support::each_chunk_mapped(
&urls,
|url| url.as_str(),
&urls_idxs,
|(_, url)| url.as_str(),
|chunk, offset| -> Result<()> {
let values_with_idx = sql_support::repeat_display(chunk.len(), ",", |i, f| {
write!(
f,
"({},{},?)",
i + offset,
hash::hash_url(urls[i + offset].as_str())
)
let (idx, url) = &urls_idxs[i + offset];
write!(f, "({},{},?)", *idx, hash::hash_url(url.as_str()))
});
let sql = format!(
"
WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
SELECT fetch_url_index
FROM moz_places h
JOIN to_fetch f
ON h.url_hash = f.url_hash
AND h.url = f.url
",
"WITH to_fetch(fetch_url_index, url_hash, url) AS (VALUES {})
SELECT fetch_url_index
FROM moz_places h
JOIN to_fetch f ON h.url_hash = f.url_hash
AND h.url = f.url",
values_with_idx
);
let mut stmt = db.prepare(&sql)?;
@ -610,7 +624,7 @@ pub fn get_visited(db: &PlacesDb, urls: &[Url]) -> Result<Vec<bool>> {
Ok(())
},
)?;
Ok(result)
Ok(())
}
/// Get the set of urls that were visited between `start` and `end`. Only considers local visits
@ -903,13 +917,23 @@ mod tests {
let _ = env_logger::try_init();
let mut conn = PlacesDb::open_in_memory(None)?;
let unicode_in_path = "http://www.example.com/tëst😀abc";
let escaped_unicode_in_path = "http://www.example.com/t%C3%ABst%F0%9F%98%80abc";
let unicode_in_domain = "http://www.exämple😀123.com";
let escaped_unicode_in_domain = "http://www.xn--exmple123-w2a24222l.com";
let to_add = [
"https://www.example.com/1",
"https://www.example.com/12",
"https://www.example.com/123",
"https://www.example.com/1234",
"https://www.mozilla.com",
"https://www.firefox.com",
"https://www.example.com/1".to_string(),
"https://www.example.com/12".to_string(),
"https://www.example.com/123".to_string(),
"https://www.example.com/1234".to_string(),
"https://www.mozilla.com".to_string(),
"https://www.firefox.com".to_string(),
unicode_in_path.to_string() + "/1",
escaped_unicode_in_path.to_string() + "/2",
unicode_in_domain.to_string() + "/1",
escaped_unicode_in_domain.to_string() + "/2",
];
for item in &to_add {
@ -921,26 +945,37 @@ mod tests {
}
let to_search = [
("https://www.example.com", false),
("https://www.example.com/1", true),
("https://www.example.com/12", true),
("https://www.example.com/123", true),
("https://www.example.com/1234", true),
("https://www.example.com/12345", false),
("https://www.mozilla.com", true),
("https://www.firefox.com", true),
("https://www.mozilla.org", false),
("https://www.example.com".to_string(), false),
("https://www.example.com/1".to_string(), true),
("https://www.example.com/12".to_string(), true),
("https://www.example.com/123".to_string(), true),
("https://www.example.com/1234".to_string(), true),
("https://www.example.com/12345".to_string(), false),
("https://www.mozilla.com".to_string(), true),
("https://www.firefox.com".to_string(), true),
("https://www.mozilla.org".to_string(), false),
// dupes should still work!
("https://www.example.com/1234", true),
("https://www.example.com/12345", false),
("https://www.example.com/1234".to_string(), true),
("https://www.example.com/12345".to_string(), false),
// The unicode URLs should work when escaped the way we
// encountered them
(unicode_in_path.to_string() + "/1", true),
(escaped_unicode_in_path.to_string() + "/2", true),
(unicode_in_domain.to_string() + "/1", true),
(escaped_unicode_in_domain.to_string() + "/2", true),
// But also the other way.
(unicode_in_path.to_string() + "/2", true),
(escaped_unicode_in_path.to_string() + "/1", true),
(unicode_in_domain.to_string() + "/2", true),
(escaped_unicode_in_domain.to_string() + "/1", true),
];
let urls = to_search
.iter()
.map(|(url, _expect)| Url::parse(url).unwrap())
.map(|(url, _expect)| Url::parse(&url).unwrap())
.collect::<Vec<_>>();
let visited = get_visited(&conn, &urls).unwrap();
let visited = get_visited(&conn, urls).unwrap();
assert_eq!(visited.len(), to_search.len());
@ -958,6 +993,59 @@ mod tests {
Ok(())
}
#[test]
fn test_get_visited_into() {
let _ = env_logger::try_init();
let mut conn = PlacesDb::open_in_memory(None).unwrap();
let to_add = [
Url::parse("https://www.example.com/1").unwrap(),
Url::parse("https://www.example.com/12").unwrap(),
Url::parse("https://www.example.com/123").unwrap(),
];
for item in &to_add {
apply_observation(
&mut conn,
VisitObservation::new(item.clone()).with_visit_type(VisitTransition::Link),
)
.unwrap();
}
let mut results = [false; 10];
let get_visited_request = [
// 0 blank
(2, to_add[1].clone()),
(1, to_add[0].clone()),
// 3 blank
(4, to_add[2].clone()),
// 5 blank
// Note: url for 6 is not visited.
(6, Url::parse("https://www.example.com/1234").unwrap()),
// 7 blank
// Note: dupe is allowed
(8, to_add[1].clone()),
// 9 is blank
];
get_visited_into(&conn, &get_visited_request, &mut results).unwrap();
let expect = [
false, // 0
true, // 1
true, // 2
false, // 3
true, // 4
false, // 5
false, // 6
false, // 7
true, // 8
false, // 9
];
assert_eq!(expect, results);
}
#[test]
fn test_change_counter() -> Result<()> {
let _ = env_logger::try_init();