Thom Chiovoloni 2020-04-10 10:17:19 -07:00
Parent 9e140bae5e
Commit 145a74ba59
No key matching this signature was found
GPG key ID: 31F01AEBD799934A
29 changed files with 0 additions and 7030 deletions

Cargo.lock (generated)
View file

@@ -970,12 +970,6 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "index_vec"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1ad55c6c8931aababd9372a9be43cfeab198e6b87cc387bb505a5702d703786"
[[package]]
name = "indexmap"
version = "1.3.2"
@@ -2040,27 +2034,6 @@ version = "0.6.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
[[package]]
name = "remerge"
version = "0.1.0"
dependencies = [
"error-support",
"failure",
"index_vec",
"lazy_static",
"libsqlite3-sys",
"log",
"matches",
"nss_build_common",
"rusqlite",
"semver",
"serde",
"serde_json",
"sql-support",
"sync-guid",
"url",
]
[[package]]
name = "remove_dir_all"
version = "0.5.2"
@@ -2117,7 +2090,6 @@ dependencies = [
"libsqlite3-sys",
"lru-cache",
"memchr",
"serde_json",
"time",
]
@@ -2214,7 +2186,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
"serde",
]
[[package]]
@@ -2249,7 +2220,6 @@ version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78a7a12c167809363ec3bd7329fc0a3369056996de43c4b37ef3cd54a6ce4867"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",

View file

@@ -9,7 +9,6 @@ members = [
"components/push",
"components/push/ffi",
"components/rc_log",
"components/remerge",
"components/support/cli",
"components/support/error",
"components/support/ffi",

View file

@@ -1,35 +0,0 @@
[package]
name = "remerge"
version = "0.1.0"
authors = ["Thom Chiovoloni <tchiovoloni@mozilla.com>"]
edition = "2018"
license = "MPL-2.0"
[dependencies]
serde = { version = "1.0.104", features = ["derive", "rc"] }
failure = { version = "0.1.6" }
url = "2.1.1"
serde_json = { version = "1.0.50", features = ["preserve_order"] }
log = "0.4.8"
semver = { version = "0.9.0", features = ["serde"] }
lazy_static = "1.4.0"
index_vec = "0.1.0"
sql-support = {path = "../support/sql"}
error-support = {path = "../support/error"}
sync-guid = {path = "../support/guid", features = ["random", "rusqlite_support"]}
# it's not clear if we should actually use these deps (they're fine and not
# uncommon or anything, but we could avoid them at the cost of slightly more
# code).
matches = "0.1.8"
# A *direct* dep on the -sys crate is required for our build.rs
# to see the DEP_SQLITE3_LINK_TARGET env var that cargo sets
# on its behalf.
libsqlite3-sys = "0.17.3"
[dependencies.rusqlite]
version = "0.22.0"
features = ["functions", "bundled", "serde_json"]
[build-dependencies]
nss_build_common = { path = "../support/rc_crypto/nss/nss_build_common" }

View file

@@ -1,17 +0,0 @@
# Remerge: A syncable store for generic data types
Unfortunately, with the (indefinite) "pause" of Mentat, there's no obvious path
forward for new synced data types beyond 'the Sync team implements a new
component'. Presumably, at some point we decided this was desirable but
unworkable, hence the design of Mentat.
Remerge is a storage/syncing solution that attempts to recover some of the
benefits of Mentat, with the following major advantages over it:
- Works on top of Sync 1.5, including interfacing with existing collections.
- Doesn't change the sync data model substantially.
- Has storage which is straightforward to implement on top of SQLite.
For more information, please see the full RFC and documentation for remerge,
available [here](../../docs/design/remerge/rfc.md).
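To give a rough idea of the intended API, here is a minimal usage sketch. It is
based on the `RemergeEngine` interface and the example logins schema exercised
in this component's tests; the `notes-example` schema, its field names, and the
record contents are invented for illustration, and the exact set of keys the
schema parser accepts is not reproduced here, so treat this as a sketch rather
than a canonical collection definition.

```rust
// Hypothetical example only: the "notes-example" schema below is invented for
// illustration and is not a real collection definition.
use remerge::RemergeEngine;
use serde_json::json;

fn main() {
    // A remerge schema is plain JSON describing the collection's fields and
    // how conflicting edits to each field should be merged.
    let schema = json!({
        "version": "1.0.0",
        "name": "notes-example",
        "fields": [
            { "name": "id", "type": "own_guid" },
            { "name": "title", "type": "text", "required": true },
            { "name": "timesOpened", "type": "integer", "merge": "take_sum" }
        ],
        "dedupe_on": ["title"]
    })
    .to_string();

    // Open an in-memory store; `RemergeEngine::open` takes a file path instead.
    let engine = RemergeEngine::open_in_memory(&schema).expect("engine should open");

    // Records are also plain JSON, validated against the schema on insert.
    let id = engine
        .insert(json!({ "title": "hello", "timesOpened": 1 }))
        .expect("insert should succeed");
    let record = engine.get(&id).expect("query should succeed").expect("record should exist");
    println!("{}", record.into_val());
}
```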

View file

@@ -1,16 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Work around the fact that `sqlcipher` might get enabled by a cargo feature
//! another crate in the workspace needs, without setting up nss. (This is a
//! gross hack).
fn main() {
println!("cargo:rerun-if-changed=build.rs");
// Ugh. This is really, really dumb. We don't care about sqlcipher at all, really.
if nss_build_common::env_str("DEP_SQLITE3_LINK_TARGET") == Some("sqlcipher".into()) {
// If NSS_DIR isn't set, we don't really care, ignore the Err case.
let _ = nss_build_common::link_nss();
}
}

View file

@@ -1,62 +0,0 @@
-- A table containing every distinct schema we've seen.
CREATE TABLE remerge_schemas (
id INTEGER PRIMARY KEY,
is_legacy TINYINT NOT NULL,
-- The version of this schema
version TEXT NOT NULL UNIQUE,
-- If the schema is marked as having a required version, this is that
-- version
required_version TEXT,
-- The schema's text as JSON.
schema_text TEXT NOT NULL
);
-- Table of local records
CREATE TABLE rec_local (
id INTEGER PRIMARY KEY,
guid TEXT NOT NULL UNIQUE,
remerge_schema_version TEXT,
-- XXX Should this be nullable for the case where is_deleted == true?
record_data TEXT NOT NULL,
-- A local timestamp
local_modified_ms INTEGER NOT NULL DEFAULT 0,
is_deleted TINYINT NOT NULL DEFAULT 0,
sync_status TINYINT NOT NULL DEFAULT 0,
vector_clock TEXT NOT NULL,
last_writer_id TEXT NOT NULL
);
-- The "mirror", e.g. the last remote value we've seen.
CREATE TABLE rec_mirror (
id INTEGER PRIMARY KEY,
guid TEXT NOT NULL UNIQUE,
record_data TEXT NOT NULL,
remerge_schema_version TEXT,
-- in milliseconds (a sync15::ServerTimestamp multiplied by 1000 and truncated)
server_modified_ms INTEGER NOT NULL,
-- Whether or not there have been local changes to the record.
is_overridden TINYINT NOT NULL DEFAULT 0,
vector_clock TEXT, -- Can be null for legacy collections...
last_writer_id TEXT NOT NULL -- A sync guid.
);
-- Extra metadata. See `storage/bootstrap.rs` for information about the
-- contents. Arguably, this should be changed into a table that only contains
-- one row, but we handle setting it up separately from schema initialization,
-- so migration would be tricky.
CREATE TABLE metadata (key TEXT PRIMARY KEY, value BLOB) WITHOUT ROWID;

View file

@@ -1,446 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::error::*;
use crate::storage::{db::RemergeDb, NativeRecord, NativeSchemaAndText, SchemaBundle};
use crate::Guid;
use std::convert::{TryFrom, TryInto};
use std::path::Path;
/// "Friendly" public api for using Remerge.
pub struct RemergeEngine {
pub(crate) db: RemergeDb,
}
impl RemergeEngine {
pub fn open(path: impl AsRef<Path>, schema_json: impl AsRef<str>) -> Result<Self> {
let schema = NativeSchemaAndText::try_from(schema_json.as_ref())?;
let conn = rusqlite::Connection::open(path.as_ref())?;
let db = RemergeDb::with_connection(conn, schema)?;
Ok(Self { db })
}
pub fn open_in_memory(schema_json: impl AsRef<str>) -> Result<Self> {
let schema = NativeSchemaAndText::try_from(schema_json.as_ref())?;
let conn = rusqlite::Connection::open_in_memory()?;
let db = RemergeDb::with_connection(conn, schema)?;
Ok(Self { db })
}
pub fn conn(&self) -> &rusqlite::Connection {
self.db.conn()
}
pub fn bundle(&self) -> &SchemaBundle {
self.db.bundle()
}
pub fn list(&self) -> Result<Vec<NativeRecord>> {
self.db.get_all()
}
pub fn exists(&self, id: impl AsRef<str>) -> Result<bool> {
self.db.exists(id.as_ref())
}
pub fn get(&self, id: impl AsRef<str>) -> Result<Option<NativeRecord>> {
self.db.get_by_id(id.as_ref())
}
pub fn delete(&self, id: impl AsRef<str>) -> Result<bool> {
self.db.delete_by_id(id.as_ref())
}
pub fn update<R>(&self, rec: R) -> Result<()>
where
R: TryInto<NativeRecord>,
Error: From<R::Error>,
{
self.db.update_record(&rec.try_into()?)
}
pub fn insert<R>(&self, rec: R) -> Result<Guid>
where
R: TryInto<NativeRecord>,
Error: From<R::Error>,
{
self.db.create(&rec.try_into()?)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::untyped_map::UntypedMap;
use crate::JsonValue;
use rusqlite::{params, Connection};
use serde_json::json;
lazy_static::lazy_static! {
pub static ref SCHEMA: String = json!({
"version": "1.0.0",
"name": "logins-example",
"legacy": true,
"fields": [
{
"name": "id",
"type": "own_guid"
},
{
"name": "formSubmitUrl",
"type": "url",
"is_origin": true,
"local_name": "formActionOrigin"
},
{
"name": "httpRealm",
"type": "text",
"composite_root": "formSubmitUrl"
},
{
"name": "timesUsed",
"type": "integer",
"merge": "take_sum"
},
{
"name": "hostname",
"local_name": "origin",
"type": "url",
"is_origin": true,
"required": true
},
{
"name": "password",
"type": "text",
"required": true
},
{
"name": "username",
"type": "text"
},
{
"name": "extra",
"type": "untyped_map",
},
],
"dedupe_on": [
"username",
"password",
"hostname"
]
}).to_string();
}
#[test]
fn test_init() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
assert_eq!(e.bundle().collection_name(), "logins-example");
}
#[test]
fn test_insert() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
let id = e
.insert(json!({
"username": "test",
"password": "p4ssw0rd",
"origin": "https://www.example.com",
"formActionOrigin": "https://login.example.com",
}))
.unwrap();
assert!(e.exists(&id).unwrap());
let r = e.get(&id).unwrap().expect("should exist");
let v: JsonValue = r.into_val();
assert_eq!(v["id"], id.as_str());
assert_eq!(v["username"], "test");
assert_eq!(v["password"], "p4ssw0rd");
assert_eq!(v["origin"], "https://www.example.com");
assert_eq!(v["formActionOrigin"], "https://login.example.com");
}
#[test]
fn test_duplicate_insert() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
let id = e
.insert(json!({
"username": "test2",
"password": "p4ssw0rd2",
"origin": "https://www.example2.com",
"formActionOrigin": "https://login.example2.com",
}))
.unwrap();
assert!(e.exists(&id).unwrap());
e.get(&id).unwrap().expect("should exist");
let id2 = e
.insert(json!({
"username": "test3",
"password": "p4ssw0rd2",
"origin": "https://www.example3.com",
"formActionOrigin": "https://login.example3.com",
}))
.unwrap();
assert!(e.exists(&id2).unwrap());
e.get(&id2).unwrap().expect("should exist");
let r = e
.insert(json!({
"username": "test2",
"password": "p4ssw0rd2",
"origin": "https://www.example2.com",
"formActionOrigin": "https://login.example2.com",
}))
.unwrap_err();
assert_eq!(
r.to_string(),
"Invalid record: Record violates a `dedupe_on` constraint"
);
let id3 = e
.insert(json!({
"username": "test4",
"password": "p4ssw0rd2",
"origin": "https://www.example3.com",
"formActionOrigin": "https://login.example3.com",
}))
.unwrap();
assert!(e.exists(&id3).unwrap());
e.get(&id3).unwrap().expect("should exist");
}
#[test]
fn test_list_delete() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
let id = e
.insert(json!({
"username": "test",
"password": "p4ssw0rd",
"origin": "https://www.example.com",
"formActionOrigin": "https://login.example.com",
}))
.unwrap();
assert!(e.exists(&id).unwrap());
e.get(&id).unwrap().expect("should exist");
let id2 = e
.insert(json!({
"id": "abcd12349876",
"username": "test2",
"password": "p4ssw0rd0",
"origin": "https://www.ex4mple.com",
"httpRealm": "stuff",
}))
.unwrap();
assert_eq!(id2, "abcd12349876");
let l = e.list().unwrap();
assert_eq!(l.len(), 2);
assert!(l.iter().any(|r| r["id"] == id.as_str()));
let v2 = l
.iter()
.find(|r| r["id"] == id2.as_str())
.expect("should exist")
.clone()
.into_val();
assert_eq!(v2["username"], "test2");
assert_eq!(v2["password"], "p4ssw0rd0");
assert_eq!(v2["origin"], "https://www.ex4mple.com");
assert_eq!(v2["httpRealm"], "stuff");
let del = e.delete(&id).unwrap();
assert!(del);
assert!(!e.exists(&id).unwrap());
let l = e.list().unwrap();
assert_eq!(l.len(), 1);
assert_eq!(l[0]["id"], id2.as_str());
}
#[test]
fn test_update() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
let id = e
.insert(json!({
"username": "test",
"password": "p4ssw0rd",
"origin": "https://www.example.com",
"formActionOrigin": "https://login.example.com",
}))
.unwrap();
assert!(e.exists(&id).unwrap());
let v = e.get(&id).unwrap().expect("should exist").into_val();
assert_eq!(v["id"], id.as_str());
assert_eq!(v["username"], "test");
assert_eq!(v["password"], "p4ssw0rd");
assert_eq!(v["origin"], "https://www.example.com");
assert_eq!(v["formActionOrigin"], "https://login.example.com");
e.update(json!({
"id": id,
"username": "test2",
"password": "p4ssw0rd0",
"origin": "https://www.ex4mple.com",
"httpRealm": "stuff",
}))
.unwrap();
let v = e
.get(&id)
.unwrap()
.expect("should (still) exist")
.into_val();
assert_eq!(v["id"], id.as_str());
assert_eq!(v["username"], "test2");
assert_eq!(v["password"], "p4ssw0rd0");
assert_eq!(v["origin"], "https://www.ex4mple.com");
assert_eq!(v["httpRealm"], "stuff");
}
fn extra(conn: &Connection, id: &str) -> Result<UntypedMap> {
let data: JsonValue = conn.query_row_and_then(
"SELECT record_data FROM rec_local WHERE guid = ?",
params![id],
|row| row.get(0),
)?;
UntypedMap::from_local_json(data["extra"].clone())
}
#[test]
fn test_untyped_map_update() {
let e: RemergeEngine = RemergeEngine::open_in_memory(&*SCHEMA).unwrap();
let id = e
.insert(json!({
"username": "test",
"password": "p4ssw0rd",
"origin": "https://www.example.com",
"formActionOrigin": "https://login.example.com",
"extra": {
"foo": "a",
"bar": 4,
}
}))
.unwrap();
assert!(e.exists(&id).unwrap());
let v = e.get(&id).unwrap().expect("should exist").into_val();
assert_eq!(v["id"], id.as_str());
assert_eq!(v["username"], "test");
assert_eq!(v["password"], "p4ssw0rd");
assert_eq!(v["origin"], "https://www.example.com");
assert_eq!(v["formActionOrigin"], "https://login.example.com");
assert_eq!(
v["extra"],
json!({
"foo": "a",
"bar": 4,
})
);
let um0: UntypedMap = extra(e.conn(), &id).unwrap();
assert_eq!(um0.len(), 2);
assert_eq!(um0["foo"], "a");
assert_eq!(um0["bar"], 4);
assert_eq!(um0.tombstones().len(), 0);
e.update(json!({
"id": id,
"username": "test2",
"password": "p4ssw0rd0",
"origin": "https://www.ex4mple.com",
"httpRealm": "stuff",
"extra": json!({
"foo": "a",
"quux": 4,
})
}))
.unwrap();
let v = e
.get(&id)
.unwrap()
.expect("should (still) exist")
.into_val();
assert_eq!(v["id"], id.as_str());
assert_eq!(v["username"], "test2");
assert_eq!(v["password"], "p4ssw0rd0");
assert_eq!(v["origin"], "https://www.ex4mple.com");
assert_eq!(v["httpRealm"], "stuff");
assert_eq!(
v["extra"],
json!({
"foo": "a",
"quux": 4,
})
);
let um1: UntypedMap = extra(e.conn(), &id).unwrap();
assert_eq!(um1.len(), 2);
assert_eq!(um1["foo"], "a");
assert_eq!(um1["quux"], 4);
um1.assert_tombstones(vec!["bar"]);
e.update(json!({
"id": id,
"username": "test2",
"password": "p4ssw0rd0",
"origin": "https://www.ex4mple.com",
"httpRealm": "stuff",
"extra": json!({
"bar": "test",
})
}))
.unwrap();
let um2: UntypedMap = extra(e.conn(), &id).unwrap();
assert_eq!(um2.len(), 1);
assert_eq!(um2["bar"], "test");
um2.assert_tombstones(vec!["foo", "quux"]);
}
#[test]
fn test_schema_cant_go_backwards() {
const FILENAME: &str = "file:test_schema_go_backwards.sqlite?mode=memory&cache=shared";
let _e: RemergeEngine = RemergeEngine::open(FILENAME, &*SCHEMA).unwrap();
let backwards_schema: String = json!({
"version": "0.1.0",
"name": "logins-example",
"fields": [],
})
.to_string();
let open_result = RemergeEngine::open(FILENAME, &*backwards_schema);
if let Err(e) = open_result {
assert_eq!(
e.to_string(),
"Schema given is of an earlier version (0.1.0) than previously stored (1.0.0)"
);
} else {
panic!("permitted going backwards in schema versions");
}
}
#[test]
fn test_schema_doesnt_change_same_version() {
const FILENAME: &str =
"file:test_schema_change_without_version.sqlite?mode=memory&cache=shared";
let _e: RemergeEngine = RemergeEngine::open(FILENAME, &*SCHEMA).unwrap();
let backwards_schema: String = json!({
"version": "1.0.0",
"name": "logins-example",
"fields": [],
})
.to_string();
let open_result = RemergeEngine::open(FILENAME, &*backwards_schema);
if let Err(e) = open_result {
assert_eq!(
e.to_string(),
"Schema version did not change (1.0.0) but contents are different"
);
} else {
panic!("permitted changing without version bump");
}
}
}

View file

@@ -1,104 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use failure::Fail;
#[derive(Debug, Fail)]
pub enum ErrorKind {
#[fail(
display = "The `sync_status` column in DB has an illegal value: {}",
_0
)]
BadSyncStatus(u8),
#[fail(
display = "Schema name {:?} does not match the collection name for this remerge database ({:?})",
_0, _1
)]
SchemaNameMatchError(String, String),
#[fail(display = "Invalid schema: {}", _0)]
SchemaError(#[fail(cause)] crate::schema::error::SchemaError),
#[fail(
display = "Schema given is of an earlier version ({}) than previously stored ({})",
_0, _1
)]
SchemaVersionWentBackwards(String, String),
#[fail(
display = "Schema version did not change ({}) but contents are different",
_0
)]
SchemaChangedWithoutVersionBump(String),
#[fail(display = "Invalid record: {}", _0)]
InvalidRecord(#[fail(cause)] InvalidRecord),
#[fail(
display = "No record with guid exists (when one was required): {:?}",
_0
)]
NoSuchRecord(String),
#[fail(
display = "Failed to convert local record to native record (may indicate bad remerge schema): {}",
_0
)]
LocalToNativeError(String),
#[fail(display = "Error parsing JSON data: {}", _0)]
JsonError(#[fail(cause)] serde_json::Error),
#[fail(display = "Error executing SQL: {}", _0)]
SqlError(#[fail(cause)] rusqlite::Error),
#[fail(display = "Error parsing URL: {}", _0)]
UrlParseError(#[fail(cause)] url::ParseError),
/// Note: not an 'InvalidRecord' variant because it doesn't come from the user.
#[fail(
display = "UntypedMap has a key and tombstone with the same name when OnCollision::Error was requested"
)]
UntypedMapTombstoneCollision,
}
error_support::define_error! {
ErrorKind {
(JsonError, serde_json::Error),
(SchemaError, crate::schema::error::SchemaError),
(UrlParseError, url::ParseError),
(SqlError, rusqlite::Error),
(InvalidRecord, InvalidRecord),
}
}
#[derive(Debug, Fail)]
pub enum InvalidRecord {
#[fail(display = "Cannot insert non-json object")]
NotJsonObject,
#[fail(display = "The field {:?} is required", _0)]
MissingRequiredField(String),
#[fail(display = "The field {:?} must be of type \"{}\"", _0, _1)]
WrongFieldType(String, crate::schema::FieldKind),
#[fail(display = "The field {:?} must parse as a valid url", _0)]
NotUrl(String),
#[fail(display = "The field {:?} must have a non-opaque origin", _0)]
OriginWasOpaque(String),
#[fail(display = "The field {:?} has more URL parts than just an origin", _0)]
UrlWasNotOrigin(String),
#[fail(display = "The field {:?} is out of the required bounds", _0)]
OutOfBounds(String),
#[fail(display = "The field {:?} is not a valid record_set", _0)]
InvalidRecordSet(String),
#[fail(display = "The field {:?} is not a valid guid", _0)]
InvalidGuid(String),
// TODO(issue 2232): Should be more specific.
#[fail(display = "The field {:?} is invalid: {}", _0, _1)]
InvalidField(String, String),
#[fail(display = "A record with the given guid already exists")]
IdNotUnique,
#[fail(display = "Record violates a `dedupe_on` constraint")]
Duplicate,
}

View file

@@ -1,26 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#![deny(unsafe_code)]
#![warn(rust_2018_idioms)]
#[macro_use]
mod util;
pub mod engine;
pub mod error;
pub mod ms_time;
pub mod schema;
pub mod storage;
pub mod untyped_map;
pub mod vclock;
// Some re-exports we use frequently for local convenience
pub(crate) use sync_guid::Guid;
pub(crate) use serde_json::Value as JsonValue;
pub(crate) type JsonObject<Val = JsonValue> = serde_json::Map<String, Val>;
pub use crate::engine::RemergeEngine;
pub use crate::error::*;
pub use crate::ms_time::MsTime;
pub use crate::vclock::VClock;

View file

@@ -1,158 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use rusqlite::types::{FromSql, FromSqlResult, ToSql, ToSqlOutput, ValueRef};
use serde::{Deserialize, Serialize};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
fn duration_ms(d: Duration) -> i64 {
(d.as_secs() as i64) * 1000 + ((d.subsec_nanos() as i64) / 1_000_000)
}
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deserialize, Serialize, Default)]
#[serde(transparent)]
pub struct MsTime(pub i64);
/// Release of WorldWideWeb, the first web browser. Synced data could never come
/// from before this date. XXX this could be untrue for new collections...
pub const EARLIEST_SANE_TIME: MsTime = MsTime(662_083_200_000);
impl MsTime {
#[inline]
pub fn now() -> Self {
SystemTime::now().into()
}
#[inline]
pub fn from_millis(ts: i64) -> Self {
MsTime(ts)
}
/// Note: panics if `u64` is too large (which would require an
/// astronomically large timestamp)
#[inline]
pub fn from_unsigned_millis(ts: u64) -> Self {
assert!(ts < (std::i64::MAX as u64));
MsTime(ts as i64)
}
}
impl std::ops::Sub for MsTime {
type Output = Duration;
fn sub(self, o: MsTime) -> Duration {
if o > self {
log::error!(
"Attempt to subtract larger time from smaller: {} - {}",
self,
o
);
Duration::default()
} else {
Duration::from_millis((self.0 - o.0) as u64)
}
}
}
impl From<MsTime> for serde_json::Value {
fn from(ts: MsTime) -> Self {
ts.0.into()
}
}
impl From<MsTime> for u64 {
#[inline]
fn from(ts: MsTime) -> Self {
assert!(ts.0 >= 0);
ts.0 as u64
}
}
impl From<MsTime> for i64 {
#[inline]
fn from(ts: MsTime) -> Self {
ts.0
}
}
impl From<SystemTime> for MsTime {
#[inline]
fn from(st: SystemTime) -> Self {
let d = st.duration_since(UNIX_EPOCH).unwrap_or_default();
MsTime(duration_ms(d))
}
}
impl From<MsTime> for SystemTime {
#[inline]
fn from(ts: MsTime) -> Self {
UNIX_EPOCH + Duration::from_millis(ts.into())
}
}
impl std::fmt::Display for MsTime {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
impl std::fmt::Debug for MsTime {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(&self.0, f)
}
}
impl ToSql for MsTime {
fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
Ok(ToSqlOutput::from(self.0))
}
}
impl FromSql for MsTime {
fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
value.as_i64().map(MsTime)
}
}
impl PartialEq<i64> for MsTime {
#[inline]
fn eq(&self, o: &i64) -> bool {
self.0 == *o
}
}
impl PartialEq<u64> for MsTime {
#[inline]
fn eq(&self, o: &u64) -> bool {
*o < (std::i64::MAX as u64) && self.0 > 0 && self.0 == (*o as i64)
}
}
impl PartialEq<MsTime> for i64 {
#[inline]
fn eq(&self, o: &MsTime) -> bool {
PartialEq::eq(o, self)
}
}
impl PartialEq<MsTime> for u64 {
#[inline]
fn eq(&self, o: &MsTime) -> bool {
PartialEq::eq(o, self)
}
}
impl std::cmp::PartialOrd<i64> for MsTime {
#[inline]
fn partial_cmp(&self, o: &i64) -> Option<std::cmp::Ordering> {
std::cmp::PartialOrd::partial_cmp(&self.0, o)
}
}
// partialord must be symmetric
impl std::cmp::PartialOrd<MsTime> for i64 {
#[inline]
fn partial_cmp(&self, o: &MsTime) -> Option<std::cmp::Ordering> {
std::cmp::PartialOrd::partial_cmp(self, &o.0)
}
}

View file

@@ -1,490 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::merge_kinds::*;
use crate::error::*;
use crate::ms_time::EARLIEST_SANE_TIME;
use crate::{JsonObject, JsonValue};
use index_vec::IndexVec;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use url::Url;
/// The set of features understood by this client.
pub const REMERGE_FEATURES_UNDERSTOOD: &[&str] = &["record_set"];
index_vec::define_index_type! {
/// Newtype wrapper around usize, referring into the `fields` vec in a
/// RecordSchema
pub struct FieldIndex = usize;
}
/// The unserialized representation of the schema, parsed from a `RawSchema` (in
/// json.rs). If you change this, you may have to change that as well.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordSchema {
pub name: String,
pub version: semver::Version,
pub required_version: semver::VersionReq,
pub remerge_features_used: Vec<String>,
pub legacy: bool,
pub fields: IndexVec<FieldIndex, Field>,
pub field_map: HashMap<String, FieldIndex>,
pub dedupe_on: Vec<FieldIndex>,
pub composite_roots: Vec<FieldIndex>,
pub composite_fields: Vec<FieldIndex>,
// If we have a semantic for an UpdatedAt Timestamp, it's this.
pub field_updated_at: Option<FieldIndex>,
// If we have an own_guid field, it's this.
pub field_own_guid: Option<FieldIndex>,
}
impl RecordSchema {
pub fn field<'a, S: ?Sized + AsRef<str>>(&'a self, name: &S) -> Option<&'a Field> {
let idx = *self.field_map.get(name.as_ref())?;
Some(&self.fields[idx])
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum CompositeInfo {
Member { root: FieldIndex },
Root { children: Vec<FieldIndex> },
}
/// A single field in a record.
#[derive(Clone, Debug, PartialEq)]
pub struct Field {
pub name: String,
// Note: frequently equal to name.
pub local_name: String,
pub required: bool,
pub deprecated: bool,
pub change_preference: Option<ChangePreference>,
pub composite: Option<CompositeInfo>,
/// The type-specific information about a field.
pub ty: FieldType,
pub own_idx: FieldIndex,
}
impl Field {
pub(crate) fn validate_guid(name: &str, v: &JsonValue) -> Result<crate::Guid, InvalidRecord> {
if let JsonValue::String(s) = v {
if s.len() < 8 || !crate::Guid::from(s.as_str()).is_valid_for_sync_server() {
throw!(InvalidRecord::InvalidGuid(name.to_string()))
} else {
Ok(crate::Guid::from(s.as_str()))
}
} else {
throw!(InvalidRecord::WrongFieldType(
name.to_string(),
FieldKind::OwnGuid
));
}
}
pub fn validate(&self, v: JsonValue) -> Result<JsonValue> {
// TODO(issue 2232): most errors should be more specific.
use InvalidRecord::*;
if !self.required && v.is_null() {
return Ok(v);
}
match &self.ty {
FieldType::Untyped { .. } => Ok(v),
FieldType::OwnGuid { .. } => Ok(Self::validate_guid(&self.name, &v).map(|_| v)?),
FieldType::Text { .. } => {
if v.is_string() {
Ok(v)
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::Boolean { .. } => {
if let JsonValue::Bool(b) = v {
Ok(JsonValue::Bool(b))
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::UntypedMap { .. } => {
if v.is_object() {
Ok(v)
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::RecordSet { id_key, .. } => self.validate_record_set(id_key.as_str(), v),
FieldType::Url { is_origin, .. } => {
if let JsonValue::String(s) = v {
if let Ok(url) = Url::parse(&s) {
if *is_origin {
let o = url.origin();
if !o.is_tuple() {
throw!(OriginWasOpaque(self.name.clone()));
}
if url.username() != ""
|| url.password().is_some()
|| url.path() != "/"
|| url.query().is_some()
|| url.fragment().is_some()
{
throw!(UrlWasNotOrigin(self.name.clone()));
}
// Truncate value to just origin
Ok(o.ascii_serialization().into())
} else {
Ok(url.to_string().into())
}
} else {
throw!(NotUrl(self.name.clone()));
}
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::Real {
min,
max,
if_out_of_bounds,
..
} => {
if let JsonValue::Number(n) = v {
let v = n
.as_f64()
.ok_or_else(|| WrongFieldType(self.name.clone(), self.ty.kind()))?;
self.validate_num(v, *min, *max, *if_out_of_bounds)
.map(JsonValue::from)
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::Integer {
min,
max,
if_out_of_bounds,
..
} => {
if let JsonValue::Number(n) = v {
let v = n
.as_i64()
.ok_or_else(|| WrongFieldType(self.name.clone(), self.ty.kind()))?;
self.validate_num(v, *min, *max, *if_out_of_bounds)
.map(JsonValue::from)
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
FieldType::Timestamp { .. } => {
// We don't really have enough info to validate `semantic` here
// (See also comments in `native_to_local` in `storage::info`),
// so we don't check it.
if let JsonValue::Number(n) = v {
let v = n
.as_i64()
.ok_or_else(|| WrongFieldType(self.name.clone(), self.ty.kind()))?;
if v <= EARLIEST_SANE_TIME {
throw!(OutOfBounds(self.name.clone()));
}
Ok(v.into())
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
}
}
fn validate_record_set(&self, id_key: &str, v: JsonValue) -> Result<JsonValue> {
use InvalidRecord::*;
if let JsonValue::Array(a) = v {
let mut seen: HashSet<&str> = HashSet::with_capacity(a.len());
for item in &a {
if let JsonValue::Object(o) = item {
if let Some(JsonValue::String(k)) = o.get(id_key) {
if seen.contains(k.as_str()) {
log::trace!(
"Record set entry {:?} has id_key {:?} more than once",
item,
id_key
);
throw!(InvalidRecordSet(self.name.clone()));
}
seen.insert(k.as_str());
} else {
log::trace!(
"Invalid id for id_key {:?} in record_set entry {:?}",
id_key,
item,
);
throw!(InvalidRecordSet(self.name.clone()));
}
} else {
log::trace!("Record set entry {:?} is not an object", item);
throw!(InvalidRecordSet(self.name.clone()));
}
}
Ok(JsonValue::Array(a))
} else {
throw!(WrongFieldType(self.name.clone(), self.ty.kind()));
}
}
fn validate_num<N: PartialOrd + Copy>(
&self,
val: N,
min: Option<N>,
max: Option<N>,
if_oob: IfOutOfBounds,
) -> Result<N> {
let mut vc = val;
if let Some(min) = min {
if vc < min {
vc = min;
}
}
if let Some(max) = max {
if vc > max {
vc = max;
}
}
if vc != val {
match if_oob {
IfOutOfBounds::Discard => {
throw!(crate::error::InvalidRecord::OutOfBounds(self.name.clone()))
}
IfOutOfBounds::Clamp => Ok(vc),
}
} else {
Ok(val)
}
}
pub fn timestamp_semantic(&self) -> Option<TimestampSemantic> {
match &self.ty {
FieldType::Timestamp { semantic, .. } => *semantic,
_ => None,
}
}
pub fn is_kind(&self, k: FieldKind) -> bool {
self.ty.is_kind(k)
}
}
#[derive(Clone, Debug, PartialEq)]
pub enum FieldType {
Untyped {
merge: UntypedMerge,
default: Option<JsonValue>,
},
Text {
merge: TextMerge,
default: Option<String>,
},
Url {
merge: TextMerge,
is_origin: bool,
default: Option<Url>,
},
Real {
merge: NumberMerge,
min: Option<f64>,
max: Option<f64>,
if_out_of_bounds: IfOutOfBounds,
default: Option<f64>,
},
Integer {
merge: NumberMerge,
min: Option<i64>,
max: Option<i64>,
if_out_of_bounds: IfOutOfBounds,
default: Option<i64>,
},
Timestamp {
merge: TimestampMerge,
semantic: Option<TimestampSemantic>,
default: Option<TimestampDefault>,
},
Boolean {
merge: BooleanMerge,
default: Option<bool>,
},
OwnGuid {
auto: bool,
},
UntypedMap {
prefer_deletions: bool,
default: Option<JsonObject>,
},
RecordSet {
id_key: String,
prefer_deletions: bool,
default: Option<Vec<JsonObject>>,
},
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TimestampSemantic {
CreatedAt,
UpdatedAt,
}
impl std::fmt::Display for TimestampSemantic {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TimestampSemantic::CreatedAt => f.write_str("created_at"),
TimestampSemantic::UpdatedAt => f.write_str("updated_at"),
}
}
}
impl TimestampSemantic {
pub fn required_merge(self) -> TimestampMerge {
match self {
TimestampSemantic::CreatedAt => TimestampMerge::TakeMin,
TimestampSemantic::UpdatedAt => TimestampMerge::TakeMax,
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChangePreference {
Missing,
Present,
}
// We handle serialization specially.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TimestampDefault {
Value(i64),
Now,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FieldKind {
Untyped,
Text,
Url,
Real,
Integer,
Timestamp,
Boolean,
OwnGuid,
UntypedMap,
RecordSet,
}
impl std::fmt::Display for FieldKind {
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
FieldKind::Untyped => "untyped",
FieldKind::Text => "text",
FieldKind::Url => "url",
FieldKind::Real => "real",
FieldKind::Integer => "integer",
FieldKind::Timestamp => "timestamp",
FieldKind::Boolean => "boolean",
FieldKind::OwnGuid => "own_guid",
FieldKind::UntypedMap => "untyped_map",
FieldKind::RecordSet => "record_set",
})
}
}
impl FieldType {
pub fn kind(&self) -> FieldKind {
match self {
FieldType::Untyped { .. } => FieldKind::Untyped,
FieldType::Text { .. } => FieldKind::Text,
FieldType::Url { .. } => FieldKind::Url,
FieldType::Real { .. } => FieldKind::Real,
FieldType::Integer { .. } => FieldKind::Integer,
FieldType::Timestamp { .. } => FieldKind::Timestamp,
FieldType::Boolean { .. } => FieldKind::Boolean,
FieldType::OwnGuid { .. } => FieldKind::OwnGuid,
FieldType::UntypedMap { .. } => FieldKind::UntypedMap,
FieldType::RecordSet { .. } => FieldKind::RecordSet,
}
}
pub fn is_kind(&self, k: FieldKind) -> bool {
self.kind() == k
}
pub fn uses_untyped_merge(&self, um: UntypedMerge) -> bool {
match self {
// These branches must be separate since many of the `merge`s
// have diff. types, but they all impl PartialEq<UntypedMerge>.
FieldType::Untyped { merge, .. } => &um == merge,
FieldType::Text { merge, .. } | FieldType::Url { merge, .. } => &um == merge,
FieldType::Real { merge, .. } | FieldType::Integer { merge, .. } => &um == merge,
FieldType::Timestamp { merge, .. } => &um == merge,
FieldType::Boolean { merge, .. } => &um == merge,
// List these out so new additions need to update this.
FieldType::OwnGuid { .. }
| FieldType::UntypedMap { .. }
| FieldType::RecordSet { .. } => false,
}
}
pub fn get_default(&self) -> Option<JsonValue> {
match self {
FieldType::Untyped { default, .. } => default.clone(),
FieldType::Text { default, .. } => default.as_ref().map(|s| s.as_str().into()),
FieldType::Url { default, .. } => default.as_ref().map(|s| s.to_string().into()),
FieldType::Real { default, .. } => default.map(|s| s.into()),
FieldType::Integer { default, .. } => default.map(|s| s.into()),
FieldType::Timestamp {
default: Some(TimestampDefault::Now),
..
} => Some(crate::ms_time::MsTime::now().into()),
FieldType::Timestamp {
default: Some(TimestampDefault::Value(v)),
..
} => Some((*v).into()),
FieldType::Timestamp { default: None, .. } => None,
FieldType::Boolean { default, .. } => default.map(|s| s.into()),
FieldType::OwnGuid { .. } => None,
FieldType::UntypedMap { default, .. } => {
default.as_ref().map(|s| JsonValue::Object(s.clone()))
}
FieldType::RecordSet { default, .. } => default.as_ref().map(|s| {
JsonValue::Array(s.iter().map(|v| JsonValue::Object(v.clone())).collect())
}),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
pub enum IfOutOfBounds {
#[serde(rename = "clamp")]
Clamp,
#[serde(rename = "discard")]
Discard,
}

View file

@@ -1,242 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::desc::*;
use super::json::ParsedMerge;
use super::merge_kinds::*;
use failure::Fail;
#[derive(Debug, Clone, Fail)]
pub enum FieldError {
#[fail(display = "Record field names must be ascii, nonempty, and contain [a-zA-Z0-9_$]")]
InvalidName,
#[fail(
display = "Merge strategy '{:?}' and type '{:?}' are not compatible.",
merge, ty
)]
IllegalMergeForType { ty: FieldKind, merge: ParsedMerge },
#[fail(display = "Composite fields may not specify a merge strategy")]
CompositeFieldMergeStrat,
#[fail(display = "Cannot find composite_root '{}'", _0)]
UnknownCompositeRoot(String),
#[fail(display = "Field of type '{}' may not be part of dedupe_on", _0)]
BadTypeInDedupeOn(FieldKind),
#[fail(display = "Invalid merge strategy for composite root: {}", _0)]
CompositeRootInvalidMergeStrat(ParsedMerge),
#[fail(display = "Fields of type '{}' may not specify a merge strategy", _0)]
TypeForbidsMergeStrat(FieldKind),
#[fail(display = "Fields of type '{}' may not be part of a composite", _0)]
TypeNotComposite(FieldKind),
#[fail(display = "\"deprecated\" and \"required\" may not both be true on a field")]
DeprecatedRequiredConflict,
#[fail(display = "Missing `if_out_of_bounds` on bounded number")]
NoBoundsCheckInfo,
#[fail(display = "Bounded number max/min are not finite, or 'max' value is less than 'min'.")]
BadNumBounds,
#[fail(display = "Default value for bounded number is not inside the bounds")]
BadNumDefault,
#[fail(display = "Composite roots may not have numeric clamping (discard is allowed)")]
NumberClampOnCompositeRoot,
#[fail(display = "A field's composite root cannot be part of a composite")]
CompositeRecursion,
#[fail(
display = "Invalid URL \"{}\" as default value of `url` field: {}",
_0, _1
)]
BadDefaultUrl(String, url::ParseError),
#[fail(
display = "is_origin URL field has default value of \"{}\", which isn't an origin",
_0
)]
BadDefaultOrigin(String),
#[fail(
display = "Semantic timestamp '{}' must use the '{}' merge strategy (got '{}').",
sem, want, got
)]
BadMergeForTimestampSemantic {
sem: TimestampSemantic,
want: TimestampMerge,
got: TimestampMerge,
},
// There are a few cases here, it doesn't feel worth it to merge them
// somehow.
#[fail(display = "record_set has illegal default value: {}", _0)]
BadRecordSetDefault(BadRecordSetDefaultKind),
#[fail(display = "merge strategy 'take_sum' forbids specifying a 'max' value")]
MergeTakeSumNoMax,
#[fail(
display = "Illegal default timestamp. Must be after the release of the first web browser"
)]
DefaultTimestampTooOld,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum BadRecordSetDefaultKind {
IdKeyMissing,
IdKeyDupe,
IdKeyInvalidType,
}
impl std::fmt::Display for BadRecordSetDefaultKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use BadRecordSetDefaultKind::*;
match self {
IdKeyMissing => f.write_str("contains an item without an id_key"),
IdKeyDupe => f.write_str("contains an item with a duplicate id_key"),
IdKeyInvalidType => f.write_str(
"contains an item with an id_key with an invalid type (must be a string)",
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SemverProp {
Version,
RequiredVersion,
}
impl std::fmt::Display for SemverProp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use SemverProp::*;
match self {
Version => f.write_str("version"),
RequiredVersion => f.write_str("required_version"),
}
}
}
#[derive(Debug, Fail)]
pub enum SchemaError {
#[fail(display = "Schema format error: {}", _0)]
FormatError(#[fail(cause)] serde_json::Error),
#[fail(display = "Cannot parse format_version: {}", _0)]
WrongFormatVersion(usize),
#[fail(
display = "Failed to parse semantic version string {:?} from property '{}': {}",
got, prop, err
)]
VersionParseFailed {
prop: SemverProp,
got: String,
#[fail(cause)]
err: semver::SemVerError,
},
#[fail(
display = "Failed to parse semantic version requirement {:?} from property '{}': {}",
got, prop, err
)]
VersionReqParseFailed {
prop: SemverProp,
got: String,
#[fail(cause)]
err: semver::ReqParseError,
},
#[fail(
display = "Schema required_version '{}' and version '{}' are not compatible.",
_0, _1
)]
LocalRequiredVersionNotCompatible(semver::VersionReq, semver::Version),
#[fail(
display = "Remerge feature {} is required but not supported locally",
_0
)]
MissingRemergeFeature(String),
#[fail(
display = "Remerge feature {} is required but not listed in remerge_features_used",
_0
)]
UndeclaredFeatureRequired(String),
#[fail(display = "Duplicate field: {}", _0)]
DuplicateField(String),
#[fail(display = "Field '{}': {}", _0, _1)]
FieldError(String, #[fail(cause)] FieldError),
#[fail(
display = "Composite root '{}' has an illegal type / merge combination",
_0
)]
IllegalCompositeRoot(String),
#[fail(
display = "A record with a non-empty dedupe_on list may not use the `duplicate` merge strategy"
)]
DedupeOnWithDuplicateField,
#[fail(display = "Unknown field in dedupe_on: {}", _0)]
UnknownDedupeOnField(String),
#[fail(display = "Deprecated field in dedupe_on: {}", _0)]
DeprecatedFieldDedupeOn(String),
#[fail(display = "Only part of a composite field appears in dedupe_on")]
PartialCompositeDedupeOn,
#[fail(display = "Legacy collections must have an `OwnId` field.")]
LegacyMissingId,
#[fail(display = "Only one field with the 'updated_at' timestamp semantic is allowed")]
MultipleUpdateAt,
#[fail(display = "Only one 'own_guid' field is allowd")]
MultipleOwnGuid,
#[fail(display = "Remote schema missing 'remerge_features_used'")]
RemoteMissingRemergeFeaturesUsed,
#[fail(
display = "'required_remerge_version' specified locally (as \"{}\"), but it's greater than our actual version \"{}\"",
_0, _1
)]
LocalRemergeVersionFailsLocalRequired(semver::VersionReq, semver::Version),
#[fail(display = "'remerge_version' can not be specified locally.")]
LocalRemergeVersionSpecified,
#[fail(
display = "Locked out of remote schema since our remerge_version \"{}\" is not compatible with requirement \"{}\"",
version, req
)]
LocalRemergeVersionFailsRemoteRequired {
version: semver::Version,
req: semver::VersionReq,
},
#[fail(
display = "Remote remerge_version \"{}\" is not compatible with its own listed requirement \"{}\"",
version, req
)]
RemoteRemergeVersionFailsOwnRequirement {
version: semver::Version,
req: semver::VersionReq,
},
}
pub type SchemaResult<T> = std::result::Result<T, SchemaError>;

File diff not shown because it is too large. Load diff

View file

@@ -1,240 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum UntypedMerge {
TakeNewest,
PreferRemote,
Duplicate,
CompositeMember,
}
impl std::fmt::Display for UntypedMerge {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
UntypedMerge::TakeNewest => f.write_str("take_newest"),
UntypedMerge::PreferRemote => f.write_str("prefer_remote"),
UntypedMerge::Duplicate => f.write_str("duplicate"),
UntypedMerge::CompositeMember => f.write_str("<composite member>"),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TextMerge {
Untyped(UntypedMerge),
}
impl std::fmt::Display for TextMerge {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TextMerge::Untyped(u) => write!(f, "{}", u),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TimestampMerge {
Untyped(UntypedMerge),
TakeMin,
TakeMax,
}
impl std::fmt::Display for TimestampMerge {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TimestampMerge::Untyped(u) => write!(f, "{}", u),
TimestampMerge::TakeMin => f.write_str("take_min"),
TimestampMerge::TakeMax => f.write_str("take_max"),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum NumberMerge {
Untyped(UntypedMerge),
TakeMin,
TakeMax,
TakeSum,
}
impl std::fmt::Display for NumberMerge {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
NumberMerge::Untyped(u) => write!(f, "{}", u),
NumberMerge::TakeMin => f.write_str("take_min"),
NumberMerge::TakeMax => f.write_str("take_max"),
NumberMerge::TakeSum => f.write_str("take_sum"),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum BooleanMerge {
Untyped(UntypedMerge),
PreferFalse,
PreferTrue,
}
impl std::fmt::Display for BooleanMerge {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BooleanMerge::Untyped(u) => write!(f, "{}", u),
BooleanMerge::PreferFalse => f.write_str("prefer_false"),
BooleanMerge::PreferTrue => f.write_str("prefer_true"),
}
}
}
// macro to remove boilerplate
macro_rules! merge_boilerplate {
// base case.
(@type [$MergeT:ident]) => {
};
// @common_methods: implement common helper methods (`as_untyped`, which returns Option<UntypedMerge>, and `is_composite_member`)
(@type [$MergeT:ident] @common_methods $($tt:tt)*) => {
impl $MergeT {
pub fn as_untyped(&self) -> Option<UntypedMerge> {
#[allow(unreachable_patterns)]
match self {
$MergeT::Untyped(u) => Some(*u),
_ => None
}
}
pub fn is_composite_member(&self) -> bool {
self.as_untyped() == Some(UntypedMerge::CompositeMember)
}
}
merge_boilerplate!(@type [$MergeT] $($tt)*);
};
// @from_untyped: impl From<Untyped> for $MergeT
(@type [$MergeT:ident] @from_untyped $($tt:tt)+) => {
impl From<UntypedMerge> for $MergeT {
#[inline]
fn from(u: UntypedMerge) -> Self {
$MergeT::Untyped(u)
}
}
merge_boilerplate!(@type [$MergeT] $($tt)+);
};
// @compare_untyped : implement PartialEq<UntypedMerge> automatically.
(@type [$MergeT:ident] @compare_untyped $($tt:tt)*) => {
impl PartialEq<UntypedMerge> for $MergeT {
#[inline]
fn eq(&self, o: &UntypedMerge) -> bool {
#[allow(unreachable_patterns)]
match self {
$MergeT::Untyped(u) => u == o,
_ => false,
}
}
}
impl PartialEq<$MergeT> for UntypedMerge {
#[inline]
fn eq(&self, o: &$MergeT) -> bool {
o == self
}
}
merge_boilerplate!(@type [$MergeT] $($tt)*);
};
// @compare_via_untyped [$T0, ...], implement PartialEq<$T0> for $MergeT, assuming
// that $T0 and $MergeT only overlap in UntypedMerge impls.
(@type [$MergeT:ident] @compare_via_untyped [$($T0:ident),* $(,)?] $($tt:tt)*) => {
$(
impl PartialEq<$T0> for $MergeT {
fn eq(&self, o: &$T0) -> bool {
#[allow(unreachable_patterns)]
match (self, o) {
($MergeT::Untyped(self_u), $T0::Untyped(t0_u)) => self_u == t0_u,
_ => false
}
}
}
impl PartialEq<$MergeT> for $T0 {
fn eq(&self, o: &$MergeT) -> bool {
PartialEq::eq(o, self)
}
}
)*
merge_boilerplate!(
@type [$MergeT]
$($tt)*
);
};
// @compare_with [SomeTy { Enums, Vals, That, Are, The, Same }]
(@type [$MergeT:ident] @compare_with [$T0:ident { $($Variant:ident),+ $(,)? }] $($tt:tt)*) => {
impl PartialEq<$T0> for $MergeT {
#[inline]
fn eq(&self, o: &$T0) -> bool {
#[allow(unreachable_patterns)]
match (self, o) {
($MergeT::Untyped(self_u), $T0::Untyped(t0_u)) => self_u == t0_u,
$(($MergeT::$Variant, $T0::$Variant) => true,)+
_ => false
}
}
}
impl PartialEq<$MergeT> for $T0 {
#[inline]
fn eq(&self, o: &$MergeT) -> bool {
o == self
}
}
merge_boilerplate!(@type [$MergeT] $($tt)*);
};
// @from [SomeEnum { Variants, That, Are, The, Same }]
(@type [$MergeT:ident] @from [$T0:ident { $($Variant:ident),+ $(,)? }] $($tt:tt)*) => {
impl From<$T0> for $MergeT {
fn from(t: $T0) -> Self {
match t {
$T0::Untyped(u) => $MergeT::Untyped(u),
$($T0::$Variant => $MergeT::$Variant,)+
}
}
}
merge_boilerplate!(@type [$MergeT] $($tt)*);
}
}
merge_boilerplate!(
@type [BooleanMerge]
@from_untyped
@common_methods
@compare_untyped
@compare_via_untyped [NumberMerge, TextMerge, TimestampMerge]
);
merge_boilerplate!(
@type [TextMerge]
@from_untyped
@common_methods
@compare_untyped
@compare_via_untyped [NumberMerge, TimestampMerge]
);
merge_boilerplate!(
@type [NumberMerge]
@from_untyped
@common_methods
@compare_untyped
@compare_via_untyped []
@compare_with [TimestampMerge { TakeMax, TakeMin }]
@from [TimestampMerge { TakeMax, TakeMin }]
);
merge_boilerplate!(
@type [TimestampMerge]
@from_untyped
@common_methods
@compare_untyped
);

View file

@@ -1,14 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
pub mod desc;
pub mod error;
pub mod json;
pub mod merge_kinds;
pub use desc::*;
pub use error::SchemaError;
pub use json::parse_from_string;
pub use merge_kinds::*;

View file

@@ -1,116 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! This module is concerned mainly with initializing the schema and metadata
//! tables in the database. Specifically it has to handle the following cases
//!
//! ## First time initialization
//!
//! - Must insert the provided native schema into schemas table
//! - Must populate the metadata keys with their initial values. Specifically:
//! - remerge/collection-name
//! - remerge/local-schema-version
//! - remerge/native-schema-version
//! - remerge/client-id
//! - remerge/change-counter
use super::{meta, SchemaBundle};
use crate::error::*;
use crate::Guid;
use rusqlite::Connection;
use std::sync::Arc;
pub(super) fn load_or_bootstrap(
db: &Connection,
native: super::NativeSchemaAndText<'_>,
) -> Result<(SchemaBundle, Guid)> {
if let Some(name) = meta::try_get::<String>(db, meta::COLLECTION_NAME)? {
let native = native.parsed;
if name != native.name {
throw!(ErrorKind::SchemaNameMatchError(native.name.clone(), name));
}
let local_ver: String = meta::get(db, meta::LOCAL_SCHEMA_VERSION)?;
let native_ver: String = meta::get(db, meta::NATIVE_SCHEMA_VERSION)?;
let client_id: sync_guid::Guid = meta::get(db, meta::OWN_CLIENT_ID)?;
if native_ver != native.version.to_string() {
// XXX migrate existing records here!
let native_ver = semver::Version::parse(&*native_ver)
.expect("previously-written version is no longer semver");
if native.version < native_ver {
throw!(ErrorKind::SchemaVersionWentBackwards(
native.version.to_string(),
native_ver.to_string()
));
}
meta::put(db, meta::NATIVE_SCHEMA_VERSION, &native.version.to_string())?;
} else {
let previous_native: String = db.query_row(
"SELECT schema_text FROM remerge_schemas WHERE version = ?",
rusqlite::params![native_ver],
|r| r.get(0),
)?;
let previous_native = crate::schema::parse_from_string(&*previous_native, false)?;
if *native != previous_native {
throw!(ErrorKind::SchemaChangedWithoutVersionBump(
native.version.to_string()
));
}
}
let local_schema: String = db.query_row(
"SELECT schema_text FROM remerge_schemas WHERE version = ?",
rusqlite::params![local_ver],
|r| r.get(0),
)?;
// XXX need to think about what to do if this fails! More generally, is
// it sane to run validation on schemas already in the DB? If the answer
// is yes, we should probably have more tests to ensure we never begin
// rejecting a schema we previously considered valid!
let parsed = crate::schema::parse_from_string(&local_schema, false)?;
Ok((
SchemaBundle {
local: Arc::new(parsed),
native,
collection_name: name,
},
client_id,
))
} else {
bootstrap(db, native)
}
}
pub(super) fn bootstrap(
db: &Connection,
native: super::NativeSchemaAndText<'_>,
) -> Result<(SchemaBundle, Guid)> {
let guid = sync_guid::Guid::random();
meta::put(db, meta::OWN_CLIENT_ID, &guid)?;
let sql = "
INSERT INTO remerge_schemas (is_legacy, version, required_version, schema_text)
VALUES (:legacy, :version, :req_version, :text)
";
let ver_str = native.parsed.version.to_string();
db.execute_named(
sql,
rusqlite::named_params! {
":legacy": native.parsed.legacy,
":version": ver_str,
":req_version": native.parsed.required_version.to_string(),
":text": native.source,
},
)?;
meta::put(db, meta::LOCAL_SCHEMA_VERSION, &ver_str)?;
meta::put(db, meta::NATIVE_SCHEMA_VERSION, &ver_str)?;
meta::put(db, meta::COLLECTION_NAME, &native.parsed.name)?;
meta::put(db, meta::CHANGE_COUNTER, &1)?;
Ok((
SchemaBundle {
collection_name: native.parsed.name.clone(),
native: native.parsed.clone(),
local: native.parsed.clone(),
},
guid,
))
}

View file

@@ -1,364 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::{LocalRecord, NativeRecord};
use crate::error::*;
use crate::schema::{FieldKind, FieldType, RecordSchema};
use crate::untyped_map::{OnCollision, UntypedMap};
use crate::{Guid, JsonObject, JsonValue};
use std::sync::Arc;
/// Reason for converting a native record to a local record. Essentially a
/// typesafe `is_creation: bool`. Exists just to be passed to `native_to_local`,
/// see that function's docs for more info.
#[derive(Clone, Debug)]
pub enum ToLocalReason {
/// The record is going to be compared with existing records, and won't be
/// inserted into the DB. This means we're going to perform deduping
Comparison,
/// The record is being created
Creation,
/// The record is expected to exist, and is being updated.
Update {
/// Needed for UntypedMap, and eventually RecordSet.
prev: LocalRecord,
},
}
#[derive(Clone, Debug, PartialEq)]
pub struct SchemaBundle {
pub(crate) collection_name: String,
pub(crate) native: Arc<RecordSchema>,
pub(crate) local: Arc<RecordSchema>,
}
impl SchemaBundle {
/// Convert a native record to a local record.
///
/// The reason parameter influences the behavior of this function.
///
/// ### if `reason == Creation`
///
/// - We assume we need to generate an ID for this record, unless it has a
/// non-auto OwnGuid field (in which case we ensure this is present).
///
/// - If we have a timestamp field with the created_at semantic, we populate
/// that.
///
/// - Note that if you use this, you will likely need to check that no such
/// record is in the database already (this function can't).
///
/// ### if `reason == Update`
///
/// - We require the OwnGuid field to be populated.
/// - If we have a timestamp field with the updated_at semantic, it's
/// updated.
///
/// ### if `reason == Comparison`
///
/// - The OwnGuid field may optionally be populated. If it's not populated,
/// the resulting LocalRecord will not have a populated guid, and the
/// first member of the tuple will be an empty guid.
///
/// - The OwnGuid field is optional for comparison, since for deduping
/// you might want to validate an existing record. If this is the
/// case, the guid allows us to avoid saying an item is its own dupe.
///
/// However, if validating prior to creation, you wouldn't provide a
/// guid (unless the own_guid is not auto)
///
/// - Semantic timestamps are not filled in (Hrm...)
pub fn native_to_local(
&self,
record: &NativeRecord,
reason: ToLocalReason,
) -> Result<(Guid, LocalRecord)> {
use crate::util::into_obj;
let mut id = Guid::random();
let mut fields = JsonObject::default();
// TODO: Maybe we should ensure this for all `Record`s?
let mut seen_guid = false;
let now_ms = crate::MsTime::now();
for field in &self.local.fields {
let native_field = &self.native.field(&field.name);
// XXX `local_name` in the schema should be renamed to something
// else. It's the property used to rename a field locally, while
// leaving its canonical name the same. For example, this is
// (eventually) what logins wants to do for hostname/origin.
//
// All good so far, the confusion is that `local` generally refers
// to the on-disk type, and `native` refers to the values coming
// from the running local application (which will use `local_name`).
//
// Or maybe renaming `LocalRecord` and such would be enough.
let native_name = native_field.map(|n| n.local_name.as_str());
let is_guid = FieldKind::OwnGuid == field.ty.kind();
let is_umap = FieldKind::UntypedMap == field.ty.kind();
let ts_sema = field.timestamp_semantic();
if let Some(v) = native_name.and_then(|s| record.get(s)) {
let mut fixed = field.validate(v.clone())?;
if is_guid {
if let JsonValue::String(s) = &fixed {
id = Guid::from(s.as_str());
seen_guid = true;
} else {
unreachable!(
"Field::validate checks that OwnGuid fields have string values."
);
}
} else if let Some(semantic) = ts_sema {
use crate::schema::TimestampSemantic::*;
// Consider a format where in v1 there's a timestamp field
// which has no semantic, but the devs are manually making
// it behave like it had the `updated_at` semantic.
//
// Then, in v2, they did a closer read of the remerge docs
// (or something) and changed it to have the `updated_at`
// semantic.
//
// Erroring here would make this a breaking change. However,
// we don't really want to just support it blindly, so we
// check and see if the native schema version thinks this
// should be a timestamp field too, and if so we allow it.
//
// However, we use our own timestamps, so that they're
// consistent with timestamps we generate elsewhere.
if native_field.map_or(false, |nf| !nf.is_kind(FieldKind::Timestamp)) {
throw!(InvalidRecord::InvalidField(
native_name
.unwrap_or_else(|| field.name.as_str())
.to_owned(),
format!(
"A value was provided for timestamp with {:?} semantic",
semantic
),
));
}
match (&reason, semantic) {
(ToLocalReason::Creation, _) => {
// Initialize both CreatedAt/UpdatedAt to now_ms on creation
fixed = now_ms.into();
}
(ToLocalReason::Update { .. }, UpdatedAt) => {
fixed = now_ms.into();
}
// Keep these here explicitly to ensure this gets
// updated if the enums changed.
(ToLocalReason::Update { .. }, CreatedAt) => {}
(ToLocalReason::Comparison, _) => {
// XXX The result of this won't be "fully" valid...
// Shouldn't matter for deduping (what Comparison is
// currently used for), since you can't dedupe_on a
// semantic timestamp (validation checks this).
}
}
} else if is_umap {
// Untyped maps have to be converted into a `{ map:
// <payload>, tombs: [...] }` payload to handle storing
// tombstones.
//
// Additionally, for updates, we make sure (inside
// `update_local_from_native` and callees) that:
// - entries which are being removed in this update
// should get tombstones.
// - entries which are added which have tombstones
// have the tombstones removed.
match &reason {
ToLocalReason::Update { prev } => {
// Note that the equivalent field in `prev`'s schema
// might not exist (or it might exist but have been
// optional). For now, just treat a missing previous value as a fresh map.
if let Some(prev) = prev.get(&field.name) {
fixed = UntypedMap::update_local_from_native(prev.clone(), fixed)?;
} else {
fixed = UntypedMap::from_native(into_obj(fixed)?).into_local_json();
}
}
ToLocalReason::Creation | ToLocalReason::Comparison => {
fixed = UntypedMap::from_native(into_obj(fixed)?).into_local_json();
}
}
}
fields.insert(field.name.clone(), fixed);
} else if let Some(def) = field.ty.get_default() {
if is_umap {
let def_obj = into_obj(def)?;
let val = UntypedMap::new(def_obj, vec![], OnCollision::KeepEntry);
fields.insert(field.name.clone(), val.into_local_json());
} else {
fields.insert(field.name.clone(), def);
}
} else if is_guid {
match &reason {
ToLocalReason::Update { .. } => {
throw!(InvalidRecord::InvalidField(
native_name
.unwrap_or_else(|| field.name.as_str())
.to_owned(),
"no value provided in ID field for update".into()
));
}
ToLocalReason::Creation => {
// Note: auto guids are handled below
fields.insert(field.name.clone(), id.to_string().into());
}
ToLocalReason::Comparison => {
// Records from Comparison are allowed to omit their
// guids. Motivation for this is in fn header comment
// (tldr: you'll want to omit it when running a
// validation/dupe check for a fresh record, and provide
// it for an existing record)
// Clear the `id`. This isn't great, but I doubt anybody
// will care about it. Using an Option<Guid> for the
// return where it will always be Some(id) for
// Creation/Update, and None for Comparison seems worse
// to me.
//
// eh. Comparison is only half-implemented for now
// anyway.
id = Guid::empty();
}
}
} else if field.required {
throw!(InvalidRecord::MissingRequiredField(
native_name
.unwrap_or_else(|| field.name.as_str())
.to_owned()
));
}
}
// XXX We should error if there are any fields in the native record we
// don't know about, instead of silently dropping them.
if !seen_guid && matches::matches!(reason, ToLocalReason::Creation) {
self.complain_unless_auto_guid()?;
}
Ok((id, LocalRecord::new_unchecked(fields)))
}
pub fn local_to_native(&self, record: &LocalRecord) -> Result<NativeRecord> {
let mut fields = JsonObject::default();
// Note: we should probably report special telemetry for many of these
// errors, as they indicate either a bug in remerge or a problem in the
// provided schema)
for native_field in &self.native.fields {
// First try the record. Note that the `name` property isn't
// supposed to change, barring removal or similar. (This is why
// `local_name` exists)
if let Some(value) = record.get(&native_field.name) {
let mut value: JsonValue = value.clone();
// If it's an UntypedMap, we need to replace the `{ map:
// {payload here}, tombs: ... }` structure with just the payload.
if native_field.ty.kind() == FieldKind::UntypedMap {
value = UntypedMap::from_local_json(value)?.into_native().into();
}
fields.insert(native_field.local_name.clone(), value);
continue;
} else if let Some(default) = native_field.ty.get_default() {
// Otherwise, we see if the field has a default value specified
// in the native schema.
fields.insert(native_field.local_name.clone(), default);
continue;
}
// If not, see if it has a default specified in the local schema.
// Even though we apply defaults when writing local records into the
// DB, this can happen if the local schema we wrote `record` with is
// an older version than our current local schema version.
if let Some(default) = self
.local
.field(&native_field.name)
.and_then(|lf| lf.ty.get_default())
{
// Make sure that that default is valid. If it's not, we
// ignore it (unless it's a required native field, in which
// case we complain).
if let Ok(fixed) = native_field.validate(default.clone()) {
if fixed == default {
fields.insert(native_field.local_name.clone(), default);
continue;
}
// If this is actually a problem (e.g. the field is
// required), we'll complain loudly below (this is likely a
// schema issue if the field is required).
log::error!(
"More recent schema has default record for field {:?}, but it required fixups according to the native schema!",
native_field.local_name,
);
} else {
// The local schema's default value for some field is
// invalid according to the native schema. This should be a
// breaking change if it ever happens, and means the schema
// has problems, so we report an error here (even if it's an
// optional field...)
throw!(ErrorKind::LocalToNativeError(format!(
"More recent schema has default record for field {:?}, \
but it was not valid according to the native schema",
native_field.local_name
)));
}
}
if !native_field.required {
// We didn't have it, but it's optional.
continue;
}
// Everything we tried failed, which means we have a bad record in
// our DB. This is probably caused by an incompatible schema update
// that didn't specify the right required version. :(
//
// In practice this can be fixed by pushing an updated schema with a
// default value / fixed default value for this, so it's unclear
// what to actually do here until we see what kinds of things cause
// it in the wild, if any.
throw!(ErrorKind::LocalToNativeError(format!(
"Local record is missing or has invalid required field {:?}",
native_field.local_name
)));
}
Ok(NativeRecord::new_unchecked(fields))
}
/// Called if the guid isn't provided; returns Err if a guid was actually required.
fn complain_unless_auto_guid(&self) -> Result<()> {
let mut required_own_guid_field = None;
for &schema in &[&*self.local, &*self.native] {
if let Some(idx) = schema.field_own_guid {
if let FieldType::OwnGuid { auto } = &schema.fields[idx].ty {
if *auto {
return Ok(());
}
required_own_guid_field = Some(schema.fields[idx].name.as_str());
} else {
// Validation ensures this.
panic!("bug: field_own_guid refers to non-OwnGuid field");
}
}
}
if let Some(name) = required_own_guid_field {
throw!(InvalidRecord::MissingRequiredField(name.to_string()));
}
Ok(())
}
pub fn collection_name(&self) -> &str {
&self.collection_name
}
pub fn native_schema(&self) -> &RecordSchema {
&self.native
}
pub fn local_schema(&self) -> &RecordSchema {
&self.local
}
}

View file

@ -1,435 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::{bundle::ToLocalReason, LocalRecord, NativeRecord, SchemaBundle, SyncStatus};
use crate::error::*;
use crate::ms_time::MsTime;
use crate::vclock::{Counter, VClock};
use crate::Guid;
use rusqlite::{named_params, Connection};
use sql_support::ConnExt;
use std::convert::TryFrom;
use std::sync::Mutex;
pub struct RemergeDb {
db: Connection,
info: SchemaBundle,
client_id: sync_guid::Guid,
}
lazy_static::lazy_static! {
// XXX: We should replace this with something like the PlacesApi path-based
// hashmap, but for now this is better than nothing.
static ref DB_INIT_MUTEX: Mutex<()> = Mutex::new(());
}
impl RemergeDb {
pub(crate) fn with_connection(
mut db: Connection,
native: super::NativeSchemaAndText<'_>,
) -> Result<Self> {
let _g = DB_INIT_MUTEX.lock().unwrap();
let pragmas = "
-- The value we use was taken from Desktop Firefox, and seems necessary to
-- help ensure good performance. The default value is 1024, which the SQLite
-- docs themselves say is too small and should be changed.
PRAGMA page_size = 32768;
-- Disable calling mlock/munlock for every malloc/free.
-- In practice this results in a massive speedup, especially
-- for insert-heavy workloads.
PRAGMA cipher_memory_security = false;
-- `temp_store = 2` is required on Android to force the DB to keep temp
-- files in memory, since on Android there's no tmp partition. See
-- https://github.com/mozilla/mentat/issues/505. Ideally we'd only
-- do this on Android, and/or allow caller to configure it.
-- (although see also bug 1313021, where Firefox enabled it for both
-- Android and 64bit desktop builds)
PRAGMA temp_store = 2;
-- We want foreign-key support.
PRAGMA foreign_keys = ON;
-- we unconditionally want write-ahead-logging mode
PRAGMA journal_mode=WAL;
-- How often to autocheckpoint (in units of pages).
-- 2048000 (our max desired WAL size) / 32768 (page size).
PRAGMA wal_autocheckpoint=62
";
db.execute_batch(pragmas)?;
let tx = db.transaction()?;
super::schema::init(&tx)?;
let (info, client_id) = super::bootstrap::load_or_bootstrap(&tx, native)?;
tx.commit()?;
Ok(RemergeDb {
db,
info,
client_id,
})
}
pub(crate) fn conn(&self) -> &rusqlite::Connection {
&self.db
}
pub fn exists(&self, id: &str) -> Result<bool> {
Ok(self.db.query_row_named(
"SELECT EXISTS(
SELECT 1 FROM rec_local
WHERE guid = :guid AND is_deleted = 0
UNION ALL
SELECT 1 FROM rec_mirror
WHERE guid = :guid AND is_overridden IS NOT 1
)",
named_params! { ":guid": id },
|row| row.get(0),
)?)
}
pub fn create(&self, native: &NativeRecord) -> Result<Guid> {
let (id, record) = self
.info
.native_to_local(&native, ToLocalReason::Creation)?;
let tx = self.db.unchecked_transaction()?;
// TODO: Search DB for dupes based on the value of the fields listed in dedupe_on.
let id_exists = self.exists(id.as_ref())?;
if id_exists {
throw!(InvalidRecord::IdNotUnique);
}
if self.dupe_exists(&record)? {
throw!(InvalidRecord::Duplicate);
}
let ctr = self.counter_bump()?;
let vclock = VClock::new(self.client_id(), ctr);
let now = MsTime::now();
self.db.execute_named(
"INSERT INTO rec_local (
guid,
remerge_schema_version,
record_data,
local_modified_ms,
is_deleted,
sync_status,
vector_clock,
last_writer_id
) VALUES (
:guid,
:schema_ver,
:record,
:now,
0,
:status,
:vclock,
:client_id
)",
named_params! {
":guid": id,
":schema_ver": self.info.local.version.to_string(),
":record": record,
":now": now,
":status": SyncStatus::New as u8,
":vclock": vclock,
":client_id": self.client_id,
},
)?;
tx.commit()?;
Ok(id)
}
fn counter_bump(&self) -> Result<Counter> {
use super::meta;
let mut ctr = meta::get::<i64>(&self.db, meta::CHANGE_COUNTER)?;
assert!(
ctr >= 0,
"Corrupt db? negative global change counter: {:?}",
ctr
);
ctr += 1;
meta::put(&self.db, meta::CHANGE_COUNTER, &ctr)?;
// Overflowing i64 takes around 9 quintillion (!!) writes, so the only
// way it can realistically happen is on db corruption.
//
// FIXME: We should be returning a specific error for DB corruption
// instead of panicking, and have a maintenance routine (a la places).
Ok(Counter::try_from(ctr).expect("Corrupt db? i64 overflow"))
}
fn get_vclock(&self, id: &str) -> Result<VClock> {
Ok(self.db.query_row_named(
"SELECT vector_clock FROM rec_local
WHERE guid = :guid AND is_deleted = 0
UNION ALL
SELECT vector_clock FROM rec_mirror
WHERE guid = :guid AND is_overridden IS NOT 1",
named_params! { ":guid": id },
|row| row.get(0),
)?)
}
pub fn delete_by_id(&self, id: &str) -> Result<bool> {
let tx = self.db.unchecked_transaction()?;
let exists = self.exists(id)?;
if !exists {
// Hrm, is there anything else we should do here? Logins goes
// through the whole process (which is trickier for us...)
return Ok(false);
}
let now_ms = MsTime::now();
let vclock = self.get_bumped_vclock(id)?;
// Locally, mark is_deleted and clear sensitive fields
self.db.execute_named(
"UPDATE rec_local
SET local_modified_ms = :now_ms,
sync_status = :changed,
is_deleted = 1,
record_data = '{}',
vector_clock = :vclock,
last_writer_id = :own_id
WHERE guid = :guid",
named_params! {
":now_ms": now_ms,
":changed": SyncStatus::Changed as u8,
":guid": id,
":vclock": vclock,
":own_id": self.client_id,
},
)?;
// Mark the mirror as overridden. XXX should we clear `record_data` here too?
self.db.execute_named(
"UPDATE rec_mirror SET is_overridden = 1 WHERE guid = :guid",
named_params! { ":guid": id },
)?;
// If we don't have a local record for this ID, but do have it in the
// mirror, insert tombstone.
self.db.execute_named(
"INSERT OR IGNORE INTO rec_local
(guid, local_modified_ms, is_deleted, sync_status, record_data, vector_clock, last_writer_id, remerge_schema_version)
SELECT guid, :now_ms, 1, :changed, '{}', :vclock, :own_id, :schema_ver
FROM rec_mirror
WHERE guid = :guid",
named_params! {
":now_ms": now_ms,
":guid": id,
":schema_ver": self.info.local.version.to_string(),
":vclock": vclock,
":changed": SyncStatus::Changed as u8,
})?;
tx.commit()?;
Ok(exists)
}
fn get_local_by_id(&self, id: &str) -> Result<Option<LocalRecord>> {
Ok(self.db.try_query_row(
"SELECT record_data FROM rec_local WHERE guid = :guid AND is_deleted = 0
UNION ALL
SELECT record_data FROM rec_mirror WHERE guid = :guid AND is_overridden = 0
LIMIT 1",
named_params! { ":guid": id },
|r| r.get(0),
true, // cache
)?)
}
pub fn get_by_id(&self, id: &str) -> Result<Option<NativeRecord>> {
self.get_local_by_id(id)?
.map(|v| self.info.local_to_native(&v))
.transpose()
}
pub fn get_all(&self) -> Result<Vec<NativeRecord>> {
let mut stmt = self.db.prepare_cached(
"SELECT record_data FROM rec_local WHERE is_deleted = 0
UNION ALL
SELECT record_data FROM rec_mirror WHERE is_overridden = 0",
)?;
let rows = stmt.query_and_then(rusqlite::NO_PARAMS, |row| -> Result<NativeRecord> {
let r: LocalRecord = row.get("record_data")?;
self.info.local_to_native(&r)
})?;
rows.collect::<Result<_>>()
}
fn ensure_local_overlay_exists(&self, guid: &str) -> Result<()> {
let already_have_local: bool = self.db.query_row_named(
"SELECT EXISTS(SELECT 1 FROM rec_local WHERE guid = :guid)",
named_params! { ":guid": guid },
|row| row.get(0),
)?;
if already_have_local {
return Ok(());
}
log::debug!("No overlay; cloning one for {:?}.", guid);
self.clone_mirror_to_overlay(guid)
}
// Note: unlike the version of this function in `logins`, we return Err if
// `guid` is invalid instead of expecting the caller to check
fn clone_mirror_to_overlay(&self, guid: &str) -> Result<()> {
let sql = "
INSERT OR IGNORE INTO rec_local
(guid, record_data, vector_clock, last_writer_id, local_modified_ms, is_deleted, sync_status)
SELECT
guid, record_data, vector_clock, last_writer_id, 0 as local_modified_ms, 0 AS is_deleted, 0 AS sync_status
FROM rec_mirror
WHERE guid = :guid
";
let changed = self
.db
.execute_named_cached(sql, named_params! { ":guid": guid })?;
if changed == 0 {
log::error!("Failed to create local overlay for GUID {:?}.", guid);
throw!(ErrorKind::NoSuchRecord(guid.to_owned()));
}
Ok(())
}
fn mark_mirror_overridden(&self, guid: &str) -> Result<()> {
self.db.execute_named_cached(
"UPDATE rec_mirror SET is_overridden = 1 WHERE guid = :guid",
named_params! { ":guid": guid },
)?;
Ok(())
}
/// Combines get_vclock with counter_bump, and produces a new VClock with the bumped counter.
fn get_bumped_vclock(&self, id: &str) -> Result<VClock> {
let vc = self.get_vclock(id)?;
let counter = self.counter_bump()?;
Ok(vc.apply(self.client_id.clone(), counter))
}
/// Returns NoSuchRecord if, well, there's no such record.
fn get_existing_record(&self, rec: &NativeRecord) -> Result<LocalRecord> {
use crate::{
schema::desc::{Field, FieldType},
JsonValue,
};
let native = self.info.native_schema();
let fidx = native
.field_own_guid
.expect("FIXME: own_guid should be explicitly mandatory");
let field = &native.fields[fidx];
assert!(
matches::matches!(field.ty, FieldType::OwnGuid { .. }),
"Validation/parsing bug -- field_own_guid must point to an own_guid"
);
// Just treat missing and null the same.
let val = rec.get(&field.local_name).unwrap_or(&JsonValue::Null);
let guid = Field::validate_guid(&field.local_name, val)?;
self.get_local_by_id(guid.as_str())?
.ok_or_else(|| ErrorKind::NoSuchRecord(guid.into()).into())
}
pub fn update_record(&self, record: &NativeRecord) -> Result<()> {
let tx = self.db.unchecked_transaction()?;
// fails with NoSuchRecord if the record doesn't exist.
// Potential optimization: we could skip this for schemas that don't use
// types which need `prev` (untyped_map, record_set, ...)
let prev = self.get_existing_record(&record)?;
let (guid, record) = self
.info
.native_to_local(record, ToLocalReason::Update { prev })?;
if self.dupe_exists(&record)? {
throw!(InvalidRecord::Duplicate);
}
// Note: These fail with NoSuchRecord if the record doesn't exist.
self.ensure_local_overlay_exists(guid.as_str())?;
self.mark_mirror_overridden(guid.as_str())?;
let now_ms = MsTime::now();
let vclock = self.get_bumped_vclock(&guid)?;
let sql = "
UPDATE rec_local
SET local_modified_ms = :now_millis,
record_data = :record,
vector_clock = :vclock,
last_writer_id = :own_id,
remerge_schema_version = :schema_ver,
sync_status = max(sync_status, :changed)
WHERE guid = :guid
";
let ct = self.db.execute_named(
&sql,
named_params! {
":guid": guid,
":changed": SyncStatus::Changed as u8,
":record": record,
":schema_ver": self.info.local.version.to_string(),
":now_millis": now_ms,
":own_id": self.client_id,
":vclock": vclock,
},
)?;
debug_assert_eq!(ct, 1);
tx.commit()?;
Ok(())
}
pub fn client_id(&self) -> Guid {
// Guids are essentially free to clone unless the Guid ends up in the "large guid"
// path, which should never happen for remerge client ids, so it should
// be fine to always clone this.
self.client_id.clone()
}
pub fn bundle(&self) -> &SchemaBundle {
&self.info
}
fn dupe_exists(&self, record: &LocalRecord) -> Result<bool> {
let dedupe_field_indexes = &self.info.local.dedupe_on;
let mut dupe_exists = false;
// Return false if the schema contains no dedupe_on fields.
if dedupe_field_indexes.is_empty() {
return Ok(dupe_exists);
}
let db_records = self.get_all().unwrap_or_default();
// Return false if there are no records in the database.
if db_records.is_empty() {
return Ok(dupe_exists);
}
dupe_exists = db_records
.iter()
.filter(|db_record| {
let db_id = &db_record.as_obj()["id"];
let local_id = &record.as_obj()["id"];
// Filter out updates.
db_id != local_id
})
.any(|db_record| {
dedupe_field_indexes.iter().all(|dedupe_field_index| {
let dedupe_field = &self.info.local.fields[*dedupe_field_index];
let db_field_value = &db_record.as_obj()[&dedupe_field.local_name];
let local_field_value = &record.as_obj()[&dedupe_field.name];
db_field_value == local_field_value
})
});
Ok(dupe_exists)
}
}

View file

@ -1,51 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::error::Result;
use rusqlite::{
types::{FromSql, ToSql},
Connection,
};
use sql_support::ConnExt;
// For type safety
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd)]
pub(crate) struct MetaKey(pub &'static str);
pub(crate) const COLLECTION_NAME: MetaKey = MetaKey("remerge/collection-name");
pub(crate) const LOCAL_SCHEMA_VERSION: MetaKey = MetaKey("remerge/local-schema");
pub(crate) const NATIVE_SCHEMA_VERSION: MetaKey = MetaKey("remerge/native-schema");
pub(crate) const OWN_CLIENT_ID: MetaKey = MetaKey("remerge/client-id");
pub(crate) const CHANGE_COUNTER: MetaKey = MetaKey("remerge/change-counter");
// pub(crate) const LAST_SYNC_SERVER_MS: MetaKey = MetaKey("remerge/server-last-sync-ms");
pub(crate) fn put(db: &Connection, key: MetaKey, value: &dyn ToSql) -> Result<()> {
db.execute_named_cached(
"REPLACE INTO metadata (key, value) VALUES (:key, :value)",
&[(":key", &key.0), (":value", value)],
)?;
Ok(())
}
pub(crate) fn try_get<T: FromSql>(db: &Connection, key: MetaKey) -> Result<Option<T>> {
let res = db.try_query_one(
"SELECT value FROM metadata WHERE key = :key",
&[(":key", &key.0)],
true,
)?;
Ok(res)
}
pub(crate) fn get<T: FromSql>(db: &Connection, key: MetaKey) -> Result<T> {
let res = db.query_row_and_then(
"SELECT value FROM metadata WHERE key = ?",
rusqlite::params![key.0],
|row| row.get(0),
)?;
Ok(res)
}
// pub(crate) fn delete_meta(db: &PlacesDb, key: MetaKey) -> Result<()> {
// db.execute_named_cached("DELETE FROM moz_meta WHERE key = :key", &[(":key", &key.0)])?;
// Ok(())
// }

View file

@ -1,55 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
pub mod bootstrap;
mod bundle;
pub mod db;
mod meta;
pub mod records;
pub mod schema;
pub use bundle::SchemaBundle;
pub use records::{LocalRecord, NativeRecord};
use crate::schema::RecordSchema;
use std::sync::Arc;
/// Basically just input for initializing the database.
///
/// XXX Ideally this would just be Arc<RecordSchema>, but during bootstrapping
/// we need to insert the schema into the database, which requires that we have
/// the serialized form. Eventually we should (maybe?) allow turning a
/// RecordSchema back into a JSON (e.g. raw) schema. (We don't really want to
/// support serializing/deserializing a RecordSchema directly, since we already
/// have a stable serialization format for schemas, and don't need two).
///
/// Note: Create this with TryFrom, e.g. something like
/// `NativeSchemaAndText::try_from(some_str)` after bringing
/// `std::convert::TryFrom` into scope.
///
#[derive(Clone)]
pub struct NativeSchemaAndText<'a> {
pub parsed: Arc<RecordSchema>,
pub source: &'a str,
}
impl<'a> std::convert::TryFrom<&'a str> for NativeSchemaAndText<'a> {
type Error = crate::schema::SchemaError;
fn try_from(s: &'a str) -> std::result::Result<Self, Self::Error> {
let schema = crate::schema::parse_from_string(s, false)?;
Ok(Self {
parsed: Arc::new(schema),
source: s,
})
}
}
// This doesn't really belong here.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(u8)]
pub enum SyncStatus {
Synced = 0,
Changed = 1,
New = 2,
}

View file

@ -1,211 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! This module provides wrappers around JsonValue that allow for better
//! documentation and type safety for the format a method (usually in `db.rs`)
//! is expected to take/return.
//!
//! XXX The names "Local" vs "Native" here (and elsewhere) are confusing, but they
//! beat passing bare `serde_json::Value`s around everywhere, and they match the
//! terms used in the RFC. I'm open to name suggestions, though.
use crate::{error::*, JsonObject, JsonValue};
use std::marker::PhantomData;
mod private {
/// Sealed trait to prevent code from outside from implementing RecordFormat
/// for anything other than the implementations here.
pub trait Sealed {}
impl Sealed for super::LocalFormat {}
impl Sealed for super::NativeFormat {}
}
/// Used to distinguish different categories of records.
///
/// Most of the bounds are just so that we don't have to manually implement
/// traits we could otherwise derive -- in practice we just use this in a
/// PhantomData.
pub trait RecordFormat:
private::Sealed + Copy + std::fmt::Debug + PartialEq + 'static + Sync + Send
{
}
/// Record format for records in the current local schema. This is the format
/// which we insert into the database, and it should always be newer or
/// equal to the native format.
#[derive(Debug, Clone, PartialEq, Copy, Eq, Ord, Hash, PartialOrd)]
pub struct LocalFormat;
/// A record in the native format understood by the local application using
/// remerge. Data that comes from the FFI, and that is returned over the FFI
/// should be in this format.
#[derive(Debug, Clone, PartialEq, Copy, Eq, Ord, Hash, PartialOrd)]
pub struct NativeFormat;
// Note: For sync we'll likely want a RemoteFormat/RemoteRecord too.
impl RecordFormat for LocalFormat {}
impl RecordFormat for NativeFormat {}
/// A [`Record`] in [`LocalFormat`].
pub type LocalRecord = Record<LocalFormat>;
/// A [`Record`] in [`NativeFormat`].
pub type NativeRecord = Record<NativeFormat>;
/// A wrapper around a `JsonObject` which indicates what format the record
/// is in. Note that converting between formats cannot be done without schema
/// information, so this is a paper-thin wrapper.
///
/// # Which record format to use
///
/// - Data coming from the FFI, or being returned to the FFI is always in
/// [`NativeFormat`], so use NativeRecord.
///
/// - Data going into the database, or that came out of the database is in
/// [`LocalFormat`], so use LocalRecord.
///
/// - Data from remote servers will likely be a future `RemoteFormat`, and you'd
/// use [`RemoteRecord`].
///
/// Converting a record from one format to another requires schema
/// information. This can generally be done by methods on `SchemaBundle`.
#[repr(transparent)]
#[derive(Debug, Clone, PartialEq)]
pub struct Record<F: RecordFormat>(pub(crate) JsonObject, PhantomData<F>);
impl<F: RecordFormat> Record<F> {
/// Create a new record with the format `F` directly.
///
/// The name of this function contains `unchecked` as it's up to the caller
/// to ensure that the `record_json` is actually in the requested format.
/// See the [`Record`] docs for how to make this determination.
#[inline]
pub fn new_unchecked(record_json: JsonObject) -> Self {
Self(record_json, PhantomData)
}
/// If `record` is a JSON Object, returns `Ok(Self::new_unchecked(record))`,
/// otherwise, returns `Err(InvalidRecord::NotJsonObject)`
///
/// The name of this function contains `unchecked` as it's up to the caller
/// to ensure that the `record_json` is actually in the requested format.
/// See the [`Record`] docs for how to make this determination.
pub fn from_value_unchecked(record_json: JsonValue) -> Result<Self, InvalidRecord> {
if let JsonValue::Object(m) = record_json {
Ok(Self::new_unchecked(m))
} else {
Err(crate::error::InvalidRecord::NotJsonObject)
}
}
#[inline]
pub fn as_obj(&self) -> &JsonObject {
&self.0
}
#[inline]
pub fn into_obj(self) -> JsonObject {
self.0
}
#[inline]
pub fn into_val(self) -> JsonValue {
self.into_obj().into()
}
}
impl NativeRecord {
/// Parse a record from a str given to us over the FFI, returning an error
/// if it's obviously bad (not a json object).
pub fn from_native_str(s: &str) -> Result<Self> {
let record: JsonValue = serde_json::from_str(s)?;
if let JsonValue::Object(m) = record {
Ok(Self(m, PhantomData))
} else {
Err(crate::error::InvalidRecord::NotJsonObject.into())
}
}
}
impl<F: RecordFormat> std::ops::Deref for Record<F> {
type Target = JsonObject;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_obj()
}
}
impl<F: RecordFormat> AsRef<JsonObject> for Record<F> {
#[inline]
fn as_ref(&self) -> &JsonObject {
self.as_obj()
}
}
impl<F: RecordFormat> From<Record<F>> for JsonValue {
#[inline]
fn from(r: Record<F>) -> JsonValue {
r.into_val()
}
}
impl<F: RecordFormat> From<Record<F>> for JsonObject {
#[inline]
fn from(r: Record<F>) -> JsonObject {
r.into_obj()
}
}
impl<'a, F: RecordFormat> From<&'a Record<F>> for &'a JsonObject {
#[inline]
fn from(r: &'a Record<F>) -> &'a JsonObject {
&r.0
}
}
impl From<JsonObject> for NativeRecord {
#[inline]
fn from(o: JsonObject) -> NativeRecord {
NativeRecord::new_unchecked(o)
}
}
impl std::convert::TryFrom<JsonValue> for NativeRecord {
type Error = Error;
#[inline]
fn try_from(v: JsonValue) -> Result<NativeRecord, Self::Error> {
Ok(Self::from_value_unchecked(v)?)
}
}
impl<F: RecordFormat> std::fmt::Display for Record<F> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut writer = crate::util::FormatWriter(f);
serde_json::to_writer(&mut writer, &self.0).map_err(|_| std::fmt::Error)
}
}
// Separated because we're going to glob import rusqlite::types::*, since we
// need nearly all of them.
mod sql_impls {
use super::LocalRecord;
use rusqlite::{types::*, Result};
impl ToSql for LocalRecord {
fn to_sql(&self) -> Result<ToSqlOutput<'_>> {
Ok(ToSqlOutput::from(self.to_string()))
}
}
impl FromSql for LocalRecord {
fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
match value {
ValueRef::Text(s) => serde_json::from_slice(s),
ValueRef::Blob(b) => serde_json::from_slice(b),
_ => return Err(FromSqlError::InvalidType),
}
.map(LocalRecord::new_unchecked)
.map_err(|err| FromSqlError::Other(err.into()))
}
}
}

View file

@ -1,45 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
const VERSION: i64 = 1;
use crate::error::Result;
use rusqlite::Connection;
use sql_support::ConnExt;
pub fn init(db: &Connection) -> Result<()> {
let user_version = db.query_one::<i64>("PRAGMA user_version")?;
if user_version == 0 {
create(db.conn())?;
} else if user_version != VERSION {
if user_version < VERSION {
upgrade(db.conn(), user_version)?;
} else {
log::warn!(
"Loaded future database schema version {} (we only understand version {}). \
Optimistically continuing anyway.",
user_version,
VERSION
)
}
}
Ok(())
}
fn upgrade(_: &Connection, from: i64) -> Result<()> {
log::debug!("Upgrading schema from {} to {}", from, VERSION);
if from == VERSION {
return Ok(());
}
unimplemented!("FIXME: migration");
}
pub fn create(db: &Connection) -> Result<()> {
log::debug!("Creating schema");
db.execute_batch(include_str!("../../sql/schema.sql"))?;
db.execute_batch(&format!(
"PRAGMA user_version = {version}",
version = VERSION
))?;
Ok(())
}

View file

@ -1,301 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! This module implements the untyped_map data type. This works as follows:
//!
//! - The "native" representation of an "untyped_map" value is just the
//! underlying map data, e.g. a `JsonObject`.
//! - To convert a native map to any other format of map, the tombstone list
//! must be provided (of course, it's initially empty), creating an
//! `UntypedMap`
//! - To convert a local map to a native map, the tombstone gets stored in the
//! database (if applicable) and then discarded. (Note: this happens in
//! storage/records.rs, with the other local -> native conversion code)
//!
//! See the RFC for the merge algorithm for these.
use crate::{error::*, JsonObject, JsonValue};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
pub type MapData = BTreeMap<String, JsonValue>;
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Default)]
pub struct UntypedMap {
map: MapData,
#[serde(default)]
tombs: BTreeSet<String>,
}
// duplication is annoying here, but keeps the api clean and isn't that much
// code
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum OnCollision {
/// Remove the map entry, keeping the tombstone (for prefer_deletion: true, or if
/// the tombstone is newer than the data in the map).
DeleteEntry,
/// Keep the map entry, remove the tombstone (for prefer_deletion: false, or
/// if the data in the map is newer than tombstones, e.g. when updating a
/// record with new data).
KeepEntry,
}
impl From<OnCollision> for CollisionHandling {
fn from(src: OnCollision) -> Self {
match src {
OnCollision::DeleteEntry => Self::DeleteEntry,
OnCollision::KeepEntry => Self::KeepEntry,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum CollisionHandling {
/// Emit a `UntypedMapTombstoneCollision` error (a decent default if the map
/// data and tombstones shouldn't collide).
Error,
/// See OnCollision::DeleteEntry
DeleteEntry,
/// See OnCollision::KeepEntry
KeepEntry,
}
impl UntypedMap {
pub fn empty() -> Self {
Self::default()
}
pub fn from_native(map: JsonObject) -> Self {
Self {
map: map.into_iter().collect(),
tombs: Default::default(),
}
}
fn new_impl(
mut map: MapData,
mut tombs: BTreeSet<String>,
on_tombstone_map_collision: CollisionHandling,
) -> Result<UntypedMap> {
// The collision set should usually be empty, so the cloning probably isn't worth trying to avoid.
let collided = map
.keys()
.filter(|k| tombs.contains(k.as_str()))
.cloned()
.collect::<Vec<_>>();
for key in collided {
match on_tombstone_map_collision {
CollisionHandling::Error => {
// this is definitely PII, so only log at trace level.
log::trace!("UntypedMap tombstone collision for key {:?}", key);
throw!(ErrorKind::UntypedMapTombstoneCollision)
}
CollisionHandling::DeleteEntry => {
map.remove(&key);
}
CollisionHandling::KeepEntry => {
tombs.remove(&key);
}
}
}
Ok(UntypedMap { map, tombs })
}
pub fn new<M, T>(map: M, tombstones: T, on_collision: OnCollision) -> Self
where
M: IntoIterator<Item = (String, JsonValue)>,
T: IntoIterator<Item = String>,
{
Self::new_impl(
map.into_iter().collect(),
tombstones.into_iter().collect(),
on_collision.into(),
)
.expect("bug: new_impl error when not passed CollisionHandling::Error")
}
pub fn try_new<M, T>(map: M, tombstones: T) -> Result<Self>
where
M: IntoIterator<Item = (String, JsonValue)>,
T: IntoIterator<Item = String>,
{
Self::new_impl(
map.into_iter().collect(),
tombstones.into_iter().collect(),
CollisionHandling::Error,
)
}
pub fn into_local_json(self) -> JsonValue {
serde_json::to_value(self).expect("UntypedMap can always be represented as json")
}
pub fn from_local_json(json: JsonValue) -> Result<Self> {
serde_json::from_value(json)
.map_err(Error::from)
.and_then(|Self { map, tombs }| {
// Ensure the entry is valid. TODO: eventually maintenance will
// need to handle this. Is this fine until then?
Self::try_new(map, tombs)
})
}
// Note: we don't use NativeRecord and such here since these are fields on
// records, and not actually records themselves. It's not really clear to me
// if/how we could use the RecordFormat markers or similar here either...
pub(crate) fn update_local_from_native(
old_local: JsonValue,
new_native: JsonValue,
) -> Result<JsonValue> {
Ok(Self::from_local_json(old_local)?
.with_native(crate::util::into_obj(new_native)?)
.into_local_json())
}
/// Create a new `UntypedMap` representing an update of our data to the data in
/// `new_native`, updating `tombs` in the process. Specifically:
/// 1. entries in `tombs` which refer to keys in `new_native` are removed
/// 2. entries in `self.map` which are missing from `new_native` are added
/// to `tombs`.
#[must_use]
pub fn with_native(&self, new_native: JsonObject) -> Self {
let now_missing = self.map.keys().filter(|&k| !new_native.contains_key(k));
let tombs = now_missing
.chain(self.tombs.iter())
.filter(|t| !new_native.contains_key(t.as_str()))
.cloned()
.collect::<BTreeSet<String>>();
Self {
map: new_native.into_iter().collect(),
tombs,
}
}
pub fn into_native(self) -> JsonObject {
self.map.into_iter().collect()
}
#[inline]
pub fn map(&self) -> &MapData {
&self.map
}
#[inline]
pub fn tombstones(&self) -> &BTreeSet<String> {
&self.tombs
}
#[cfg(test)]
pub(crate) fn assert_tombstones<V>(&self, expect: V)
where
V: IntoIterator,
V::Item: Into<String>,
{
assert_eq!(
self.tombs,
expect
.into_iter()
.map(|s| s.into())
.collect::<BTreeSet<_>>(),
);
}
}
// Note: no derefmut, need to maintain `tombs` array.
impl std::ops::Deref for UntypedMap {
type Target = MapData;
#[inline]
fn deref(&self) -> &Self::Target {
self.map()
}
}
#[cfg(test)]
mod test {
use super::*;
use matches::matches;
#[test]
fn test_new_err() {
let v = UntypedMap::try_new(
json_obj!({
"foo": 3,
"bar": 4,
}),
vec!["a".to_string()],
)
.unwrap();
assert_eq!(v["foo"], 3);
assert_eq!(v["bar"], 4);
assert_eq!(v.len(), 2);
v.assert_tombstones(vec!["a"]);
let e = UntypedMap::try_new(
json_obj!({
"foo": 3,
"bar": 4,
}),
vec!["foo".to_string()],
)
.unwrap_err();
assert!(matches!(e.kind(), ErrorKind::UntypedMapTombstoneCollision));
}
#[test]
fn test_new_delete() {
let v = UntypedMap::new(
json_obj!({
"foo": 3,
"bar": 4,
}),
vec!["foo".to_string(), "quux".to_string()],
OnCollision::DeleteEntry,
);
assert!(!v.contains_key("foo"));
assert_eq!(v["bar"], 4);
assert_eq!(v.len(), 1);
v.assert_tombstones(vec!["foo", "quux"]);
}
#[test]
fn test_new_keep() {
let v = UntypedMap::new(
json_obj!({
"foo": 3,
"bar": 4,
}),
vec!["foo".to_string(), "quux".to_string()],
OnCollision::KeepEntry,
);
assert_eq!(v["foo"], 3);
assert_eq!(v["bar"], 4);
assert_eq!(v.len(), 2);
v.assert_tombstones(vec!["quux"]);
}
#[test]
fn test_update() {
let o = UntypedMap::try_new(
json_obj!({
"foo": 3,
"bar": 4,
}),
vec!["frob".to_string(), "quux".to_string()],
)
.unwrap();
let updated = o.with_native(json_obj!({
"foo": 5,
"quux": 10,
}));
assert_eq!(updated["foo"], 5);
assert_eq!(updated["quux"], 10);
assert_eq!(updated.len(), 2);
updated.assert_tombstones(vec!["bar", "frob"]);
}
}

View file

@ -1,99 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use crate::{JsonObject, JsonValue};
use std::io::{Error as IoError, ErrorKind as IoErrorKind, Result as IoResult, Write};
/// For use with `#[serde(skip_serializing_if = )]`
#[inline]
pub fn is_default<T: PartialEq + Default>(v: &T) -> bool {
*v == T::default()
}
/// Returns true if the byte `b` is a valid base64url byte.
#[inline]
#[rustfmt::skip]
pub fn is_base64url_byte(b: u8) -> bool {
// For some reason, if this is indented the way rustfmt wants,
// the next time this file is opened, VSCode deduces it *must*
// actually use 8 space indent, and converts the whole file on
// save. This is a VSCode bug, but is really annoying, so I'm
// just preventing rustfmt from reformatting this to avoid it.
(b'A' <= b && b <= b'Z') ||
(b'a' <= b && b <= b'z') ||
(b'0' <= b && b <= b'9') ||
b == b'-' ||
b == b'_'
}
/// Return with the provided Err(error) after invoking Into conversions
///
/// Essentially equivalent to explicitly writing `Err(e)?`, but logs the error,
/// and is more well-behaved from a type-checking perspective.
macro_rules! throw {
($e:expr $(,)?) => {{
let e = $e;
log::error!("Error: {}", e);
return Err(std::convert::Into::into(e));
}};
}
/// Like assert! but with `throw!` and not `panic!`.
///
/// Equivalent to explicitly writing `if !cond { throw!(e) }`, but logs what the
/// failed condition was (at warning levels).
macro_rules! ensure {
($cond:expr, $e:expr $(,)?) => {
if !($cond) {
log::warn!(concat!("Ensure ", stringify!($cond), " failed!"));
throw!($e)
}
};
}
/// Like `serde_json::json!` but produces a `JsonObject` (aka a
/// `serde_json::Map<String, serde_json::Value>`).
#[cfg(test)]
macro_rules! json_obj {
($($toks:tt)*) => {
match serde_json::json!($($toks)*) {
serde_json::Value::Object(o) => o,
_ => panic!("bad arg to json_obj!"),
}
};
}
pub(crate) fn into_obj(v: JsonValue) -> crate::Result<JsonObject, crate::InvalidRecord> {
match v {
JsonValue::Object(o) => Ok(o),
x => {
log::error!("Expected json object");
log::trace!(" Got: {:?}", x);
Err(crate::InvalidRecord::NotJsonObject)
}
}
}
/// Helper to allow passing a std::fmt::Formatter to a function needing
/// std::io::Write.
///
/// Mainly used to implement std::fmt::Display for the Record types without
/// requiring cloning them (which would be needed because serde_json::Value is
/// the one that impls Display, not serde_json::Map, aka JsonObject).
///
/// Alternatively we could have done `serde_json::to_string(self).unwrap()` or
/// something, but this is cleaner.
pub struct FormatWriter<'a, 'b>(pub &'a mut std::fmt::Formatter<'b>);
impl<'a, 'b> Write for FormatWriter<'a, 'b> {
fn write(&mut self, buf: &[u8]) -> IoResult<usize> {
std::str::from_utf8(buf)
.ok()
.and_then(|s| self.0.write_str(s).ok())
.ok_or_else(|| IoError::new(IoErrorKind::Other, std::fmt::Error))?;
Ok(buf.len())
}
fn flush(&mut self) -> IoResult<()> {
Ok(())
}
}

View file

@ -1,265 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Our implementation of vector clocks. See the remerge RFC's appendix for an
//! overview of how these work if you're unfamiliar.
use crate::Guid;
use rusqlite::types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef};
use std::collections::BTreeMap;
pub type Counter = u64;
/// A vector clock.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct VClock(pub BTreeMap<Guid, Counter>);
/// Basically equivalent to Option<std::cmp::Ordering>, but more explicit about
/// what each value means. The variant documentation assumes this is generated by
/// something similar to `lhs.get_ordering(rhs)`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ClockOrdering {
/// The two clocks are equivalent.
Equivalent,
/// The `lhs` clock is an ancestor of the `rhs` clock.
Ancestor,
/// The `lhs` clock is a descendent of the `rhs` clock.
Descendent,
/// The two clocks are in conflict, and some other means of resolution must
/// be used.
Conflicting,
}
impl VClock {
pub fn new(own_client_id: Guid, counter: Counter) -> Self {
VClock(std::iter::once((own_client_id, counter)).collect())
}
/// Determine the ordering between `self` and `other`.
pub fn get_ordering(&self, other: &VClock) -> ClockOrdering {
let mut seen_gt = false;
let mut seen_lt = false;
let self_kvs = self.0.iter().map(|(id, &ctr)| (id, Some(ctr), None));
let other_kvs = other.0.iter().map(|(id, &ctr)| (id, None, Some(ctr)));
for (k, sv, ov) in self_kvs.chain(other_kvs) {
let sv = sv.unwrap_or_else(|| self.get(k));
let ov = ov.unwrap_or_else(|| other.get(k));
if sv > ov {
seen_gt = true;
}
if sv < ov {
seen_lt = true;
}
if seen_gt && seen_lt {
// No need to keep going once we've seen both.
return ClockOrdering::Conflicting;
}
}
match (seen_gt, seen_lt) {
(false, false) => ClockOrdering::Equivalent,
(true, false) => ClockOrdering::Descendent,
(false, true) => ClockOrdering::Ancestor,
(true, true) => ClockOrdering::Conflicting,
}
}
pub fn is_equivalent(&self, o: &VClock) -> bool {
self.get_ordering(o) == ClockOrdering::Equivalent
}
pub fn is_ancestor_of(&self, o: &VClock) -> bool {
self.get_ordering(o) == ClockOrdering::Ancestor
}
pub fn is_descendent_of(&self, o: &VClock) -> bool {
self.get_ordering(o) == ClockOrdering::Descendent
}
pub fn is_conflicting(&self, o: &VClock) -> bool {
self.get_ordering(o) == ClockOrdering::Conflicting
}
/// Get the clock's value for client_id, or 0 if it hasn't seen it.
pub fn get(&self, client_id: &Guid) -> Counter {
self.0.get(&client_id).copied().unwrap_or_default()
}
/// Add one to the clock's value for client_id
pub fn increment(&mut self, client_id: Guid) {
*self.0.entry(client_id).or_default() += 1
}
/// Assign `value` for client_id directly. Usually you want `apply` instead
pub fn set_directly(&mut self, client_id: Guid, value: Counter) {
if value == 0 {
self.0.remove(&client_id);
} else {
self.0.insert(client_id, value);
}
}
/// If `value` is greater than the current value for client_id store that
/// instead. Otherwise, do nothing.
///
/// Notes that this clock has seen the `value`th event of `client_id`.
#[must_use]
pub fn apply(mut self, client_id: Guid, value: Counter) -> Self {
if value == 0 {
// Avoid inserting 0 if we can help it.
return self;
}
let old_value = self.0.entry(client_id).or_default();
if *old_value < value {
*old_value = value;
}
self
}
#[must_use]
pub fn combine(self, o: &VClock) -> Self {
o.0.iter()
.fold(self, |accum, (id, ctr)| accum.apply(id.clone(), *ctr))
}
}
impl<'a> IntoIterator for &'a VClock {
type IntoIter = std::collections::btree_map::Iter<'a, Guid, Counter>;
type Item = (&'a Guid, &'a Counter);
#[inline]
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
impl PartialOrd for VClock {
fn partial_cmp(&self, other: &VClock) -> Option<std::cmp::Ordering> {
match self.get_ordering(other) {
ClockOrdering::Equivalent => Some(std::cmp::Ordering::Equal),
ClockOrdering::Ancestor => Some(std::cmp::Ordering::Less),
ClockOrdering::Descendent => Some(std::cmp::Ordering::Greater),
ClockOrdering::Conflicting => None,
}
}
}
impl ToSql for VClock {
fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
// serde_json::to_string only fails for types which can't be encoded as
// JSON (recursive graphs, maps with non-string keys, etc) so unwrap
// here is fine.
Ok(ToSqlOutput::from(serde_json::to_string(self).unwrap()))
}
}
impl FromSql for VClock {
fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
value.as_str().and_then(|s| {
serde_json::from_str(s).map_err(|e| {
log::error!("Failed to read vector clock from SQL");
log::debug!(" error: {:?}", e);
FromSqlError::Other(Box::new(e))
})
})
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_clock_basic() {
let id = Guid::new("000000000000");
let a = VClock::new(id.clone(), 1);
assert!(!a.is_descendent_of(&a));
assert!(!a.is_ancestor_of(&a));
assert!(!a.is_conflicting(&a));
assert!(a.is_equivalent(&a));
let b = VClock::new(id, 2);
assert!(!a.is_descendent_of(&b));
assert!(b.is_descendent_of(&a));
assert!(!b.is_ancestor_of(&a));
assert!(a.is_ancestor_of(&b));
assert!(!b.is_conflicting(&a));
assert!(!a.is_conflicting(&b));
assert!(!b.is_equivalent(&a));
assert!(!a.is_equivalent(&b));
assert!(a < b);
assert!(a <= b);
assert!(b > a);
assert!(b >= a);
assert_ne!(a, b);
// b completely subsumes a, so this just copies b.
let b2 = b.clone().combine(&a);
assert!(b.is_equivalent(&b2));
assert!(b2.is_equivalent(&b));
assert_eq!(b2, b);
}
#[test]
fn test_clock_multi_ids() {
let id0 = Guid::new("000000000000");
let id1 = Guid::new("111111111111");
let a = VClock::new(id0.clone(), 1).apply(id1, 2);
let b = VClock::new(id0, 1);
assert!(a.is_descendent_of(&b));
assert!(!b.is_descendent_of(&a));
assert!(b.is_ancestor_of(&a));
assert!(!a.is_ancestor_of(&b));
assert!(!b.is_conflicting(&a));
assert!(!a.is_conflicting(&b));
assert!(!b.is_equivalent(&a));
assert!(!a.is_equivalent(&b));
}
#[allow(clippy::neg_cmp_op_on_partial_ord)]
#[test]
fn test_clock_conflict() {
let id0 = Guid::new("000000000000");
let id1 = Guid::new("111111111111");
let a = VClock::new(id0.clone(), 1).apply(id1, 2);
let b = VClock::new(id0, 2);
assert!(b.is_conflicting(&a));
assert!(a.is_conflicting(&b));
assert!(!b.is_equivalent(&a));
assert!(!a.is_equivalent(&b));
// all of these should be false, per PartialOrd rules (conflicting clocks are incomparable)
assert!(!(a < b));
assert!(!(a <= b));
assert!(!(b > a));
assert!(!(b >= a));
assert_ne!(a, b);
}
#[test]
fn test_clock_combine() {
let id0 = Guid::new("000000000000");
let id1 = Guid::new("111111111111");
let a = VClock::new(id0.clone(), 1).apply(id1, 2);
let b = VClock::new(id0, 2);
let updated = b.clone().combine(&a);
assert!(updated.is_descendent_of(&a));
assert!(updated.is_descendent_of(&b));
assert!(a.is_ancestor_of(&updated));
assert!(b.is_ancestor_of(&updated));
assert!(!updated.is_conflicting(&a));
assert!(!updated.is_conflicting(&b));
assert!(!updated.is_equivalent(&a));
assert!(!updated.is_equivalent(&b));
}
}

View file

@ -1,497 +0,0 @@
[
{
"error": "WrongFormatVersion",
"remote": true,
"schema": {
"name": "test",
"format_version": 5
}
},
{
"error": "VersionParseFailed",
"schema": {
"name": "test",
"version": "garbage",
"fields": []
}
},
{
"error": "VersionReqParseFailed",
"schema": {
"name": "test",
"version": "1.0.0",
"required_version": "garbage",
"fields": []
}
},
{
"error": "LocalRequiredVersionNotCompatible",
"schema": {
"name": "test",
"version": "1.0.0",
"required_version": "2.0.0",
"fields": []
}
},
{
"error": "MissingRemergeFeature",
"schema": {
"name": "test",
"version": "1.0.0",
"fields": [],
"remerge_features_used": ["example"]
}
},
{
"error": "DuplicateField",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "text"
},
{
"name": "example",
"type": "text"
}
]
}
},
{
"error": "IllegalMergeForType",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "text",
"merge": "take_max"
}
]
}
},
{
"error": "IllegalMergeForType",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "timestamp",
"merge": "take_sum"
}
]
}
},
{
"error": "UnknownCompositeRoot",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "text",
"composite_root": "nothing"
}
]
}
},
{
"error": "TypeForbidsMergeStrat",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "own_guid",
"merge": "take_newest"
}
]
}
},
{
"error": "TypeNotComposite",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "own_guid",
"composite_root": "something"
},
{
"name": "something",
"type": "text"
}
]
}
},
{
"error": "DeprecatedRequiredConflict",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "text",
"deprecated": true,
"required": true
}
]
}
},
{
"error": "NoBoundsCheckInfo",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "integer",
"max": 1000,
"min": 0
}
]
}
},
{
"error": "BadNumBounds",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "integer",
"min": 1000,
"max": 0,
"if_out_of_bounds": "clamp"
}
]
}
},
{
"error": "BadNumDefault",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "example",
"type": "integer",
"min": 0,
"max": 1000,
"if_out_of_bounds": "clamp",
"default": 4000
}
]
}
},
{
"error": "NumberClampOnCompositeRoot",
"skip": true,
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "integer",
"min": 0,
"max": 1000,
"if_out_of_bounds": "clamp"
},
{
"name": "field2",
"type": "text",
"composite_root": "field1"
}
]
}
},
{
"error": "CompositeRecursion",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "integer",
"composite_root": "field2"
},
{
"name": "field2",
"type": "text",
"composite_root": "field1"
}
]
}
},
{
"error": "BadDefaultUrl",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "url",
"default": "not a url"
}
]
}
},
{
"error": "BadDefaultOrigin",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "url",
"is_origin": true,
"default": "http://www.example.com/not-a-origin"
}
]
}
},
{
"error": "MergeTakeSumNoMax",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "integer",
"merge": "take_sum",
"max": 30,
"min": 0,
"if_out_of_bounds": "clamp"
}
]
}
},
{
"error": "DefaultTimestampTooOld",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "timestamp",
"default": 0
}
]
}
},
{
"error": "DedupeOnWithDuplicateField",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "text"
},
{
"name": "field2",
"type": "text",
"merge": "duplicate"
}
],
"dedupe_on": ["field1"]
}
},
{
"error": "UnknownDedupeOnField",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "text"
}
],
"dedupe_on": [
"field2"
]
}
},
{
"error": "PartialCompositeDedupeOn",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "text"
},
{
"name": "field2",
"type": "text",
"composite_root": "field1"
}
],
"dedupe_on": [
"field2"
]
}
},
{
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "text",
"merge": "take_newest"
},
{
"name": "field2",
"type": "text",
"composite_root": "field1"
}
],
"dedupe_on": [
"field1",
"field2"
]
}
},
{
"error": "MultipleOwnGuid",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "own_guid"
},
{
"name": "field2",
"type": "own_guid"
}
]
}
},
{
"error": "MultipleUpdateAt",
"schema": {
"version": "1.0.0",
"name": "test",
"fields": [
{
"name": "field1",
"type": "timestamp",
"semantic": "updated_at"
},
{
"name": "field2",
"type": "timestamp",
"semantic": "updated_at"
}
]
}
},
{
"error": "LegacyMissingId",
"schema": {
"version": "1.0.0",
"name": "test",
"legacy": true,
"fields": [
{
"name": "stuff",
"type": "text"
}
]
}
},
{
"error": "DeprecatedFieldDedupeOn",
"schema": {
"version": "1.0.0",
"name": "test",
"legacy": true,
"fields": [
{
"name": "stuff",
"type": "text",
"deprecated": true
}
],
"dedupe_on": [
"stuff"
]
}
},
{
"schema": {
"version": "1.0.0",
"name": "logins-example",
"legacy": true,
"fields": [
{
"name": "id",
"type": "own_guid"
},
{
"name": "formSubmitUrl",
"type": "url",
"is_origin": true,
"local_name": "formActionOrigin"
},
{
"name": "httpRealm",
"type": "text",
"composite_root": "formSubmitUrl"
},
{
"name": "timesUsed",
"type": "integer",
"merge": "take_sum"
},
{
"name": "hostname",
"type": "url",
"is_origin": true,
"required": true
},
{
"name": "password",
"type": "text",
"required": true
},
{
"name": "username",
"type": "text"
}
],
"dedupe_on": [
"username",
"password",
"hostname"
]
}
}
]

View file

@ -1,43 +0,0 @@
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
const SCHEMAS: &str = include_str!("./test_schemas.json");
#[derive(Serialize, Deserialize, Debug)]
struct SchemaTest {
#[serde(default)]
error: Option<String>,
#[serde(default)]
remote: bool,
#[serde(default)]
skip: bool,
schema: JsonValue,
}
#[test]
fn test_validation() {
let schemas: Vec<SchemaTest> = serde_json::from_str(SCHEMAS).unwrap();
for (i, v) in schemas.into_iter().enumerate() {
if v.skip {
eprintln!("Skipping schema number {}", i);
continue;
}
let schema_str = v.schema.to_string();
let res = remerge::schema::parse_from_string(&schema_str, v.remote);
if let Some(e) = v.error {
if let Err(val_err) = res {
let ve_str = format!("{:?}", val_err);
if !ve_str.contains(&e) {
panic!("Schema number {} should fail to validate with an error like {:?}, but instead failed with {:?}", i, e, ve_str);
}
} else {
panic!(
"Schema number {} should fail to validate with error {:?}, but passed",
i, e
);
}
} else if let Err(v) = res {
panic!("Schema number {} should pass, but failed with {:?}", i, v);
}
}
}

View file

@ -1,957 +0,0 @@
# Remerge: A syncable store for generic data types
The Remerge component provides a data storage mechanism. Using Remerge
has similar ergonomics to using the venerable
[JSONFile](https://mozilla.github.io/firefox-browser-architecture/text/0012-jsonfile.html)
mechanism, but with some ability to sync. It does not go as far as
being built with true CRDTs, but in exchange it is very easy and
natural to use for common use cases.
## Background/Goals/Etc
Synchronization and synchronized data types are table stakes for
client software in the modern era. Despite this, implementation of
syncing is technically challenging and often left as an
afterthought. Implementation of syncing features often relies on the
Sync team implementing a new component. This is unworkable.
The Mentat project was an attempt to solve this problem globally by
defining a storage mechanism that could sync natively and was easy to
use by client developers. Unfortunately, with the (indefinite)
"[pause](https://mail.mozilla.org/pipermail/firefox-dev/2018-September/006780.html)"
of Mentat, there's no obvious path forward for new synced data types.
After some thought, I've come up with a design that gets us some of the benefits
of Mentat with the following major benefits (compared to Mentat):
- Works on top of Sync 1.5, including interfacing with existing
collections. This means we don't need to throw away our existing
server-side mechanisms or do costly migrations.
- Doesn't change the sync data model substantially. Unlike Mentat,
which had a very ambitious data model and query language, Remerge is
designed around normal collections of records which are "dumb" JSON
blobs.
- Has storage which is straightforward to implement on top of SQLite.
In one of the AllHands, Lina had a presentation which defined three different
types of sync data stores.
1. Tree stores (bookmarks). The defining features of these stores are that:
- They represent a tree.
- They are considered corrupt if tree constraints are invalidated.
2. Log stores (history). The defining features of these stores are that:
- Typically too large to fit in memory.
- We expect to only sync a subset of the records in them.
3. Record stores (logins, addresses, credit cards, addons, etc)
This document describes a plan for syncing "Type 3" data stores in a generic
way, extended to allow the following additional major features not present in
the current system:
1. Schema evolution with both forwards and backwards compatibility, with the
ability to break compatibility gradually, avoiding breaking sync with
versions that are still supported
2. Flexible merge logic.
3. Relatively rich data types, with the ability to add more data types over time
without breaking old clients (which IIUC was a problem mentat had).
### Main Ideas
The high level summary of the "big ideas" behind Remerge, in very rough order of
importance, are as follows:
1. By syncing the schema, we can allow older clients to understand enough
semantic data about the record format so that they're still able to sync and
perform correct three way merges, even if they don't actually understand how
to use the data that they're merging.
See the [schema format reference](schema-format.md) for details.
2. We can allow schema evolution and forwards compatibility by storing both the
schema version, and a cutoff version.
See the section on [version strings](version-strings.md) for details.
3. We can add new functionality / data type support to `remerge` without
breaking old clients by including information about the set of functionality
the schema requires in the schema itself.
See the section on [extending remerge](extending-remerge.md) for details.
4. We detect conflicts in a much more accurate manner by using vector clocks,
which allows us to avoid running merge logic in far more cases than we
currently do.
See the section on [clocks and timestamps](clocks.md) for details.
### Intentional Limitations
Remerge comes with the following limitations, which simplify both the API and
the implementation, allow for better compatibility with sync15, or improve ease of use.
#### 1. Large data sets are out of scope.
For collections that expect to store many thousands of records for typical use
cases, another solution is required (we should encourage those teams to reach
out to us instead).
#### 2. No support for inter-record references.
Note: Some form of support for this may be added in a future version.
#### 3. Limited read API
The initial version will broadly only support simple `get_by_id` and `get_all`
queries.
(Note: a `get_records_with_field_value(field_name, field_value, limit)` might
also be necessary in the initial version for form autofill)
While the set of functions for this is sure to expand, a query builder API (let
alone a query language) is very much out of scope.
Because the data sets are not expected to be large, it's expected that calling
`get_all` (or using a cached version) and performing the query on the in-memory
data set is sufficient; for cases where it's not, we can add support via fixed
functions.
#### 4. Limited write API
The initial version will broadly only support `insert`, `update`, `delete`, and
`delete_all(include_remote: bool)` functions. All storage operations should be explicit.
(Note: a function to set an individual field / subset of fields of a record
might also be necessary in the initial version for form autofill)
That said, there are no transactions, triggers, or other advanced data
manipulation features.
#### 5. No strong eventual consistency (e.g. Why not a CRDT)
Given the use of some CRDT-esque elements in this design (vector clocks, for
example), a version that was based around CRDTs would gain the property of
strong eventual consistency (that no matter the order events occur in, the
same outcome will be produced).
We don't do this. Most sync collections would be difficult or impossible to
express as CRDTs, and it would force us to either expose a complex API, severely
limit the types of data that can be synced, or introduce a large abstraction
boundary between the actual data representation that is stored and synced, and
what the public API is (which would impose a great deal of implementation
complexity).
Instead, Remerge records are essentially JSON blobs (that conform to a given schema).
The case where this matters is when conflict occurs. If we detect a conflict,
then we fall back to the merge algorithm described later.
#### 6. No support for custom merge code.
Several reasons for this:
- It would be a pain (to put it lightly) to expose custom merge handlers over
the FFI.
- This would be easy to get wrong unless it was very simple and was operating on
primitives.
- I'm not convinced it's necessary; I strongly suspect most merge handling would
be implementing one of the strategies or data types described here.
- Changes to the custom merge handler could easily cause confusion in clients.
- Couldn't realistically do a proper 3 way merge for most cases without a lot of
effort.
Instead we'd either expose something like this as a new data type or merge
strategy and put it behind a remerge feature (see the Extending Remerge
section), or accept remerge isn't a good fit for them (for example, if someone
wanted a custom merge handler so that they could sync something of similar
structural complexity to bookmarks).
### High level overview
At a high level, we will store a versioned schema somewhere on the server. This
describes both the record format, and how to perform merges. The goal is to
allow clients who have not fully been updated to perform merges without data
loss.
Additionally, the schema contains two version numbers: the schema version,
and the minimum (native schema) version a client must have in order to sync. These two
allow us to migrate the schema progressively, only locking out clients that are
past a certain age, while letting users whose devices are only a single
version behind continue to sync.
In order to keep compatibility while new versions of remerge add new features,
we use an explicit list of required feature flags. A secondary list of optional
feature flags is also present; it is not needed except to sanity-check the
schema.
The schema is typed: it contains type information for fields, which merge
strategies to use, and constraints to place on those fields during schema updates.
Locally, multiple collections are stored in a single database, and every
collection has both the local copy and mirror available, so that a 3-way-merge
may be performed.
## The Schema
The reference material for the schema has been moved into its own file.
## Clocks and Timestamps
In Remerge, we use vector clocks for conflict detection (where possible --
we support syncing with legacy collections that may be written to by naive
clients, so for collections explicitly marked as legacy, we fall back to using
modification timestamps in the case that vector clocks are not present).
Additionally, there are some cases where we use modification timestamps for
conflict *resolution*, but only after the vector clock has detected that a
conflict truly has occurred (or is unavailable, as may be the case when
interacting with legacy collections), and the schema doesn't give us a better
guideline. This is done much in the same way as the current sync system, and is
discussed in more detail in the section on merging records.
Vector clocks may be unfamiliar; if so, see [the appendix][vector_clock_overview]
for a brief explanation of how they work.
### Our use of Vector clocks
One of the noted downsides of vector clocks is that they tend to grow without
bound. That is, there are cases when an item is added to a vector clock (a
client that has never modified a given record does so for the first time), but no
cases where an item is removed from one.
This seems likely to be a non-problem for us in practice. A record will need to
be changed on a given client in order for that client to add its ID to the
clock, and in practice the number of devices remains small for most users (let
alone the number of devices which have edited a specific record, which is what
matters here).
We can also implement pruning strategies if in practice this assumption turns
out to be wrong, and to support this the `last_sync` time is recorded in the new
`client_info` meta record for this collection.
#### Client IDs
As mentioned elsewhere, we'll generate a local client ID when the database is
created. This is used for the client ID for the vector clock, and in the
`client_info` record (see the section on new metadata). There's a problem here
in that users may copy their profile around, or have it copied by their OS (on
e.g. iOS).
Ideally we'd get some notification when that happens. If we do, we can generate a new
client ID and be fine. In practice, this seems tricky to ensure. It seems likely
that we will want to add logic to handle the case where a client notices that
some other client is using its client ID.
If this happens, it needs to take the following steps:
1. Generate a new client id.
2. Find all records that it has changed since the last sync:
- Set their vector clock to be value of the vector clock stored in the
mirror + `{new_client_id: old_vector_clock[old_client_id]}`. That is, it
makes it appear that it had made the changes under the new client ID all
along.
JS pseudocode, since that explanation is a bit confusing to reread:
```js
newVectorClock = Object.assign({}, mirrorVectorClock, {
[newClientId]: oldVectorClock[oldClientId]
});
```
This should be rare, but seems worth handling, since it would be bad if the
system were fundamentally unable to handle it.
Vector clocks do not help us perform merges. They only detect the cases where
merges are required. The solutions to this typically involve both using a large
number of vector clocks, and careful data format design so that merges are
always deterministic and do not depend on timestamps (See the section on 'why
not CRDTs' for a bit more on this).
Instead, remerge handles conflict resolution with a schema-driven three way merge
algorithm, based on comparison with the most recent known server value (the
"mirror" record), and uses record modification timestamps in cases where
an age-based comparison is required. This is discussed further in the section on
merging.
It's worth noting that a newer variant of vector clocks, known as interval tree
clocks, exists, which attempts to handle the finicky nature of client IDs.
However, these are substantially more complex and harder to understand than
vector clocks, so I don't think they make sense for our use case.
#### Legacy collections
Collections that still must interact with legacy sync clients will neither
update nor round trip vector clocks. To handle this, in the case that these are
missing, we fall back to the current conflict resolution algorithm, based on
modification timestamps. This is unfortunate, but it will allow us to behave
progressively better as more clients are changed to use this system.
## Sync and Merging
Sync works as follows.
### Sync algorithm
This assumes there are no migrations, or only compatible migrations. The case of
incompatible migrations is described in the section on migrations.
Note that the following steps must be done transactionally (that is, if any step
fails completely, no changes should be recorded).
1. We download the schema and client_info from the server.
- If we're locked out of the schema due to it being incompatible with our
native schema version, then we stop here and return an error to the
caller.
2. If we need to upgrade the local version of the schema based on the version
from the server, we do that.
- This may change local versions of records, but we assume identical changes
have been made by other clients.
- This is following the principle described before that the remote data
may not be on the latest version of the schema, but the remote data
combined with the schema is enough to bring things up to date.
3. All changed records are downloaded from the server.
4. All tombstones in the set of changed records are applied. For each tombstone:
1. If a local modification of the record exists:
- If deletions are preferred to local changes, then we continue as if a
local modification does not exist, deleting the record. (see step 4.2)
- If updates are preferred to deletions, then we will resolve in favor
of undeleting the tombstone.
2. If a local record exists, but without any modifications, then we forward
it and the mirror to the incoming tombstone.
3. Incoming tombstones that we have never seen are persisted (in order to
prevent them from being undeleted)
- It's possible we will want to consider expiring these eventually,
however such functionality is easy to add in a future update.
5. For each non-tombstone record:
1. If the record is not valid under the schema:
- If the most recent schema version is our native schema version, delete
the record. XXX this is dodgy since we don't know that our schema is
actually the latest, we probably want to just always skip for now
until we can do transactional schema / data updates.
- Otherwise, assume we're wrong and that someone else will clean it up,
and treat it as an unchanged server record (E.g. if we have local
changes, we'll overwrite them, otherwise we'll ignore)
2. If we have no local record with the same guid, then we search for
duplicates using the schema's dedupe_on (a sketch of this lookup appears after this list).
- If there is a duplicate, then we mark that we need to change the
duplicate record's id to the new ID, and proceed as if we had a
local record with this ID (and a mirror with it, if the duplicate
had a mirror record).
3. If the incoming record is not in conflict with our local record (see
the section on vector clocks for how we determine conflict), then we
forward the mirror and local records to the incoming record.
4. If the incoming record *is* in conflict with our local record, then we
take one of the following steps:
1. If the local record is a tombstone and the collection is set to
prefer deletions, we resolve in favor of the local tombstone.
- Note that this still involves the conflict check -- tombstones
still have clocks.
2. If the local record is a tombstone and the collection is set to
prefer updates, we forward the local and mirror to the incoming
remote record.
3. If we don't have a mirror record, we need to perform a two way merge.
4. If we have a mirror record, and the incoming record is a descendent
of the mirror, then we can perform a three way merge correctly. This
is likely to be the most common case.
5. If we have a mirror record, and the incoming record is in conflict
with the mirror record, then we should either
1. For non-legacy collections, discard the record.
- This should never happen unless the client that wrote the record
has a bug. Otherwise, it would perform a merge with server before
uploading the record.
- Note that we wipe the mirror when node reassignment/password reset
occur, so that case doesn't apply here.
2. For legacy collections, which could have had the vector clock wiped
by a legacy client, assume this is what has happened, and do a three
way merge.
5. If we performed a two or three way merge, and the outcome of the merge
indicates that we must duplicate the record, then
1. We create a new local record with identical contents (but a new ID, and
fresh vector clock) to the current local version.
2. We then replace the current local and mirror with the incoming record.
6. All new and locally changed records (including records that were not
resolved by forwarding the local and mirror versions to the incoming
record) must be uploaded as part of this sync.
- Before uploading the records, we validate them against the schema. If
validation fails, we record telemetry, and only sync the records which
pass validation.
- Records which aren't synced should be flagged as such (XXX: flesh this out)
- This telemetry should be monitored, as it indicates a bug.
7. Upon completing the upload, record the last sync time for this collection,
and commit any changes.
- This, unfortunately, ignores the case where the upload was split over
multiple batches, and the first batch succeeded, but the subsequent
batch failed. I'm not sure this is possible to handle sanely... The
fact that this system is not intended for collections which have so
many records that this is an issue helps, although in practice there
will be edge-case users who do have this many records.
- More worryingly, this ignores the case where we successfully commit
batches, but fail to commit the database transaction locally.
- In both cases, the records we uploaded will be strict descendents of
our local changes, however the strategy of detecting duplicate client
ids above assumes that if a server write occurs with our client id,
then that means our client ID needs to change. This is not ideal.
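As an illustration of step 5.2 above, here is a rough sketch of the dedupe lookup. The `LocalRecord` type and the flat JSON field representation are assumptions made for the sketch, not the actual storage layout:
```rust
use serde_json::{Map, Value};

/// Hypothetical local record shape, used only for this sketch.
struct LocalRecord {
    guid: String,
    fields: Map<String, Value>,
}

/// An incoming record with an unknown guid is considered a duplicate of a local
/// record if every field named in the schema's `dedupe_on` has an identical value.
fn find_dedupe_candidate<'a>(
    incoming: &Map<String, Value>,
    local_records: &'a [LocalRecord],
    dedupe_on: &[String],
) -> Option<&'a LocalRecord> {
    if dedupe_on.is_empty() {
        return None;
    }
    local_records.iter().find(|local| {
        dedupe_on
            .iter()
            .all(|field| local.fields.get(field) == incoming.get(field))
    })
}
```
If a candidate is found, the sync proceeds as described above: the duplicate's ID is changed to the incoming ID and the merge continues as if a local record with that ID already existed.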
### Merging a record
This depends on the data type and merge strategy selected.
Before performing the two or three-way merges, we perform compatible schema
migrations (filling in missing default values, etc) on all input records. This
is not treated as changing the record in a material way (it does not affect its
vector clock or its sync_status).
#### Three way merge algorithm
The input to this is the schema, the local, (incoming) remote, and mirror records.
All records also have their modification timestamps.
1. The local record is compared to the mirror record to produce the "local delta":
- For each field in the record, we either have `None` (no change), or
`Some<new value>` indicating the local record has a change not present in the mirror.
2. The remote record is compared to the mirror record to produce the "incoming delta":
- For each field in the record, we either have `None` (no change), or
`Some<new value>` indicating the remote record has a change not present in the mirror.
- If the field is numeric and has bounds, perform bounds checks here.
- For `if_out_of_bounds: "clamp"`, clamp both the new value and the
mirror, and check them once again against each-other.
- For `if_out_of_bounds: "ignore"`, if the new value would bring
the field out of bounds, ignore it.
3. A "merged delta" is produced as followed:
1. For any change, if it was modified in only one of the two deltas and is
not part of a composite field, copy it into the merged delta.
2. For each composite field containing one or more changed non-deprecated
sub-fields:
- If the composite root has `prefer_remote`, then prefer
remote for all members of the composite.
- If the composite root has `take_newest`, then copy the
fields in the composite from whichever has been modified more recently
(as determined by modification timestamps) between the local and
incoming remote records.
- Note that we're copying from the records, not from the deltas.
- If the composite root has `take_min` or
`take_max` as its merge strategy, then compare the values
of the composite root in the local and remote records numerically, and
copy all fields in the composite from the winner to the merged delta.
3. For remaining (non-composite) fields in both the "local delta" and
"incoming delta",
- For fields with the `take_newest`, `prefer_remote`, `duplicate`,
`take_min`, `take_max`, `take_sum`, `prefer_false`, `prefer_true`
strategy, follow the description listed in the schema format document
under "Merge Strategies".
- If the field is an `untyped_map`: performs a similar operation to the 3WM
where deltas are created, and each field is merged by `take_newest`.
- If `prefer_deletions` is true, then if one field is set to the
tombstone value, delete it.
- This is a little hand-wavey, but seems sufficiently specified,
esp. given that we aren't planning on implementing it immediately.
- If the field is a `record_set`:
- The set difference is computed between the local and mirror
- The set difference is computed between the incoming and mirror
- The new mirror is the `old_mirror UNION (local - mirror) UNION (remote - mirror)`
- In the case of conflict (a new or changed record present in both the local and incoming sets),
the newer value is taken.
- Note: Deletions (from the set) are stored as explicit tombstones,
and preferred over modifications iff `prefer_deletions` is true.
- This is a little hand-wavey, but seems sufficiently specified,
esp. given that we aren't planning on implementing it immediately.
4. The "merged delta" is applied to the mirror record to produce the new local
record which will be uploaded to the server, and become the next mirror.
- This record will have a vector clock that is a descendent of the local,
mirror, and incoming remote clocks.
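To make steps 1 through 3 more concrete, here is a rough sketch of the per-field delta computation and of resolving a plain `take_newest` field. The flat `HashMap` record shape and the record-level timestamps are simplifications for the sketch, not the real storage model:
```rust
use serde_json::Value;
use std::collections::HashMap;

/// Simplified record shape for this sketch: field name -> JSON value.
type Fields = HashMap<String, Value>;

/// Steps 1 and 2: a field is part of a delta only if its value differs from the mirror.
fn delta(record: &Fields, mirror: &Fields) -> Fields {
    record
        .iter()
        .filter(|(name, value)| mirror.get(name.as_str()) != Some(*value))
        .map(|(name, value)| (name.clone(), value.clone()))
        .collect()
}

/// Step 3 for plain `take_newest` fields: a change present in only one delta is
/// copied through; if both deltas touched a field, the more recently modified
/// record wins (`local_ts` / `remote_ts` are the record modification timestamps).
fn merge_take_newest(
    local: &Fields,
    local_ts: i64,
    remote: &Fields,
    remote_ts: i64,
    mirror: &Fields,
) -> Fields {
    let local_delta = delta(local, mirror);
    let remote_delta = delta(remote, mirror);
    let mut merged = mirror.clone();
    for (name, value) in &local_delta {
        merged.insert(name.clone(), value.clone());
    }
    for (name, value) in &remote_delta {
        if !local_delta.contains_key(name) || remote_ts > local_ts {
            merged.insert(name.clone(), value.clone());
        }
    }
    merged
}
```
Step 4 then amounts to treating `merged` as the new local record (and next mirror), with a vector clock that descends from all three inputs.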
#### Two way merge algorithm
Two-way merges are not ideal. They are performed only if we don't have a mirror.
They're intended to do as little damage as possible.
The input to this is the schema, the local, and incoming remote records.
1. A delta is computed between the local and incoming remote records, in
a similar manner to the three-way-merge case. This is known as "the delta".
2. A merged record ("the merged record") is created which starts with all values
from the local record not present in "the delta"
- It doesn't matter if you take from local or remote here, since
these are the fields that we just determined were equal.
3. For each composite field containing one or more non-deprecated subfields
present in "the delta": the merge is performed roughly the same as the 3WM
case.
- If the composite root has `prefer_remote`, then prefer
remote for all members of the composite.
- If the composite root has `take_newest`, then copy the fields
in the composite from whichever has been modified more recently (as
determined by modification timestamps) between the local and incoming
remote records into "the merged record".
- Note that we're copying from the records, not from the deltas.
- If the composite root has `take_min` or `take_max` as its merge strategy,
then compare the values of the composite root in the local and remote
records numerically, and copy all fields in the composite from the winner
to "the merged record".
4. For remaining (non-composite) fields present in "the delta", store the
result of the following in "the merged record":
- For fields with the `take_newest`, `prefer_remote`, `duplicate`,
`take_min`, `take_max`, `take_sum`, `prefer_false`, `prefer_true`, and
`own_guid` strategy, follow the description listed in the schema format
document under "Merge Strategies", noting that this is the two way merge
case.
- For `untyped_map`: The maps are merged directly, breaking ties in favor of
the more recently modified.
- if `prefer_deletions` is true, any field represented by a tombstone in
either side is a tombstone in the output
- For `record_set`:
- The result is the set union of the two, with deletions preferred if
`prefer_deletion` is true
5. The "merged delta" is applied to the mirror record to produce the new mirror
record which will be uploaded to the server.
- This record will have a vector clock that is a descendent of the local,
mirror, and incoming remote clocks.
## New metadata records:
Some additional per-collection meta-information is required to make remerge
work.
They are stored at `meta-$collection_name/blah`. This doesn't allow for
transactional updates at the same time as the records, but in practice so long
as the schema is always uploaded prior to uploading the records, this should be
fine.
TODO: should this be `meta/$collection_name:blah` or similar?
#### `meta-$collection_name/schema`
This stores the most recent schema record. See [the schema format](schema-format.md)
reference for detailed information on its layout.
#### `meta-$collection_name/client_info`
Information about clients. An object with a single field currently, but possibly
more in the future (the library must make an effort to not drop fields it does
not understand when updating this record). The only field is `"clients"`, which
is an array of records, each with the following properties
- `"id"`: A unique ID generated on DB creation. Unrelated to any sort of current
client ID. Discussed in the section on counters/consistency. This is a string.
- It's illegal for this to be duplicated. If that happens, the `client_info`
record is considered corrupted and is discarded.
- `"native_schema_version"`: This clients "native" schema version for this collection.
- This is a semver version string.
- This is the latest version it was told to use locally, even if in practice
it uses a more up-to-date schema it fetched. This is effectively the
version that the code adding records understands.
- `"local_schema_version"`: The latest version of the schema that this client
understands.
- This is also a semver version string.
- `"last_sync"`: The most recent X-Weave-Timestamp (as returned by e.g. the
fetch to `info/collections` we do before the sync or something). This is for
expiring records from this list.
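A rough serde sketch of what this record's shape could look like. The concrete Rust types (e.g. `f64` for the `X-Weave-Timestamp`) are assumptions of the sketch, and a real implementation would also need to round-trip fields it does not understand rather than dropping them:
```rust
use serde::{Deserialize, Serialize};

/// Sketch of `meta-$collection_name/client_info`.
#[derive(Serialize, Deserialize, Debug)]
struct ClientInfo {
    clients: Vec<ClientInfoEntry>,
}

#[derive(Serialize, Deserialize, Debug)]
struct ClientInfoEntry {
    /// Unique ID generated on DB creation; duplicates mean the record is corrupt.
    id: String,
    /// Semver string: the version the code adding records understands.
    native_schema_version: String,
    /// Semver string: the latest schema version this client understands.
    local_schema_version: String,
    /// Most recent X-Weave-Timestamp seen; used for expiring entries.
    last_sync: f64,
}
```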
## Migrations
There are several ways you might want to evolve your schema, but they boil
down to two types:
Changes that have a migration path (compatible migrations), and those that do
not (incompatible migrations).
Remerge attempts to make the 2nd rare enough that you don't have to ever do it.
Eventually it will probably support it better, but that's being deferred for
future work.
### A note on version strings/numbers
I've opted to go for semver strings in basically all cases where it's a string
that a developer would write. This is nice and familiar, and helps us out a lot
in the case of 'prerelease' versions, but there are several cases where it
doesn't make sense, or isn't enough:
- The version of remerge itself, where we may add features (for example, new
data types) to the library. We avoid using version numbers at all here, by
specifying the feature dependencies explicitly, which is more flexible anyway.
See the section on extending remerge for more details.
- Locking out old clients. Ideally, you could do migrations slowly, in
multiple steps:
For example, if you want to make a new mandatory field, in version X you
start populating it, then once enough users are on X, you release a
version Y that makes it mandatory, but locks out users who have not yet
reached X.
Similarly for field removal, although our design handles that more
explicitly and generally with the `deprecated` flag on fields.
This is more or less the reason that we never change the version number
in meta/global. It immediately impacts every unreleased version.
For both of these, we distinguish between the `current` version and the `required` version.
This is how the two are related:
- The current version must always be greater than or equal to the required version
for the client imposing the restriction. It's nonsensical otherwise.
- The required version must be semver compatible with the "current" version, and
by default it is the smallest version that is semver-compatible with the
current version
This is to say, if you add a new optional "foobar" field to your record in
"0.1.2", once "0.1.2" is everywhere, you can make it mandatory in a new "0.1.3",
which is listed as requiring "0.1.2".
This has the downside of... not really being what semver means at all. So I'm
open to suggestions for alternatives.
#### Native, local, and remote versions
There's another complication here, and that's the distinction between native, local,
and remote versions.
- The "remote" schema is any schema from the server, but almost always we use it
to mean the latest schema version.
- The "native" schema version is the version that the client would be using if it
never synced a new schema down from the server.
- The "local" schema version is the version the client actually uses. Initially
it's the same as the native version, and if the client syncs, and sees a
compatible 'remote' schema, then it will use the remote schema as its new local
schema.
Critically, the `required` schema check (described above) is performed against the
*native* schema version, and *not* the local schema version. This is required for
the system to actually lock out older clients -- otherwise they'd just confuse
themselves (in practice they should still be locked out -- we will need to make
sure we validate all records we're about to upload against the remote schema,
but this should allow them to avoid wasting a great deal of effort and possibly
reporting error telemetry or something).
Anyway, the way this will work is that if a client's *native* (**not** local)
schema version falls behind the required version, it will stop syncing.
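A minimal sketch of that check using the `semver` crate (the function name and string-based signature are just for illustration; error handling and the compatibility checks on the schema itself are omitted):
```rust
use semver::Version;

/// A client may sync a collection only if its *native* schema version is at
/// least the schema's required version.
fn native_version_allowed(native_version: &str, required_version: &str) -> bool {
    let native = Version::parse(native_version).expect("invalid native version");
    let required = Version::parse(required_version).expect("invalid required version");
    native >= required
}
```
For example, with a required version of "0.1.2", a client whose native version is "0.1.1" stops syncing, while one at "0.1.3" continues.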
### Semver-compatible migrations (for shipping code)
There are two categories here: Either `dedupe_on` is unchanged/relaxed, or
additional constraints are added.
Most of the time, the server data does not need to change here. The combination
of the new schema with the data the server has (which will be semver-compatible
with the new data -- or else you need to read the next section) should be enough
when combined to give all clients (who are capable of understanding the schema)
identical results.
However, we also allow adding additional constraints to `dedupe_on`. In this case,
some records may now be duplicates of existing records. Failing to fix these may
result in different clients deciding one record or another is the canonical record,
and it's not great if they disagree, so we fix it up when uploading the schema.
#### Algorithm for increasing `dedupe_on` strictness
The client uploading the schema with the new dedupe_on restriction performs the
following steps.
1. Find all combinations of records that are now considered duplicated.
- Note that this isn't a set of pairs, it's a set of lists, e.g. changing
`dedupe_on` could cause any number of records to be unified.
2. For each list of records containing 2 or more records:
1. Take the most recently modified record, and delete all the others
(uploading tombstones for them).
- XXX It's not clear what else we should do here. Sort by modification
date and
2. Merge them front to back using two_way_merge until only a
single record remains.
- XXX: Or should we just take the one with the highest update_counter outright?
3. The result will have the ID of the first record in the list, and will
have a prev_id of the 2nd record.
4. Each subsequent record will be recorded as a tombstone with a prev_id of
the record following it (except for the last record, which will have nothing).
For example, to merge `[a, b, c, d]`, the payload of `a` will be `merge(merge(merge(a, b), c), d)` (a minimal sketch of this fold appears after this list). We'd then upload records equivalent to the following (after adding the rest of the BSO fields and encrypting):
```json
[
{ "id": "a", "prev_id": "b", "payload": "see above" },
{ "id": "b", "prev_id": "c", "payload": { "deleted": true } },
{ "id": "c", "prev_id": "d", "payload": { "deleted": true } },
{ "id": "d", "payload": { "deleted": true } }
]
```
3. Upload the outgoing records and (on success) commit the changes locally.
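A minimal sketch of the front-to-back fold in step 2, with `Record` and `two_way_merge` standing in for the real record type and merge function (both hypothetical here):
```rust
/// Fold a list of now-duplicate records front to back, so that `[a, b, c, d]`
/// becomes `merge(merge(merge(a, b), c), d)`.
fn merge_duplicates<Record>(
    records: Vec<Record>,
    two_way_merge: impl FnMut(Record, Record) -> Record,
) -> Option<Record> {
    records.into_iter().reduce(two_way_merge)
}
```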
### Semver-incompatible migrations
A lot of thought has been given to allowing evolution of the schema such that
these are not frequently required. Most of the time you should be able to
either deprecate fields, or move through a compatible upgrade path and block
out the old data by using `required_version`.
However, some of the time, outright breaking schema may be unavoidable.
Fundamentally, this will probably look like our API requiring that, for a
semver-major change, the code either explicitly migrates all the records (e.g.
we give it a list of the old records and get the new ones back), or very explicitly
says that the old records should be deleted.
There are a few ways to do this in the API, I won't bikeshed that here since
they aren't super important.
The big concern here is that it means that now all records on the server must go,
and be replaced. This is very unlikely to lead to happy servers, even if the
record counts are small. Instead, what I propose is as follows:
1. If the user explicitly syncs, we do the full migration right away. The danger
here is automatic syncs, not explicit ones. We will need to adjust the API to
allow indicating this.
2. Otherwise, use a variant of our bookmark repair throttling logic:
- There's an N% (for N around, IDK, 10) chance every day that a given
client does the full sync/upgrade routine.
- If, after M days of being updated, none of the clients have done this,
just go for it.
- TODO: discuss this with ops for how aggressive seems sane here.
## Extending Remerge
The initial version of Remerge will be missing several things we expect to need
in the future, but won't need for simple datatypes. Adding these cannot be a
breaking change, or Remerge is likely to never be very useful.
When a new data type or property is added to Remerge which some (but not all)
schemas may use, we also come up with a short identifier string. Any schemas
that use this feature then must specify it in the `remerge_features` property.
Adding a new entry to `remerge_features` will lock out clients whose
version of Remerge does not support that feature. See
[the first example][required-feature-example] for an example.
Some features may be used by a schema, but in such a way that legacy clients can
still do the right thing. The motivating example here is allowing a new optional
field of some data type where it's fine for legacy clients to just ignore but
round-trip it. In this case, specifying it in `optional_remerge_features` allows
this behavior. See [the second example][optional-feature-example] for an example.
##### Example 1: A `compression` Remerge feature
[required-feature-example]: #example-1-a-compression-remerge-feature
Consider adding support for transparently compressing records before (encrypting
and) uploading them to the server.
This is a change where old clients will be completely unable to comprehend new
records, which means that, naively, unless careful coordination is performed,
this sort of change can not be done without locking out many clients.
However, we can avoid that problem with Remerge using `remerge_features`:
1. We implement support for compression in a way that can be turned on by
specifying `compressed: true` in the schema.
2. We add "compression" to Remerge's internal static list of supported features,
and require any schema that uses `compressed: true` to specify `compression` in
`remerge_features`.
3. Then, collections that wish to have compression enabled just need to ensure
that every version they consider "supported" bundles a Remerge library
which understands the `compression` feature before turning it on (e.g. the Remerge code
adding the `compression` feature must ride the trains before it's used).
**Note**: There are a lot of devils in the details here if we actually wanted to
support compression in this manner; this is just an example of the sort of
change that otherwise seems impossible to add support for after the fact.
##### Example 2: A new data type
[optional-feature-example]: #example-2-a-new-data-type
The usage implied in example 1 has two major drawbacks:
1. Code that might not have gotten nightly / beta testing (despite riding the trains)
is suddenly enabled remotely all at once.
2. For the common case where a feature defines a new Remerge datatype, locking
out old clients is unnecessary so long as the field is optional (and not part
of a composite), since those clients could simply round-trip it (e.g. treat it
as an opaque 'prefer_remote' blob of data).
In practice number 1 seems unlikely to be an issue, as a new (required) feature
would either be motivated by some new collection wanting to use it, or it would
come with a breaking change anyway.
Number 2 can be avoided by using the `optional_remerge_features` property:
Note: the motivating example here is a bit more nebulous than the above, despite
the fact that it's probably the more likely case.
Let's say we're adding support for a `binary_blob` type to represent a small
optional image (like a favicon), which it represents as a string that's
guaranteed to parse as base64 (Note: this probably wouldn't be useful, but
that's not the point).
1. Remerge implements support for this under the "binary_blob" feature.
2. The collection updates its schema to have:
- Both `"remerge_features": ["binary_blob"]` and `"optional_remerge_features": ["binary_blob"]`
- A new `favicon` field, which is optional and has type `binary_blob`.
3. Clients whose version of Remerge doesn't understand `binary_blob` keep syncing: they simply ignore and round-trip the `favicon` field.
## Future Work
To be explicit, the following is deferred as future design work.
1. Detecting and deleting corrupt server records.
- Instead, we just ignore them. This is probably bad.
2. References to other elements
- This was a big part of the complexity in the previous version of the spec,
and it's not totally clear that it's actually useful. Usually another
identifier exists that can be used instead (possibly whatever that type is deduped on).
3. Enumerations and fields that are mutually exclusive with other fields.
- Exclusive fields are necessary to model the `formSubmitURL` xor `httpRealm`
in logins.
- Enum-esque types, which could more or less be modeled as 'sets of
exclusive fields where which fields are active is controlled by some
`type` value'...
4. Support for nested objects of some kind.
- This probably just looks like:
- Allowing `path.to.prop` in the `name` field.
- Coming up with the restrictions (e.g. the first segment can't already
be a field `name` or `local_name`, all segments must follow the rules
for names, etc)
- ...
- If we need addresses and credit card engine support as part of form
autofill, we need this.
5. Properly handling when `dedupe_on` strictness is increased in a new version
of the schema.
- It's not clear what the actual right thing to do is, but 'delete all
duplicates except the most recently modified' seems too risky.
6. More broadly: How/when to handle when a schema upgrade tightens constraints.
- For example, numbers can be clamped, for now the plan is to just check on
insert/update/sync...
- We should at least do this when the native schema is upgraded, but we want
to be careful to ensure it doesn't cause us to ignore incoming changes to
those fields when we sync next.
7. Storing multiple collections in the same database.
- Initially I had thought this was desirable, but the locking issues that
have been caused by places make me much less sure about this, so I'm
deferring it. It also simplifies the implementation.
### Features Deferred for after initial implementation
These are designed and specced, and I think they're very important for remerge
to actually be useful, but they are deferred until after the initial implementation:
1. The `record_set` type.
2. TODO: What else? There's probably a lot we could cut just to support form
autofill, but if "everything but what form autofill needs" is cut, this would
not be a very useful system.
## Appendix 1: Vector clock overview
[vector_clock_overview]: #appendix-1-vector-clock-overview
Feel free to skip this if you know how these work.
The goal of a vector clock is basically to let us detect the difference between
stale and conflicting data, which is something we have no ability to detect
currently.
Despite its name, it is not a vector, and does not measure time. It is
basically a `HashMap<ClientId, u64>`, where ClientId is something like the local
client id generated by each client upon database creation, and the u64 is a
per-client change counter.
Whenever a client makes a change to some item (where an item may be a record, a
field, etc.), it increments its current change counter, and sets the value
stored in the clock for its ID to the current value of the change counter.
This lets you easily tell if one version of a record is an ancestor of another:
if record A's clock has an entry for every client in record B's clock, and they
all have the same value or higher, then record B is just a previous version of
record A, and vice versa. If neither is strictly greater than the other, then
a conflict has occurred.
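A minimal sketch of that comparison (the `VectorClock` representation here is just for illustration):
```rust
use std::collections::HashMap;

/// Client ID -> per-client change counter.
type VectorClock = HashMap<String, u64>;

/// True if `a` dominates `b`: every entry in `b` is present in `a` with the
/// same or a higher counter, i.e. `b` is an ancestor of (or equal to) `a`.
fn dominates(a: &VectorClock, b: &VectorClock) -> bool {
    b.iter()
        .all(|(id, counter)| a.get(id).map_or(false, |c| c >= counter))
}

/// If neither clock dominates the other, the two versions are in conflict.
fn in_conflict(a: &VectorClock, b: &VectorClock) -> bool {
    !dominates(a, b) && !dominates(b, a)
}
```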
See the following resources for more background, if desired:
- http://basho.com/posts/technical/why-vector-clocks-are-easy/
- http://basho.com/posts/technical/why-vector-clocks-are-hard/
- https://www.datastax.com/dev/blog/why-cassandra-doesnt-need-vector-clocks
- https://en.wikipedia.org/wiki/Vector_clock

View file

@@ -1,541 +0,0 @@
# Schema format
The remerge schema format is canonically YAML (Previously it was JSON, but it
seems extremely likely that comments would be valuable).
## Top level properties
As of the current version, it contains the following fields:
- `version`: Required semver version string. This schema's version number.
- `required_version`: Optional semver version string. The "native version" that
remote clients must support in order to attempt to merge this schema.
By default, this defaults to the lowest version that is still
semver-compatible with `version`.
- `remerge_features`: A list of strings, the set of remerge features (see
rfc.md) this schema uses that must be supported in order for a client not to
be locked out.
This must be specified if any functionality added in a remerge feature is
allowed to be used.
- **Caveat**: Using a new feature will lock out clients that do not have a
version of `remerge` which understands that feature unless it's explicitly
present in `optional_remerge_features` as well.
Developers using `remerge` should be careful about requiring new features, unless
they would be locking out old clients anyway (e.g. during initial development, or whatever).
- `optional_remerge_features`: A list of strings. If you specify
`remerge_features`, this must be specified as well, even if it's empty.
Every string in this list must also be present in remerge_features, and must
pass the following test:
1. If the feature is a new datatype, any fields which use that datatype must
be optional and not part of a composite.
2. Otherwise, it is an error for the feature to be listed as optional.
- *Note*: In the future, it's likely that certain new features will be
allowed to appear in this list, in which case this documentation
should be updated.
- `legacy`: Optional bool (defaults to false). Is this a collection that will be
written to by non-remerge clients?
This currently adds the following restrictions:
- A single `own_guid` field must be present in `fields`.
- XXX: What else?
- `fields`: An array of field records. See [Field records][field_records] for details.
- `dedupe_on`: Optional. Array of strings (defaults to `[]`). Each string must
reference the name of an item in `fields`.
## Field records
[field_records]: #field-records
Each entry in fields has the following properties:
- `name`: Required string: The fields name. It is an error if another field has
the same `name` or `local_name` as this name. The following restrictions apply
to field names:
- Field names must be non-empty.
- Field names must not be longer than 64 bytes/characters (the restriction below
means bytes and characters are equivalent here).
- Field names must only use the following characters: `[a-zA-Z0-9_-$]`. (A small
validation sketch for these rules appears after this list.)
- This is to allow the possibility that a future version of remerge will
allow you to reference properties in nested objects like `foo.bar`.
- Field `name`s should not change (this is the name of the field on the
server). Renaming a field conceptually can be done by specifying a
`local_name`.
- `local_name`: Optional string, used to rename fields, defaults to `name`. Only
the *native* schema's `local_name` values are ever used, since that's what
calling code understands.
- On writing to/reading from the DB, the `local_name` fields in the local
version's native schema are mapped to `name` (for writes into the DB) or
vice versa (for reads).
- It is not a schema-incompatible change to change a `local_name` (however a
schema version bump is required to ensure that the mapping of `version` to
`schema data` is unique).
- The same identifier restrictions exist as with `name` (non-empty, only
`a-zA-Z0-9_-$`, at most 64 characters).
- It's an error if this `local_name` collides with any other `local_name` or
`name`.
- It is a mistake if a `local_name` (or `name`) collides with any
`local_name` or `name` that has been active in the past, however this is
not currently checked.
- `type`: Required string. The type of the field. See the section
titled Field Types for the list of field types.
- `merge`: The merge strategy to use. Must be one of the merge strategies
listed in the section on merge strategies.
Note that not all `type`s support all `merge` strategies. In fact, none do.
Additionally, some types (`own_guid`, `record_set`, and `untyped_map`) forbid
the use of a merge strategy. Be sure to read the `Restrictions` section on
any field you use.
You also may not specify a merge strategy and a composite root.
- `composite_root`: In place of a `merge` strategy, all types that do not
specifically forbid it (Note: many do) may specify `composite_root` to
indicate that they are part of a composite, which is a string that specifies
the root of the composite.
- `required`: Whether or not the field is required.
- `deprecated`: Indicates that the field will be ignored for the purposes of
merging and validation. It is illegal for a type to be both `deprecated` and
`required`.
- `change_preference`: Optional string, one of the following values. Used to
help disambiguate problems during a conflict.
- `"missing"`: In the case of two conflicting changes, if one of the
changes removes the value *or* resets it to the default value provided
for that field (if any), then that change will be taken.
- `"present"`: In the case of two conflicting changes, if one of the
changes removes the value *or* resets it to the default value provided
for that field (if any), then that change will be discarded.
- These are used prior to application of the merge strategy, see the section
describing the `sync` algorithm for details.
Additionally many types contain a `default` value, which is discussed in the
section on the relevant record type, and in the RFC's section on the merge
algorithm (Merging a record). These are applied both to incoming items that are
missing it, and to items inserted locally that do not have it.
Some types have additional options (most types support providing a `default`
value too, for example). See below for specifics.
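As a concrete reading of the `name` restrictions above, here is a rough validation sketch (the helper is hypothetical, not part of the actual schema parser):
```rust
/// Rough check of the field-name rules: non-empty, at most 64 characters, and
/// only characters from `[a-zA-Z0-9_$-]` (which also keeps bytes == characters).
fn is_valid_field_name(name: &str) -> bool {
    !name.is_empty()
        && name.len() <= 64
        && name
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '$' | '-'))
}
```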
## `text`
Indicates that this field contains text.
Text may use the following merge strategies:
- `take_newest`
- `prefer_remote`
- `duplicate`
### Options
- `default`: Optional default value string.
## `url`
Indicates that this field contains a URL.
Urls are equivalent to `text` in most ways, except that attempts to assign
invalid URLs to them are prevented, and they are guaranteed to be canonicalized
to a punycoded/percent-encoded format. (Note: canonicalization happens
transparently during update and insert)
URLs may use the following merge strategies:
- `take_newest`
- `prefer_remote`
- `duplicate`
### Options
- `is_origin`: Optional bool to indicate that this field only stores
origins, not full URLs. A URL that contains information besides the
origin (e.g. username, password, path, query or fragment) will be
rejected for this field. Defaults to false.
- `default`: Optional default value.
## `real`
Indicates that this field is numeric. Reals are 64 bit floats, but NaN is
forbidden.
Numbers may use the following merge strategies:
- `take_newest`
- `prefer_remote`
- `duplicate`
- `take_min`
- `take_max`
- `take_sum`
### Options
- `default`: Optional default value. Must be a number, and must be between `min`
and `max` if they are specified.
- `min`: Optional number specifying an (inclusive) minimum value for this field.
If specified, the field must also contain the `if_out_of_bounds` option.
- `max`: Optional number specifying an (inclusive) maximum value for this field.
If specified, the field must also contain the `if_out_of_bounds` option.
- `if_out_of_bounds`: Optional string. Required if `max` or `min` are specified.
- `"discard"`: Changes that move this value outside the range specified by
min/max are discarded.
- `"clamp"`: Changes are clamped between `min` and `max`
### Restrictions
- May not be part of `dedupe_on`:
- We could loosen this to only apply for `take_sum` or bounding by `clamp`,
and maybe a couple others, but we probably want to discourage people from
using numeric keys for things.
- `max` may not be specified on a `take_sum` value.
- `min` and `max` must be finite. They may not be NaN or +/- infinity.
- `default` must be between `min` and `max`.
- `min` must be less than `max`.
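A rough sketch of how the `min`/`max`/`if_out_of_bounds` options above might behave when a new value comes in (hypothetical helper; the real checks happen on insert/update and during merges):
```rust
/// Apply the bounds options to an incoming value. `None` means the change is ignored.
fn apply_bounds(value: f64, min: f64, max: f64, clamp: bool) -> Option<f64> {
    if value.is_nan() {
        return None; // NaN is always forbidden for `real` fields.
    }
    if clamp {
        // `if_out_of_bounds: "clamp"`: pull the value into [min, max].
        Some(value.max(min).min(max))
    } else if value < min || value > max {
        // `if_out_of_bounds: "discard"`: ignore changes that leave the bounds.
        None
    } else {
        Some(value)
    }
}
```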
## `integer`
Indicates that this field is an integer. Integers are equivalent to numbers
except they are represented as 64-bit signed integers.
These have all the same options and restrictions as `real`, but using 64
bit integers.
## `timestamp`
Indicates that this field is a timestamp. Timestamps are stored as integer
milliseconds since 1970.
Timestamps automatically forbid unreasonable values, such as
- Values before the release of the first browser.
- Values from the future
- XXX Probably will need a little wiggle room here.
- Maybe only throw out values from more than a week in the future?
Timestamps may use the following merge strategies.
- `take_newest`
- `prefer_remote`
- `take_min`
- `take_max`
### Options
- `semantic`: Optional string. Indicates that this timestamp is a special type
of timestamp that's automatically managed by remerge.
- `"updated_at"`: Indicates that this timestamp should store
a modification timestamp. If this schema is synced to older
devices, they'll start doing the right thing here too.
Only one field per record may have this semantic.
If this semantic is used, the merge strategy must be `take_max`.
- `"created_at"`: Indicates that this timestamp stores the
creation date of the record.
If this semantic is used, the merge strategy must be `take_min`.
- **Note**: The reason these are fields and not simply built-in to every
record is so that they may be used as composite roots, and for
compatibility with legacy collections.
- `default`: Default value for the timestamp. Must either be an integer, or the
string "now", which indicates that we'll use the current time.
### Restrictions
- Only one field per record may have the `updated_at` semantic
- Timestamps may not be part of `dedupe_on`.
- Timestamps with the `created_at` semantic must use the `take_min` merge strategy.
- Timestamps with the `updated_at` semantic must use the `take_max` merge strategy.
## `boolean`
Indicates that this field is a boolean flag.
Boolean values may use the following merge strategies:
- `take_newest`
- `prefer_remote`
- `duplicate`
- `prefer_true`
- `prefer_false`
### Options
- `default`: Optional default value. If provided, must be true or false.
## `own_guid`
Indicates that this field should be used to store the record's own guid.
This means the field is not separately stored on the server or in the database,
and instead is populated before returning the record in APIs for querying
records.
### Options
- `auto`: Optional boolean, defaults to true. Means an ID should be
automatically assigned during insertion if not present.
### Restrictions
- It's an error to use `own_guid` in a schema's `dedupe_on`.
- `own_guid` fields may not specify a merge strategy.
- `own_guid` fields may not be part of a composite.
- It's an error to have more than one `own_guid`.
## `untyped`
This is an unstructured JSON payload that can have any value.
May use the following conflict strategies:
- `take_newest`
- `prefer_remote`
- `duplicate`
### Options
- `default`: Optional default value, can take any valid JSON value.
## `untyped_map`
**Note**: This is a planned feature and will not be implemented in the initial
version.
Indicates that this field stores a dictionary of key value pairs which
should be merged individually. It's effectively for storing and merging
user-defined JSON objects.
This does not take a merge strategy parameter, because it implies one
itself. If you would like to use a different merge strategy for
json-like data, then `untyped` is available.
The map supports deletions. When you write to it, if your write is
missing keys that are currently present in (the local version of) the
map, they are assumed to be deleted. If necessary, this means you may
need to be sure a sync has not occurred since your last read in order
to avoid discarding remotely added data.
### Options
- `prefer_deletions`: Optional (default to false). Indicates whether updates to a
field, or deletions of that field win in the case of conflict. If true, then
deletions always win, even if they are older. If false, then the last write
wins.
- `default`: Optional default value. If provided, it must be a JSON object.
### Restrictions
- It's an error to use `untyped_map` in a schema's `dedupe_on`.
- `untyped_map` fields may not specify a merge strategy.
- `untyped_map` fields may not be part of a composite.
## `record_set`
**Note**: This is a planned feature and will not be implemented in the initial version.
A unordered set of JSON records. Records within the set will not be
merged, however the set itself will be.
This does not take a merge strategy parameter, because it implies one
itself.
The `id_key` is the string key that is used to test members of this set
for uniqueness. Two members with the same value for their id_key are
considered identical. This is typically some UUID string you generate in
your application, but could also be something like a URL or origin.
The set supports deletion in so far as, when you write to the set, any items
that are currently present in the (local version of the) set but missing from
your write are assumed to be deleted.
### Options:
- `id_key`: Required. The key that identifies records in the set. Used for
deduplication, deletion syncing, etc.
This must point to a string property in each record.
- `prefer_deletions`: Optional (default to false). indicates whether updates or
deletions win in the case of conflict. If true, then deletions always win,
even if they are older. If false, then the last write wins.
- `default`: Optional default value. If provided, it must be an array of JSON objects.
If the array is not empty, every item in it must have the `id_key`, the
properties of those `id_key`s must be strings, and there may not be any two
objects with the same `id_key`.
### Restrictions
- It's an error to use `record_set` in a schema's `dedupe_on`.
- `record_set` fields may not specify a merge strategy.
- `record_set` fields may not be part of a composite.
# Composite fields
If a field needs to indicate that it's conceptually part of a group that is
updated atomically, instead of a `merge` strategy, it can mark a
`composite_root`.
Composites are good for compound data types where any part of them may change,
but merging two records across these changes is fundamentally broken.
For example, credit card number and expiration date. If a user updates *just the
number* on device 1, then *just the expiration date* on device two, these two
updates are conceptually in conflict, but a field level three-way-merge (as we
do) will blindly combine them, producing a record that doesn't represent
anything the user ever saw.
They're also useful for cases where one or more fields store extra information
that pertains to the root field.
For example, you might want to merge using `take_max` for a last use timestamp,
and also some information about the use -- for example, which device it occurred
on. This can be done correctly by storing the last use timestamp as a `take_max`
`timestamp` field, and storing the device information on one or more fields
which reference the `timestamp` as their `composite_root`.
## Restrictions on composites
- Not all types can be part of a composite (many can't). Furthermore, some types
may be part of a composite, but not as the root. See the field type docs for
details.
- Members of composite fields may not specify a merge strategy.
- The composite root must use one of the following merge strategies, which
effectively applies to the composite as a whole:
- `take_newest`: On conflict of *any of the fields in the composite*,
the most recently written value is used for all fields.
- `prefer_remote`: On conflict of *any of the fields in the composite*,
the remote value is used for all fields.
- `take_min` and `take_max`: If the root uses one of these as its merge strategy,
then on conflict with *any of the fields in the composite*, we compare the
value of the remote composite root to the local composite root, and the
lesser/greater root value decides how all fields of the composite are resolved.
- If any member in a composite is part of a `dedupe_on`, all members must be listed
in the dedupe_on.
- A field which is listed as a `composite_root` of another field may not, itself
list a `composite_root`.
# Merge Strategies
It is important to note that these are only applied in the case of field-level
conflicts. That is, when the value is modified both locally and remotely.
- `take_newest`: Take the value for the field that was changed most recently.
This is the default, and recommended for most data. That is, last write wins.
- `prefer_remote`: On conflict, assume we're wrong.
- Note that this typically is not useful when specified manually, but is
used automatically in certain cases.
- TODO: This should probably take a parameter for the native schema version.
- `duplicate`: On conflict, duplicate the record. This is not recommended for
most cases. Additionally no field may use this strategy in a record with a
non-empty dedupe-on list.
- `take_min`: Numeric fields only. Take the minimum value between the two
fields. Good for creation timestamps, and specified by default for timestamps
with the `created_at` semantic.
- `take_max`: Numeric fields only. Take the larger value between the two fields.
Good for modification/last-use timestamps, and specified by default for timestamps
with the `updated_at` semantic.
- `take_sum`: Numeric fields only. Treat the value as if it's a monotonic sum.
In the case of a conflict, if we have a common shared parent stored in the
mirror, then the result value is computed as
```
result_value = mirror_value + max(remote_value - mirror_value, 0) +
max(local_value - mirror_value, 0)
```
In the case of a two way merge (when we do not have a shared parent), the
larger value is used (this will generally be rare). (A small sketch of this
computation appears after this list.)
- `prefer_false`: Boolean fields only. On conflict, if either field is set to
`false`, then the output is `false`.
This is equivalent to a boolean "and" operation.
- `prefer_true`: Boolean fields only. On conflict, if either field is set to
`true`, then the output is `true`.
This is equivalent to a boolean "or" operation.
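Here is the `take_sum` computation as a small sketch (a hypothetical helper; the two-way case without a shared parent simply takes the larger value):
```rust
/// Three-way `take_sum`: treat the field as a monotonic sum and add both
/// sides' increases over the shared mirror value.
fn take_sum(mirror_value: f64, local_value: f64, remote_value: f64) -> f64 {
    mirror_value
        + (remote_value - mirror_value).max(0.0)
        + (local_value - mirror_value).max(0.0)
}

/// Two-way `take_sum` (no shared parent): the larger value wins.
fn take_sum_two_way(local_value: f64, remote_value: f64) -> f64 {
    local_value.max(remote_value)
}
```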
# `dedupe_on`
This indicates an identity relationship for your type.
In SQL terms, it effectively is a compound `UNIQUE` key, but perhaps without the
performance implications. If an incoming record appears which has identical
values to a local record for all keys listed in `dedupe_on`, then we treat it
as if the write applied to the same record (but perform a two way merge).
## Restrictions
- All strings listed in `dedupe_on` must point at the `name` of a field in `fields`.
- No fields listed in `dedupe_on` may have a `type` or `merge` strategy that
specifies that they cannot be part of `dedupe_on`
- Either all members of a composite, or no members of that composite may be
listed in `dedupe_on`. (You may not list only some of a composite's members.)
- Types with non-empty `dedupe_on` lists may not use the `duplicate` merge
strategy for any of their fields.