From 91c55bf4f3acd9ac1c6949b860d179a4ddbeefb6 Mon Sep 17 00:00:00 2001 From: Bastian Gruber Date: Sat, 16 Nov 2024 13:40:16 -0400 Subject: [PATCH] feat: adding a remote-settings CLI download feature, sync with local data Adding two commands to the cargo remote-settings CLI, `dump-sync` and `dump-get`. This allows to download a local dump of a set of collections, and keep it up to date with the remote version. It's also possible to open a PR right away to update this file in the app-services repo. `cargo remote-settings dump-sync --create-pr` will create a local branch and push it to the repo. When trying to `get_records` from the component, it first checks if the database has some, if not, it checks if the collection exists and takes it from the local file. --- Cargo.lock | 458 ++++++++-- .../dumps/main/search-telemetry-v2.json | 820 ++++++++++++++++++ components/remote_settings/src/client.rs | 421 ++++++++- components/remote_settings/src/config.rs | 2 +- components/remote_settings/src/service.rs | 1 + components/remote_settings/src/storage.rs | 4 +- examples/remote-settings-cli/Cargo.toml | 12 + .../remote-settings-cli/src/dump/client.rs | 332 +++++++ .../remote-settings-cli/src/dump/error.rs | 17 + examples/remote-settings-cli/src/dump/git.rs | 73 ++ examples/remote-settings-cli/src/dump/lib.rs | 3 + examples/remote-settings-cli/src/main.rs | 49 +- 12 files changed, 2107 insertions(+), 85 deletions(-) create mode 100644 components/remote_settings/dumps/main/search-telemetry-v2.json create mode 100644 examples/remote-settings-cli/src/dump/client.rs create mode 100644 examples/remote-settings-cli/src/dump/error.rs create mode 100644 examples/remote-settings-cli/src/dump/git.rs create mode 100644 examples/remote-settings-cli/src/dump/lib.rs diff --git a/Cargo.lock b/Cargo.lock index fb7ecfc33..d9a2da84b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,6 +221,12 @@ dependencies = [ "syn 2.0.72", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "atty" version = "0.2.14" @@ -274,9 +280,9 @@ dependencies = [ "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "hyper", + "http 0.2.9", + "http-body 0.4.5", + "hyper 0.14.27", "itoa 1.0.11", "matchit", "memchr", @@ -304,8 +310,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.9", + "http-body 0.4.5", "mime", "rustversion", "tower-layer", @@ -339,6 +345,12 @@ version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "basic-toml" version = "0.1.2" @@ -624,7 +636,7 @@ dependencies = [ "bitflags 1.3.2", "strsim 0.8.0", "textwrap 0.11.0", - "unicode-width", + "unicode-width 0.1.11", "vec_map", "yaml-rust 0.3.5", ] @@ -703,7 +715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" dependencies = [ "termcolor", - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -742,7 +754,7 @@ dependencies = [ "encode_unicode 0.3.6", "lazy_static", "libc", - "unicode-width", + "unicode-width 0.1.11", "windows-sys 0.42.0", ] @@ -1147,7 +1159,7 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2ea1d2f2cc974957a4e2575d8e5bb494549bab66338d6320c2789abcfff5746" dependencies = [ - "base64", + "base64 0.21.2", "byteorder", "hex", "once_cell", @@ -1359,7 +1371,7 @@ name = "example-sync-pass" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.21.2", "chrono", "clap 4.2.2", "cli-support", @@ -1382,7 +1394,7 @@ name = "example-tabs-sync" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.21.2", "chrono", "cli-support", "interrupt-support", @@ -1433,9 +1445,17 @@ dependencies = [ "anyhow", "clap 4.2.2", "env_logger", + "futures", + "indicatif", "log", "remote_settings", + "reqwest 0.12.4", + "serde", + "serde_json", + "thiserror", + "tokio", "viaduct-reqwest", + "walkdir", ] [[package]] @@ -1608,46 +1628,87 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd79fa345a495d3ae89fb7165fec01c0e72f41821d642dda363a1e97975652e" [[package]] -name = "futures-channel" -version = "0.3.21" +name = "futures" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", + "futures-sink", ] [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] [[package]] name = "futures-io" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", + "futures-sink", "futures-task", "memchr", "pin-project-lite", @@ -1660,7 +1721,7 @@ name = "fxa-client" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.21.2", "error-support", "hex", "jwcrypto", @@ -1795,7 +1856,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.9", + "indexmap 2.5.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", "indexmap 2.5.0", "slab", "tokio", @@ -1840,7 +1920,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ba86b7cbed4f24e509c720688eaf4963eac20d9341689bf69bcf5ee5e0f1cd2" dependencies = [ "anyhow", - "base64", + "base64 0.21.2", "log", "once_cell", "thiserror", @@ -1900,6 +1980,17 @@ dependencies = [ "itoa 1.0.11", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.11", +] + [[package]] name = "http-body" version = "0.4.5" @@ -1907,7 +1998,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.9", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -1954,20 +2068,40 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.3.26", + "http 0.2.9", + "http-body 0.4.5", "httparse", "httpdate", "itoa 1.0.11", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", "want", ] +[[package]] +name = "hyper" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "httparse", + "itoa 1.0.11", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -1975,12 +2109,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.27", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.5.0", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.5.0", + "pin-project-lite", + "socket2 0.5.7", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.53" @@ -2041,6 +2211,19 @@ dependencies = [ "hashbrown 0.14.3", ] +[[package]] +name = "indicatif" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width 0.2.0", + "web-time", +] + [[package]] name = "instant" version = "0.1.12" @@ -2179,7 +2362,7 @@ checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" dependencies = [ "ahash", "anyhow", - "base64", + "base64 0.21.2", "bytecount", "fancy-regex", "fraction", @@ -2203,7 +2386,7 @@ dependencies = [ name = "jwcrypto" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.2", "error-support", "log", "rc_crypto", @@ -2638,12 +2821,12 @@ dependencies = [ "copypasta", "glob", "heck 0.4.1", - "hyper", + "hyper 0.14.27", "local-ip-address", "nimbus-fml", "percent-encoding", "remote_settings", - "reqwest", + "reqwest 0.11.18", "serde", "serde_json", "serde_yaml 0.9.21", @@ -2681,7 +2864,7 @@ dependencies = [ "itertools", "jsonschema", "lazy_static", - "reqwest", + "reqwest 0.11.18", "serde", "serde_json", "serde_yaml 0.8.24", @@ -2759,7 +2942,7 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" name = "nss" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.2", "error-support", "nss_sys", "serde", @@ -2887,6 +3070,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "numtoa" version = "0.1.0" @@ -3245,6 +3434,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + [[package]] name = "powerfmt" version = "0.2.0" @@ -3320,7 +3515,7 @@ dependencies = [ "is-terminal", "lazy_static", "term 0.7.0", - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -3425,7 +3620,7 @@ dependencies = [ name = "push" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.2", "bincode", "env_logger", "error-support", @@ -3544,7 +3739,7 @@ dependencies = [ name = "rc_crypto" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.2", "ece", "error-support", "hawk", @@ -3631,7 +3826,7 @@ name = "relevancy" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.21.2", "error-support", "interrupt-support", "log", @@ -3682,16 +3877,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ "async-compression", - "base64", + "base64 0.21.2", "bytes", "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-tls", + "h2 0.3.26", + "http 0.2.9", + "http-body 0.4.5", + "hyper 0.14.27", + "hyper-tls 0.5.0", "ipnet", "js-sys", "log", @@ -3711,7 +3906,49 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "winreg 0.10.1", +] + +[[package]] +name = "reqwest" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +dependencies = [ + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.0", + "hyper-tls 0.6.0", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg 0.52.0", ] [[package]] @@ -3891,6 +4128,21 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + [[package]] name = "rustversion" version = "1.0.12" @@ -4108,6 +4360,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + [[package]] name = "similar" version = "2.1.0" @@ -4146,9 +4407,9 @@ checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e" [[package]] name = "smallvec" -version = "1.9.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smawk" @@ -4194,6 +4455,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "sql-support" version = "0.1.0" @@ -4336,7 +4607,7 @@ dependencies = [ name = "sync-guid" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.2", "rand", "rusqlite", "serde", @@ -4349,7 +4620,7 @@ version = "0.1.0" dependencies = [ "anyhow", "base16", - "base64", + "base64 0.21.2", "env_logger", "error-support", "interrupt-support", @@ -4398,6 +4669,27 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "systest" version = "0.1.0" @@ -4499,7 +4791,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -4510,7 +4802,7 @@ checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" dependencies = [ "smawk", "unicode-linebreak", - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -4612,8 +4904,10 @@ dependencies = [ "libc", "mio", "num_cpus", + "parking_lot", "pin-project-lite", - "socket2", + "signal-hook-registry", + "socket2 0.4.9", "tokio-macros", "windows-sys 0.48.0", ] @@ -4705,8 +4999,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 0.2.9", + "http-body 0.4.5", "http-range-header", "httpdate", "mime", @@ -4733,8 +5027,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e90e6da0427c5e111e03c764d49c4e970f5a9f6569fe408e5a1cbe257f48388" dependencies = [ "bytes", - "http", - "http-body", + "http 0.2.9", + "http-body 0.4.5", "pin-project-lite", "tokio", "tower", @@ -4853,6 +5147,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.3" @@ -5086,18 +5386,17 @@ version = "0.2.0" dependencies = [ "log", "once_cell", - "reqwest", + "reqwest 0.11.18", "viaduct", ] [[package]] name = "walkdir" -version = "2.3.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -5272,6 +5571,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webbrowser" version = "0.8.7" @@ -5433,6 +5742,15 @@ dependencies = [ "windows-targets 0.48.0", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", +] + [[package]] name = "windows-targets" version = "0.48.0" @@ -5637,6 +5955,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if 1.0.0", + "windows-sys 0.48.0", +] + [[package]] name = "x11-clipboard" version = "0.7.1" diff --git a/components/remote_settings/dumps/main/search-telemetry-v2.json b/components/remote_settings/dumps/main/search-telemetry-v2.json new file mode 100644 index 000000000..ef5624b04 --- /dev/null +++ b/components/remote_settings/dumps/main/search-telemetry-v2.json @@ -0,0 +1,820 @@ +{ + "data": [ + { + "codeParamName": "pc", + "components": [ + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".pa_item" + } + ], + "parent": { + "selector": ".adsMvCarousel" + }, + "related": { + "selector": ".cr" + } + }, + "type": "ad_carousel" + }, + { + "excluded": { + "parent": { + "selector": "aside" + } + }, + "included": { + "children": [ + { + "selector": ".b_vlist2col", + "type": "ad_sitelink" + } + ], + "parent": { + "selector": ".sb_adTA" + } + }, + "type": "ad_link" + }, + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".pa_item, .sb_adTA" + } + ], + "parent": { + "selector": "aside" + } + }, + "type": "ad_sidebar" + }, + { + "included": { + "children": [ + { + "selector": "input[name='q']" + } + ], + "parent": { + "selector": "form#sb_form" + }, + "related": { + "selector": "#sw_as" + } + }, + "topDown": true, + "type": "incontent_searchbox" + }, + { + "included": { + "children": [ + { + "eventListeners": [ + { + "action": "clicked_accept", + "eventType": "click" + } + ], + "selector": "button#bnp_btn_accept" + }, + { + "eventListeners": [ + { + "action": "clicked_reject", + "eventType": "click" + } + ], + "selector": "button#bnp_btn_reject" + }, + { + "eventListeners": [ + { + "action": "clicked_more_options", + "eventType": "click" + } + ], + "selector": "a#bnp_btn_preference" + } + ], + "parent": { + "selector": "div#bnp_cookie_banner" + } + }, + "topDown": true, + "type": "cookie_banner" + }, + { + "default": true, + "type": "ad_link" + } + ], + "domainExtraction": { + "ads": [ + { + "method": "textContent", + "selectors": "#b_results .b_ad .b_attribution cite, .adsMvCarousel cite, aside cite" + } + ], + "nonAds": [ + { + "method": "textContent", + "selectors": "#b_results .b_algo .b_attribution cite" + } + ] + }, + "extraAdServersRegexps": [ + "^https://www\\.bing\\.com/acli?c?k" + ], + "followOnCookies": [ + { + "codeParamName": "PC", + "extraCodeParamName": "form", + "extraCodePrefixes": [ + "QBRE" + ], + "host": "www.bing.com", + "name": "_SS" + }, + { + "codeParamName": "PC", + "extraCodeParamName": "form", + "extraCodePrefixes": [ + "QBRE" + ], + "host": "www.bing.com", + "name": "SRCHS" + } + ], + "id": "e1eec461-f1f3-40de-b94b-3b670b78108c", + "last_modified": 1731429440245, + "nonAdsLinkRegexps": [ + "^https://www.bing.com/ck/a" + ], + "organicCodes": [], + "queryParamName": "q", + "queryParamNames": [ + "q" + ], + "schema": 1730764806877, + "searchPageRegexp": "^https://www\\.bing\\.com/search", + "shoppingTab": { + "regexp": "^/shop?", + "selector": "#b-scopeListItem-shop a" + }, + "taggedCodes": [ + "MOZ2", + "MOZ4", + "MOZ5", + "MOZA", + "MOZB", + "MOZD", + "MOZE", + "MOZI", + "MOZL", + "MOZM", + "MOZO", + "MOZR", + "MOZT", + "MOZW", + "MOZX", + "MZSL01", + "MZSL02", + "MZSL03" + ], + "telemetryId": "bing" + }, + { + "adServerAttributes": [ + "rw" + ], + "codeParamName": "client", + "components": [ + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".pla-hovercard-container", + "skipCount": true + } + ], + "parent": { + "selector": "#plahover" + } + }, + "type": "ad_popover" + }, + { + "included": { + "children": [ + { + "countChildren": true, + "selector": "[data-dtld]" + } + ], + "parent": { + "selector": ".pla-exp-container" + }, + "related": { + "selector": "g-right-button, g-left-button, .exp-button" + } + }, + "type": "ad_carousel" + }, + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".sh-np__click-target" + } + ], + "parent": { + "selector": ".sh-sr__shop-result-group" + }, + "related": { + "selector": "g-right-button, g-left-button" + } + }, + "type": "ad_carousel" + }, + { + "included": { + "children": [ + { + "selector": "a" + } + ], + "parent": { + "selector": "#appbar g-scrolling-carousel" + }, + "related": { + "selector": "g-right-button, g-left-button" + } + }, + "topDown": true, + "type": "refined_search_buttons" + }, + { + "excluded": { + "parent": { + "selector": "#rhs" + } + }, + "included": { + "children": [ + { + "selector": "[role='list']", + "type": "ad_sitelink" + } + ], + "parent": { + "selector": "[data-text-ad='1']" + } + }, + "type": "ad_link" + }, + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".pla-unit, .mnr-c" + } + ], + "parent": { + "selector": "#rhs" + } + }, + "type": "ad_sidebar" + }, + { + "included": { + "children": [ + { + "selector": "input[type='text']" + }, + { + "selector": "textarea[name='q']" + } + ], + "parent": { + "selector": "form[role='search']" + }, + "related": { + "selector": "div.logo + div + div" + } + }, + "topDown": true, + "type": "incontent_searchbox" + }, + { + "excluded": { + "parent": { + "selector": ".pla-exp-container" + } + }, + "included": { + "children": [ + { + "countChildren": true, + "selector": "[data-dtld]" + } + ], + "parent": { + "selector": ".top-pla-group-inner" + } + }, + "type": "ad_image_row" + }, + { + "included": { + "children": [ + { + "eventListeners": [ + { + "action": "clicked_accept", + "eventType": "click" + } + ], + "selector": "button#L2AGLb" + }, + { + "eventListeners": [ + { + "action": "clicked_reject", + "eventType": "click" + } + ], + "selector": "button#W0wltc" + }, + { + "eventListeners": [ + { + "action": "clicked_more_options", + "eventType": "click" + } + ], + "selector": "button#VnjCcb" + } + ], + "parent": { + "selector": "div.spoKVd" + } + }, + "topDown": true, + "type": "cookie_banner" + }, + { + "default": true, + "type": "ad_link" + } + ], + "domainExtraction": { + "ads": [ + { + "method": "textContent", + "selectors": ".sh-np__seller-container" + }, + { + "method": "dataAttribute", + "options": { + "dataAttributeKey": "dtld" + }, + "selectors": "[data-dtld]" + } + ], + "nonAds": [ + { + "method": "href", + "options": { + "queryParamKey": "url", + "queryParamValueIsHref": true + }, + "selectors": ".mnIHsc > a:first-child" + }, + { + "method": "href", + "selectors": "a[jsname='UWckNb']" + }, + { + "method": "dataAttribute", + "options": { + "dataAttributeKey": "lpage" + }, + "selectors": "[data-id='mosaic'] [data-lpage]" + } + ] + }, + "extraAdServersRegexps": [ + "^https?://www\\.google(?:adservices)?\\.com/(?:pagead/)?aclk" + ], + "followOnParamNames": [ + "oq", + "ved", + "ei" + ], + "id": "635a3325-1995-42d6-be09-dbe4b2a95453", + "ignoreLinkRegexps": [ + "^https?://consent\\.google\\.(?:.+)/d\\?continue\\=" + ], + "last_modified": 1724867833754, + "nonAdsLinkQueryParamNames": [ + "url" + ], + "nonAdsLinkRegexps": [ + "^https?://www\\.google\\.(?:.+)/url?(?:.+)&url=" + ], + "organicCodes": [], + "queryParamName": "q", + "queryParamNames": [ + "q" + ], + "schema": 1724630408117, + "searchPageRegexp": "^https://www\\.google\\.(?:.+)/search", + "shoppingTab": { + "inspectRegexpInSERP": true, + "regexp": "&tbm=shop", + "selector": "div[role='navigation'] a" + }, + "signedInCookies": [ + { + "host": "accounts.google.com", + "name": "SID" + } + ], + "taggedCodes": [ + "firefox-a", + "firefox-b", + "firefox-b-1", + "firefox-b-ab", + "firefox-b-1-ab", + "firefox-b-d", + "firefox-b-1-d", + "firefox-b-e", + "firefox-b-1-e", + "firefox-b-m", + "firefox-b-1-m", + "firefox-b-o", + "firefox-b-1-o", + "firefox-b-lm", + "firefox-b-1-lm", + "firefox-b-lg", + "firefox-b-huawei-h1611", + "firefox-b-is-oem1", + "firefox-b-oem1", + "firefox-b-oem2", + "firefox-b-tinno", + "firefox-b-pn-wt", + "firefox-b-pn-wt-us", + "ubuntu", + "ubuntu-sn" + ], + "telemetryId": "google" + }, + { + "codeParamName": "client", + "components": [ + { + "included": { + "children": [ + { + "countChildren": true, + "selector": "[data-slide-index]" + } + ], + "parent": { + "selector": "[data-testid='pam.container']" + } + }, + "type": "ad_image_row" + }, + { + "included": { + "parent": { + "selector": "[data-testid='adResult']" + } + }, + "type": "ad_link" + }, + { + "included": { + "children": [ + { + "selector": "input[type='search']" + } + ], + "parent": { + "selector": "._1zdrb._1cR1n" + }, + "related": { + "selector": "#search-suggestions" + } + }, + "topDown": true, + "type": "incontent_searchbox" + }, + { + "default": true, + "type": "ad_link" + } + ], + "defaultPageQueryParam": { + "key": "t", + "value": "web" + }, + "extraAdServersRegexps": [ + "^https://www\\.bing\\.com/acli?c?k", + "^https://api\\.qwant\\.com/v3/r/", + "^https://fdn\\.qwant\\.com/v3/r/" + ], + "filter_expression": "env.version|versionCompare(\"124.0a1\")>=0", + "followOnParamNames": [], + "id": "19c434a3-d173-4871-9743-290ac92a3f6b", + "isSPA": true, + "last_modified": 1713187389066, + "organicCodes": [], + "queryParamName": "q", + "queryParamNames": [ + "q" + ], + "schema": 1712762409532, + "searchPageRegexp": "^https://www\\.qwant\\.com/", + "taggedCodes": [ + "brz-moz", + "firefoxqwant" + ], + "telemetryId": "qwant" + }, + { + "codeParamName": "t", + "components": [ + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".module--carousel__item" + } + ], + "parent": { + "selector": ".module--carousel" + }, + "related": { + "selector": ".module--carousel__left, .module--carousel__right" + } + }, + "type": "ad_carousel" + }, + { + "excluded": { + "parent": { + "selector": ".js-results-sidebar" + } + }, + "included": { + "children": [ + { + "selector": "ul", + "type": "ad_sitelink" + } + ], + "parent": { + "selector": "article[data-testid='ad']" + } + }, + "type": "ad_link" + }, + { + "included": { + "children": [ + { + "selector": " input#search_form_input" + } + ], + "parent": { + "selector": "form#search_form" + }, + "related": { + "selector": "input#search_button, .search__autocomplete" + } + }, + "topDown": true, + "type": "incontent_searchbox" + }, + { + "included": { + "children": [ + { + "countChildren": true, + "selector": "article[data-testid='ad']" + } + ], + "parent": { + "selector": ".js-results-sidebar" + } + }, + "type": "ad_sidebar" + }, + { + "default": true, + "type": "ad_link" + } + ], + "domainExtraction": { + "ads": [ + { + "method": "href", + "options": { + "queryParamKey": "ad_domain" + }, + "selectors": ".products-carousel a.js-carousel-item-title, [data-testid='ad'] a[data-testid='result-title-a']" + } + ], + "nonAds": [ + { + "method": "href", + "selectors": "[data-layout='organic'] a[data-testid='result-title-a']" + } + ] + }, + "expectedOrganicCodes": [ + "h_", + "ha", + "hb", + "hc", + "hd", + "he", + "hf", + "hg", + "hh", + "hi", + "hj", + "hk", + "hl", + "hm", + "hn", + "ho", + "hp", + "hq", + "hr", + "hs", + "ht", + "hu", + "hv", + "hw", + "hx", + "hy", + "hz" + ], + "extraAdServersRegexps": [ + "^https://duckduckgo.com/y\\.js?.*ad_provider\\=", + "^https://www\\.amazon\\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)" + ], + "id": "9dfd626b-26f2-4913-9d0a-27db6cb7d8ca", + "last_modified": 1706198445456, + "organicCodes": [], + "queryParamName": "q", + "queryParamNames": [ + "q" + ], + "schema": 1705363206938, + "searchPageRegexp": "^https://duckduckgo\\.com/", + "shoppingTab": { + "regexp": "&iax=shopping&ia=shopping", + "selector": "#duckbar a[data-zci-link='products']" + }, + "taggedCodes": [ + "ffab", + "ffcm", + "ffhp", + "ffip", + "ffit", + "ffnt", + "ffocus", + "ffos", + "ffsb", + "fpas", + "fpsa", + "ftas", + "ftsa", + "lm", + "newext" + ], + "telemetryId": "duckduckgo" + }, + { + "codeParamName": "tn", + "extraAdServersRegexps": [ + "^https?://www\\.baidu\\.com/baidu\\.php?" + ], + "followOnParamNames": [ + "oq" + ], + "id": "19c434a3-d173-4871-9743-290ac92a3f6a", + "last_modified": 1698666532326, + "organicCodes": [], + "queryParamName": "wd", + "queryParamNames": [ + "wd", + "word" + ], + "schema": 1698656464939, + "searchPageRegexp": "^https://(?:m|www)\\.baidu\\.com/(?:s|baidu)", + "taggedCodes": [ + "monline_dg", + "monline_3_dg", + "monline_4_dg", + "monline_7_dg" + ], + "telemetryId": "baidu" + }, + { + "codeParamName": "tt", + "components": [ + { + "included": { + "children": [ + { + "countChildren": true, + "selector": ".product-ads-carousel__item" + } + ], + "parent": { + "selector": ".product-ads-carousel" + }, + "related": { + "selector": ".snippet__control" + } + }, + "type": "ad_carousel" + }, + { + "included": { + "children": [ + { + "selector": ".result__extra-content .deep-links--descriptions", + "type": "ad_sitelink" + } + ], + "parent": { + "selector": ".ad-result" + } + }, + "type": "ad_link" + }, + { + "included": { + "children": [ + { + "selector": ".search-form__input, .search-form__submit" + } + ], + "parent": { + "selector": "form.search-form" + }, + "related": { + "selector": ".search-form__suggestions" + } + }, + "topDown": true, + "type": "incontent_searchbox" + }, + { + "default": true, + "type": "ad_link" + } + ], + "expectedOrganicCodes": [], + "extraAdServersRegexps": [ + "^https://www\\.bing\\.com/acli?c?k" + ], + "filter_expression": "env.version|versionCompare(\"110.0a1\")>=0", + "id": "9a487171-3a06-4647-8866-36250ec84f3a", + "last_modified": 1698666532324, + "organicCodes": [], + "queryParamName": "q", + "queryParamNames": [ + "q" + ], + "schema": 1698656463945, + "searchPageRegexp": "^https://www\\.ecosia\\.org/", + "shoppingTab": { + "regexp": "/shopping?", + "selector": "nav li[data-test-id='search-navigation-item-shopping'] a" + }, + "taggedCodes": [ + "mzl", + "813cf1dd", + "16eeffc4" + ], + "telemetryId": "ecosia" + } + ], + "timestamp": 1731429440245 +} \ No newline at end of file diff --git a/components/remote_settings/src/client.rs b/components/remote_settings/src/client.rs index dea14b935..1019f408f 100644 --- a/components/remote_settings/src/client.rs +++ b/components/remote_settings/src/client.rs @@ -23,6 +23,12 @@ const HEADER_BACKOFF: &str = "Backoff"; const HEADER_ETAG: &str = "ETag"; const HEADER_RETRY_AFTER: &str = "Retry-After"; +#[derive(Debug, Clone, Deserialize)] +struct CollectionData { + data: Vec, + timestamp: u64, +} + /// Internal Remote settings client API /// /// This stores an ApiClient implementation. In the real-world, this is always ViaductApiClient, @@ -57,10 +63,29 @@ impl RemoteSettingsClient { }), } } + pub fn collection_name(&self) -> &str { &self.collection_name } + fn get_packaged_data(collection_name: &str) -> Option<&'static str> { + match collection_name { + // Add entries for each locally dumped collection in the `dumps/` folder. + // This is also the place where we want to think about a macro! and feature-gating + // different platforms. + "search-telemetry-v2" => Some(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/dumps/main/search-telemetry-v2.json" + ))), + _ => None, + } + } + + fn load_packaged_data(&self) -> Option { + Self::get_packaged_data(&self.collection_name) + .and_then(|data| serde_json::from_str(data).ok()) + } + /// Filters records based on the presence and evaluation of `filter_expression`. #[cfg(feature = "jexl")] fn filter_records(&self, records: Vec) -> Vec { @@ -87,30 +112,58 @@ impl RemoteSettingsClient { pub fn get_records(&self, sync_if_empty: bool) -> Result>> { let mut inner = self.inner.lock(); let collection_url = inner.api_client.collection_url(); - - // Try to retrieve and filter cached records first let cached_records = inner.storage.get_records(&collection_url)?; + let is_prod = inner.api_client.is_prod_server()?; + let packaged_data = if is_prod { + self.load_packaged_data() + } else { + None + }; - match cached_records { - Some(records) if !records.is_empty() || !sync_if_empty => { - // Filter and return cached records if they're present or if we don't need to sync - let filtered_records = self.filter_records(records); - Ok(Some(filtered_records)) + // Case 1: We have no cached records + if cached_records.is_none() { + // Case 1a: Use packaged data if available (prod only) + if let Some(collection) = packaged_data { + inner + .storage + .set_records(&collection_url, &collection.data)?; + return Ok(Some(self.filter_records(collection.data))); } - None if !sync_if_empty => { - // No cached records and sync_if_empty is false, so we return None - Ok(None) - } - _ => { - // Fetch new records if no cached records or if sync is required + // Case 1b: No packaged data - fetch from remote if sync_if_empty + if sync_if_empty { let records = inner.api_client.get_records(None)?; inner.storage.set_records(&collection_url, &records)?; + return Ok(Some(self.filter_records(records))); + } + return Ok(None); + } - // Apply filtering to the newly fetched records - let filtered_records = self.filter_records(records); - Ok(Some(filtered_records)) + // Now we know we have cached records + let cached_records = cached_records.unwrap(); + let cached_timestamp = inner.storage.get_last_modified_timestamp(&collection_url)?; + + // Case 2: We have packaged data and are in prod + if let Some(packaged_data) = packaged_data { + if packaged_data.timestamp > cached_timestamp.unwrap_or(0) { + // Packaged data is newer + inner + .storage + .set_records(&collection_url, &packaged_data.data)?; + return Ok(Some(self.filter_records(packaged_data.data))); } } + + // Case 3: Return cached data if we have it and either: + // - it's not empty + // - or we're not allowed to sync + if !cached_records.is_empty() || !sync_if_empty { + return Ok(Some(self.filter_records(cached_records))); + } + + // Case 4: Cache is empty and we're allowed to sync + let records = inner.api_client.get_records(None)?; + inner.storage.set_records(&collection_url, &records)?; + Ok(Some(self.filter_records(records))) } pub fn sync(&self) -> Result<()> { @@ -175,6 +228,9 @@ pub trait ApiClient { /// Fetch an attachment from the server fn get_attachment(&mut self, attachment_location: &str) -> Result>; + + /// Check if this client is pointing to the production server + fn is_prod_server(&self) -> Result; } /// Client for Remote settings API requests @@ -300,6 +356,14 @@ impl ApiClient for ViaductApiClient { let resp = self.make_request(attachments_base_url.join(attachment_location)?)?; Ok(resp.body) } + + fn is_prod_server(&self) -> Result { + Ok(self + .endpoints + .root_url + .as_str() + .starts_with(RemoteSettingsServer::Prod.get_url()?.as_str())) + } } /// A simple HTTP client that can retrieve Remote Settings data using the properties by [ClientConfig]. @@ -1556,9 +1620,12 @@ mod test_new_client { api_client.expect_collection_url().returning(|| { "http://rs.example.com/v1/buckets/main/collections/test-collection".into() }); + api_client.expect_is_prod_server().returning(|| Ok(false)); + // Note, don't make any api_client.expect_*() calls, the RemoteSettingsClient should not // attempt to make any requests for this scenario let storage = Storage::new(":memory:".into()).expect("Error creating storage"); + let rs_client = RemoteSettingsClient::new_from_parts("test-collection".into(), storage, api_client); assert_eq!( @@ -1588,9 +1655,12 @@ mod test_new_client { Ok(records.clone()) } }); + api_client.expect_is_prod_server().returning(|| Ok(false)); let storage = Storage::new(":memory:".into()).expect("Error creating storage"); + let rs_client = RemoteSettingsClient::new_from_parts("test-collection".into(), storage, api_client); + assert_eq!( rs_client.get_records(true).expect("Error getting records"), Some(records) @@ -1628,6 +1698,7 @@ mod jexl_tests { Ok(records.clone()) } }); + api_client.expect_is_prod_server().returning(|| Ok(false)); let context = RemoteSettingsContext { app_version: Some("129.0.0".to_string()), @@ -1646,6 +1717,7 @@ mod jexl_tests { JexlFilter::new(Some(context)), api_client, ); + assert_eq!( rs_client.get_records(false).expect("Error getting records"), Some(records) @@ -1677,6 +1749,7 @@ mod jexl_tests { Ok(records.clone()) } }); + api_client.expect_is_prod_server().returning(|| Ok(false)); let context = RemoteSettingsContext { app_version: Some("127.0.0.".to_string()), @@ -1695,9 +1768,325 @@ mod jexl_tests { JexlFilter::new(Some(context)), api_client, ); + assert_eq!( rs_client.get_records(false).expect("Error getting records"), Some(vec![]) ); } } + +#[cfg(not(feature = "jexl"))] +#[cfg(test)] +mod cached_data_tests { + use super::*; + + #[test] + fn test_no_cached_data_use_packaged_data() -> Result<()> { + let collection_name = "search-telemetry-v2"; + + let file_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("dumps") + .join("main") + .join(format!("{}.json", collection_name)); + + assert!( + file_path.exists(), + "Packaged data should exist for this test" + ); + + let mut api_client = MockApiClient::new(); + let storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + let records = rs_client.get_records(false)?; + assert!(records.is_some(), "Records should exist from packaged data"); + + Ok(()) + } + + #[test] + fn test_packaged_data_newer_than_cached() -> Result<()> { + let api_client = MockApiClient::new(); + let storage = Storage::new(":memory:".into())?; + + let collection_url = "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/search-telemetry-v2"; + + // First get the packaged data to know its timestamp + let rs_client = + RemoteSettingsClient::new_from_parts("search-telemetry-v2".into(), storage, api_client); + let packaged_data = rs_client + .load_packaged_data() + .expect("Packaged data should exist"); + + // Setup older cached data + let old_record = RemoteSettingsRecord { + id: "old".to_string(), + last_modified: packaged_data.timestamp - 1000, // Ensure it's older + deleted: false, + attachment: None, + fields: serde_json::Map::new(), + }; + + let mut api_client = MockApiClient::new(); + let mut storage = Storage::new(":memory:".into())?; + storage.set_records(collection_url, &vec![old_record.clone()])?; + + api_client + .expect_collection_url() + .returning(|| collection_url.to_string()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + let rs_client = + RemoteSettingsClient::new_from_parts("search-telemetry-v2".into(), storage, api_client); + + let records = rs_client.get_records(false)?; + assert!(records.is_some()); + let records = records.unwrap(); + assert!(!records.is_empty()); + + // Verify the new records replaced old ones + let mut inner = rs_client.inner.lock(); + let cached = inner.storage.get_records(collection_url)?.unwrap(); + assert!(cached[0].last_modified > old_record.last_modified); + assert_eq!(cached.len(), packaged_data.data.len()); + + Ok(()) + } + + #[test] + fn test_no_cached_data_no_packaged_data_sync_if_empty_true() -> Result<()> { + let collection_name = "nonexistent-collection"; // A collection without packaged data + + // Verify the packaged data file does not exist + let file_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("dumps") + .join("main") + .join(format!("{}.json", collection_name)); + + assert!( + !file_path.exists(), + "Packaged data should not exist for this test" + ); + + let mut api_client = MockApiClient::new(); + let storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + // Mock get_records to return some data + let expected_records = vec![RemoteSettingsRecord { + id: "remote".to_string(), + last_modified: 1000, + deleted: false, + attachment: None, + fields: serde_json::Map::new(), + }]; + api_client + .expect_get_records() + .withf(|timestamp| timestamp.is_none()) + .returning(move |_| Ok(expected_records.clone())); + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + // Call get_records with sync_if_empty = true + let records = rs_client.get_records(true)?; + assert!( + records.is_some(), + "Records should be fetched from the remote server" + ); + let records = records.unwrap(); + assert_eq!(records.len(), 1); + assert_eq!(records[0].id, "remote"); + + Ok(()) + } + + #[test] + fn test_no_cached_data_no_packaged_data_sync_if_empty_false() -> Result<()> { + let collection_name = "nonexistent-collection"; // A collection without packaged data + + // Verify the packaged data file does not exist + let file_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("dumps") + .join("main") + .join(format!("{}.json", collection_name)); + + assert!( + !file_path.exists(), + "Packaged data should not exist for this test" + ); + + let mut api_client = MockApiClient::new(); + let storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + // Since sync_if_empty is false, get_records should not be called + // No need to set expectation for api_client.get_records + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + // Call get_records with sync_if_empty = false + let records = rs_client.get_records(false)?; + assert!( + records.is_none(), + "Records should be None when no cache, no packaged data, and sync_if_empty is false" + ); + + Ok(()) + } + + #[test] + fn test_cached_data_exists_and_not_empty() -> Result<()> { + let collection_name = "test-collection"; + let mut api_client = MockApiClient::new(); + let mut storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + // Set up cached records + let cached_records = vec![RemoteSettingsRecord { + id: "cached1".to_string(), + last_modified: 500, + deleted: false, + attachment: None, + fields: serde_json::Map::new(), + }]; + storage.set_records(&collection_url, &cached_records)?; + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + // Call get_records with any sync_if_empty value + let records = rs_client.get_records(true)?; + assert!( + records.is_some(), + "Records should be returned from the cached data" + ); + let records = records.unwrap(); + assert_eq!(records.len(), 1); + assert_eq!(records[0].id, "cached1"); + + Ok(()) + } + + #[test] + fn test_cached_data_empty_sync_if_empty_false() -> Result<()> { + let collection_name = "test-collection"; + let mut api_client = MockApiClient::new(); + let mut storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + // Set up empty cached records + let cached_records: Vec = vec![]; + storage.set_records(&collection_url, &cached_records)?; + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + // Call get_records with sync_if_empty = false + let records = rs_client.get_records(false)?; + assert!(records.is_some(), "Empty cached records should be returned"); + let records = records.unwrap(); + assert!(records.is_empty(), "Cached records should be empty"); + + Ok(()) + } + + #[test] + fn test_cached_data_empty_sync_if_empty_true() -> Result<()> { + let collection_name = "test-collection"; + let mut api_client = MockApiClient::new(); + let mut storage = Storage::new(":memory:".into())?; + + let collection_url = format!( + "https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/{}", + collection_name + ); + + // Mock get_records to return some data + let expected_records = vec![RemoteSettingsRecord { + id: "remote1".to_string(), + last_modified: 1000, + deleted: false, + attachment: None, + fields: serde_json::Map::new(), + }]; + api_client + .expect_get_records() + .withf(|timestamp| timestamp.is_none()) + .returning(move |_| Ok(expected_records.clone())); + api_client.expect_is_prod_server().returning(|| Ok(true)); + + // Set up empty cached records + let cached_records: Vec = vec![]; + storage.set_records(&collection_url, &cached_records)?; + + api_client + .expect_collection_url() + .returning(move || collection_url.clone()); + + let rs_client = + RemoteSettingsClient::new_from_parts(collection_name.to_string(), storage, api_client); + + // Call get_records with sync_if_empty = true + let records = rs_client.get_records(true)?; + assert!( + records.is_some(), + "Records should be fetched from the remote server" + ); + let records = records.unwrap(); + assert_eq!(records.len(), 1); + assert_eq!(records[0].id, "remote1"); + + Ok(()) + } +} diff --git a/components/remote_settings/src/config.rs b/components/remote_settings/src/config.rs index 9d289863c..7540f9e50 100644 --- a/components/remote_settings/src/config.rs +++ b/components/remote_settings/src/config.rs @@ -64,7 +64,7 @@ impl RemoteSettingsServer { /// /// The difference is that it uses `Error` instead of `ApiError`. This is what we need to use /// inside the crate. - pub(crate) fn get_url(&self) -> Result { + pub fn get_url(&self) -> Result { Ok(match self { Self::Prod => Url::parse("https://firefox.settings.services.mozilla.com/v1")?, Self::Stage => Url::parse("https://firefox.settings.services.allizom.org/v1")?, diff --git a/components/remote_settings/src/service.rs b/components/remote_settings/src/service.rs index c8466d71d..552d9a581 100644 --- a/components/remote_settings/src/service.rs +++ b/components/remote_settings/src/service.rs @@ -64,6 +64,7 @@ impl RemoteSettingsService { ) -> Result> { let mut inner = self.inner.lock(); let storage = Storage::new(inner.storage_dir.join(format!("{collection_name}.sql")))?; + let client = Arc::new(RemoteSettingsClient::new( inner.base_url.clone(), inner.bucket_name.clone(), diff --git a/components/remote_settings/src/storage.rs b/components/remote_settings/src/storage.rs index 0756be7e8..46e4ab4f3 100644 --- a/components/remote_settings/src/storage.rs +++ b/components/remote_settings/src/storage.rs @@ -133,7 +133,7 @@ impl Storage { pub fn set_records( &mut self, collection_url: &str, - records: &[RemoteSettingsRecord], + records: &Vec, ) -> Result<()> { let tx = self.conn.transaction()?; @@ -282,7 +282,7 @@ mod tests { let collection_url = "https://example.com/api"; // Set empty records - storage.set_records(collection_url, &[])?; + storage.set_records(collection_url, &Vec::::default())?; // Get records let fetched_records = storage.get_records(collection_url)?; diff --git a/examples/remote-settings-cli/Cargo.toml b/examples/remote-settings-cli/Cargo.toml index 3d6bffe25..81ac77a4a 100644 --- a/examples/remote-settings-cli/Cargo.toml +++ b/examples/remote-settings-cli/Cargo.toml @@ -5,6 +5,10 @@ license = "MPL-2.0" edition = "2021" publish = false +[lib] +name = "dump" +path = "src/dump/lib.rs" + [dependencies] remote_settings = { path = "../../components/remote_settings" } viaduct-reqwest = { path = "../../components/support/viaduct-reqwest" } @@ -12,3 +16,11 @@ log = "0.4" clap = {version = "4.2", features = ["derive"]} anyhow = "1.0" env_logger = { version = "0.10", default-features = false, features = ["humantime"] } +reqwest = { version = "0.12", features = ["json"] } +serde_json = "1" +futures = "0.3" +indicatif = "0.17" +tokio = { version = "1.29.1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +thiserror = "1.0.31" +walkdir = "2.4.0" \ No newline at end of file diff --git a/examples/remote-settings-cli/src/dump/client.rs b/examples/remote-settings-cli/src/dump/client.rs new file mode 100644 index 000000000..04dc3459b --- /dev/null +++ b/examples/remote-settings-cli/src/dump/client.rs @@ -0,0 +1,332 @@ +use crate::error::*; +use futures::{stream::FuturesUnordered, StreamExt}; +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use serde::de::Error; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use std::{path::PathBuf, sync::Arc}; +use walkdir::WalkDir; + +const DUMPS_DIR: &str = "dumps"; + +pub struct CollectionDownloader { + client: reqwest::Client, + multi_progress: Arc, + output_dir: PathBuf, +} + +#[derive(Deserialize, Serialize)] +pub struct CollectionData { + data: Vec, + timestamp: u64, +} + +pub struct UpdateResult { + updated: Vec, + up_to_date: Vec, + not_found: Vec, +} + +impl CollectionDownloader { + pub fn new(root_path: PathBuf) -> Self { + let output_dir = if root_path.ends_with("components/remote_settings") { + root_path + } else { + root_path.join("components").join("remote_settings") + }; + + Self { + client: reqwest::Client::new(), + multi_progress: Arc::new(MultiProgress::new()), + output_dir, + } + } + + pub async fn run(&self, dry_run: bool, create_pr: bool) -> Result<()> { + if dry_run && create_pr { + return Err(RemoteSettingsError::Git( + "Cannot use --dry-run with --create-pr".to_string(), + ) + .into()); + } + + let result = self.download_all().await?; + + if dry_run { + println!("\nDry run summary:"); + println!("- Would update {} collections", result.updated.len()); + println!( + "- {} collections already up to date", + result.up_to_date.len() + ); + println!( + "- {} collections not found on remote", + result.not_found.len() + ); + return Ok(()); + } + + println!("\nExecution summary:"); + if !result.updated.is_empty() { + println!("Updated collections:"); + for collection in &result.updated { + println!(" - {}", collection); + } + } + + if !result.up_to_date.is_empty() { + println!("Collections already up to date:"); + for collection in &result.up_to_date { + println!(" - {}", collection); + } + } + + if !result.not_found.is_empty() { + println!("Collections not found on remote:"); + for collection in &result.not_found { + println!(" - {}", collection); + } + } + + if !result.updated.is_empty() && create_pr { + self.create_pull_request()?; + } + + Ok(()) + } + + fn create_pull_request(&self) -> Result<()> { + let git_ops = crate::git::GitOps::new( + self.output_dir + .parent() + .unwrap() + .parent() + .unwrap() + .to_path_buf(), + ); + + let branch_name = "remote-settings-update-dumps"; + + git_ops.create_branch(branch_name)?; + git_ops.commit_changes()?; + git_ops.push_branch(branch_name)?; + Ok(()) + } + + fn scan_local_dumps(&self) -> Result> { + let mut collections = HashMap::new(); + let dumps_dir = self.output_dir.join(DUMPS_DIR); + + for entry in WalkDir::new(dumps_dir).min_depth(2).max_depth(2) { + let entry = entry?; + if entry.file_type().is_file() + && entry.path().extension().map_or(false, |ext| ext == "json") + { + // Get bucket name from parent directory + let bucket = entry + .path() + .parent() + .and_then(|p| p.file_name()) + .and_then(|n| n.to_str()) + .ok_or_else(|| RemoteSettingsError::Path("Invalid bucket path".into()))?; + + // Get collection name from filename + let collection_name = entry + .path() + .file_stem() + .and_then(|n| n.to_str()) + .ok_or_else(|| RemoteSettingsError::Path("Invalid collection name".into()))?; + + // Read and parse the file to get timestamp + let content = std::fs::read_to_string(entry.path())?; + let data: serde_json::Value = serde_json::from_str(&content)?; + let timestamp = data["timestamp"].as_u64().ok_or_else(|| { + RemoteSettingsError::Json(serde_json::Error::custom("No timestamp found")) + })?; + + collections.insert( + format!("{}/{}", bucket, collection_name), + (bucket.to_string(), timestamp), + ); + } + } + Ok(collections) + } + + async fn fetch_timestamps(&self) -> Result> { + let monitor_url = format!( + "{}/buckets/monitor/collections/changes/records", + "https://firefox.settings.services.mozilla.com/v1" + ); + let monitor_response: Value = self.client.get(&monitor_url).send().await?.json().await?; + + Ok(monitor_response["data"] + .as_array() + .ok_or_else(|| { + RemoteSettingsError::Json(serde_json::Error::custom( + "No data array in monitor response", + )) + })? + .iter() + .filter_map(|record| { + let bucket = record["bucket"].as_str()?; + let collection_name = record["collection"].as_str()?; + Some(( + format!("{}/{}", bucket, collection_name), + record["last_modified"].as_u64()?, + )) + }) + .collect()) + } + + async fn fetch_collection( + &self, + collection_name: String, + last_modified: u64, + pb: ProgressBar, + ) -> Result<(String, CollectionData)> { + let parts: Vec<&str> = collection_name.split('/').collect(); + if parts.len() != 2 { + return Err(RemoteSettingsError::Json(serde_json::Error::custom( + "Invalid collection name format", + )) + .into()); + } + let (bucket, name) = (parts[0], parts[1]); + + let url = format!( + "{}/buckets/{}/collections/{}/changeset?_expected={}", + "https://firefox.settings.services.mozilla.com/v1", bucket, name, last_modified + ); + + pb.set_message(format!("Downloading {}", name)); + + let response = self.client.get(&url).send().await?; + let changeset: Value = response.json().await?; + + let timestamp = changeset["timestamp"].as_u64().ok_or_else(|| { + RemoteSettingsError::Json(serde_json::Error::custom("No timestamp in changeset")) + })?; + + pb.finish_with_message(format!("Downloaded {}", name)); + + Ok(( + collection_name, + CollectionData { + data: changeset["changes"] + .as_array() + .unwrap_or(&Vec::new()) + .to_vec(), + timestamp, + }, + )) + } + + pub async fn download_all(&self) -> Result { + std::fs::create_dir_all(self.output_dir.join(DUMPS_DIR))?; + + let local_collections = self.scan_local_dumps()?; + if local_collections.is_empty() { + println!( + "No local collections found in {:?}", + self.output_dir.join(DUMPS_DIR) + ); + return Ok(UpdateResult { + updated: vec![], + up_to_date: vec![], + not_found: vec![], + }); + } + + let remote_timestamps = self.fetch_timestamps().await?; + let mut futures = FuturesUnordered::new(); + let mut up_to_date = Vec::new(); + let mut not_found = Vec::new(); + + // Only check collections we have locally + for (collection_key, (_, local_timestamp)) in local_collections { + let remote_timestamp = match remote_timestamps.get(&collection_key) { + Some(×tamp) => timestamp, + None => { + println!("Warning: Collection {} not found on remote", collection_key); + not_found.push(collection_key); + continue; + } + }; + + let pb = self.multi_progress.add(ProgressBar::new(100)); + pb.set_style( + ProgressStyle::default_bar() + .template("[{elapsed_precise}] {bar:40.cyan/blue} {msg}") + .unwrap(), + ); + + if local_timestamp >= remote_timestamp { + println!("Collection {} is up to date", collection_key); + up_to_date.push(collection_key); + continue; + } + + println!("Collection {} needs update", collection_key); + futures.push(self.fetch_collection(collection_key.clone(), remote_timestamp, pb)); + } + + let mut updated = Vec::new(); + while let Some(result) = futures.next().await { + let (collection, data) = result?; + self.write_collection_file(&collection, &data)?; + updated.push(collection); + } + + Ok(UpdateResult { + updated, + up_to_date, + not_found, + }) + } + + pub async fn download_single(&self, bucket: &str, collection_name: &str) -> Result<()> { + std::fs::create_dir_all(self.output_dir.join(DUMPS_DIR))?; + + let collection_key = format!("{}/{}", bucket, collection_name); + let pb = self.multi_progress.add(ProgressBar::new(100)); + pb.set_style( + ProgressStyle::default_bar() + .template("[{elapsed_precise}] {bar:40.cyan/blue} {msg}") + .unwrap(), + ); + + let (_, data) = self.fetch_collection(collection_key.clone(), 0, pb).await?; + + // Write to file + self.write_collection_file(&collection_key, &data)?; + + println!( + "Successfully downloaded collection to {:?}/dumps/{}/{}.json", + self.output_dir, bucket, collection_name + ); + + Ok(()) + } + + fn write_collection_file(&self, collection: &str, data: &CollectionData) -> Result<()> { + let parts: Vec<&str> = collection.split('/').collect(); + if parts.len() != 2 { + return Err(RemoteSettingsError::Path("Invalid collection path".into()).into()); + } + let (bucket, name) = (parts[0], parts[1]); + + // Write to dumps directory + let dumps_path = self + .output_dir + .join(DUMPS_DIR) + .join(bucket) + .join(format!("{}.json", name)); + + std::fs::create_dir_all(dumps_path.parent().unwrap())?; + std::fs::write(&dumps_path, serde_json::to_string_pretty(&data)?)?; + + Ok(()) + } +} diff --git a/examples/remote-settings-cli/src/dump/error.rs b/examples/remote-settings-cli/src/dump/error.rs new file mode 100644 index 000000000..9bd076a54 --- /dev/null +++ b/examples/remote-settings-cli/src/dump/error.rs @@ -0,0 +1,17 @@ +use thiserror::Error; + +pub type Result = anyhow::Result; + +#[derive(Error, Debug)] +pub enum RemoteSettingsError { + #[error("Network error: {0}")] + Network(#[from] reqwest::Error), + #[error("IO error: {0}")] + IO(#[from] std::io::Error), + #[error("JSON error: {0}")] + Json(#[from] serde_json::Error), + #[error("Git operation failed: {0}")] + Git(String), + #[error("Cannot find local dump: {0}")] + Path(String), +} diff --git a/examples/remote-settings-cli/src/dump/git.rs b/examples/remote-settings-cli/src/dump/git.rs new file mode 100644 index 000000000..c74f34b5d --- /dev/null +++ b/examples/remote-settings-cli/src/dump/git.rs @@ -0,0 +1,73 @@ +use anyhow::{Context, Result}; +use std::path::PathBuf; + +pub(crate) struct GitOps { + pub(crate) root_path: PathBuf, +} + +impl GitOps { + pub(crate) fn new(root_path: PathBuf) -> Self { + Self { root_path } + } + + pub(crate) fn create_branch(&self, name: &str) -> Result<()> { + let status = std::process::Command::new("git") + .args(["checkout", "-b", name]) + .current_dir(&self.root_path) + .status() + .context("Failed to create branch")?; + + if !status.success() { + anyhow::bail!("Failed to create branch"); + } + + Ok(()) + } + + pub(crate) fn commit_changes(&self) -> Result<()> { + let status = std::process::Command::new("git") + .args(["add", "."]) + .current_dir(&self.root_path) + .status() + .context("Failed to stage changes")?; + + if !status.success() { + anyhow::bail!("Failed to stage changes"); + } + + let status = std::process::Command::new("git") + .args([ + "commit", + "-m", "Update Remote Settings defaults\n\nAutomated update of Remote Settings default values" + ]) + .current_dir(&self.root_path) + .status() + .context("Failed to commit changes")?; + + if !status.success() { + anyhow::bail!("Failed to commit changes"); + } + + Ok(()) + } + + pub(crate) fn push_branch(&self, name: &str) -> Result<()> { + let status = std::process::Command::new("git") + .args(["push", "origin", name]) + .current_dir(&self.root_path) + .status() + .context("Failed to push branch")?; + + if !status.success() { + anyhow::bail!("Failed to push branch"); + } + + println!("Branch '{}' has been pushed to origin.", name); + println!( + "You can create a PR at: https://github.com/mozilla/application-services/pull/new/{}", + name + ); + + Ok(()) + } +} diff --git a/examples/remote-settings-cli/src/dump/lib.rs b/examples/remote-settings-cli/src/dump/lib.rs new file mode 100644 index 000000000..240bc0a16 --- /dev/null +++ b/examples/remote-settings-cli/src/dump/lib.rs @@ -0,0 +1,3 @@ +pub mod client; +pub(crate) mod error; +pub(crate) mod git; diff --git a/examples/remote-settings-cli/src/main.rs b/examples/remote-settings-cli/src/main.rs index 6bdc74a99..2369bb03e 100644 --- a/examples/remote-settings-cli/src/main.rs +++ b/examples/remote-settings-cli/src/main.rs @@ -4,7 +4,9 @@ use anyhow::Result; use clap::{Parser, Subcommand, ValueEnum}; +use std::path::PathBuf; +use dump::client::CollectionDownloader; use remote_settings::{RemoteSettingsConfig2, RemoteSettingsServer, RemoteSettingsService}; const DEFAULT_LOG_FILTER: &str = "remote_settings=info"; @@ -45,9 +47,38 @@ enum Commands { #[arg(long)] sync_if_empty: bool, }, + /// Download and combine all remote settings collections + DumpSync { + /// Root path of the repository + #[arg(short, long, default_value = ".")] + path: PathBuf, + + /// Dry run - don't write any files + #[arg(long, default_value_t = false)] + dry_run: bool, + + /// Create a PR with the changes + #[arg(long, default_value_t = false)] + create_pr: bool, + }, + /// Download a single collection to the dumps directory + DumpGet { + /// Bucket name + #[arg(long, required = true)] + bucket: String, + + /// Collection name + #[arg(long, required = true)] + collection_name: String, + + /// Root path of the repository + #[arg(short, long, default_value = ".")] + path: PathBuf, + }, } -fn main() -> Result<()> { +#[tokio::main] +async fn main() -> Result<()> { let cli = Cli::parse(); env_logger::init_from_env(env_logger::Env::default().filter_or( "RUST_LOG", @@ -65,6 +96,22 @@ fn main() -> Result<()> { collection, sync_if_empty, } => get_records(service, collection, sync_if_empty), + Commands::DumpSync { + path, + dry_run, + create_pr, + } => { + let downloader = CollectionDownloader::new(path); + downloader.run(dry_run, create_pr).await + } + Commands::DumpGet { + bucket, + collection_name, + path, + } => { + let downloader = CollectionDownloader::new(path); + downloader.download_single(&bucket, &collection_name).await + } } }