From 388f78be19aea8ff2a6044a222f4c31826fbf36b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 29 Jul 2020 14:22:23 -0400 Subject: [PATCH] 1641148: Integrate Python and Rust logging (#1052) * 1641148: Integrate Python and Rust logging This works by setting up a named pipe between Python and Rust logging. This requires adding another thread to the Python side to read from the log. The overhead of that *could* be problematic. I have no idea if this approach will work on Windows. This at least *does* work -- any callback-based approach has been ruled out due to limitations of CFFI or the need to go to a compiling-against- a-specific-version-of-Python approach (see details in bug). * hangle -> handle * Rename and better document function --- CHANGELOG.md | 1 + Cargo.lock | 1 + .../setting-up-python-build-environment.md | 16 +--- glean-core/ffi/Cargo.toml | 1 + glean-core/ffi/glean.h | 14 ++++ glean-core/ffi/src/fd_logger.rs | 83 +++++++++++++++++++ glean-core/ffi/src/lib.rs | 39 +++++++++ glean-core/ios/Glean/GleanFfi.h | 14 ++++ glean-core/python/glean/_ffi.py | 43 +++++++++- 9 files changed, 196 insertions(+), 16 deletions(-) create mode 100644 glean-core/ffi/src/fd_logger.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 80c01a539..d99360311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Add support for the Counter metric type and Labeled Counter. * Python * **Breaking change:** `data_dir` must always be passed to `Glean.initialize`. Prior to this, a missing value would store Glean data in a temporary directory. + * Logging messages from the Rust core are now sent through Python's standard library `logging` module. Therefore all logging in a Python application can be controlled through the `logging` module interface. # v31.6.0 (2020-07-24) diff --git a/Cargo.lock b/Cargo.lock index af6a78833..edb304552 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -340,6 +340,7 @@ dependencies = [ "glean-core 31.6.0", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "once_cell 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/docs/dev/python/setting-up-python-build-environment.md b/docs/dev/python/setting-up-python-build-environment.md index 2359ecaa0..2253adee9 100644 --- a/docs/dev/python/setting-up-python-build-environment.md +++ b/docs/dev/python/setting-up-python-build-environment.md @@ -118,11 +118,7 @@ You can send extra parameters to the `py.test` command by setting the `PYTEST_AR ## Viewing logging output -The Glean Python bindings have two sources of log messages: those that come from Python and those that come from Rust. - -### Python log messages - -Python log messages are emitted using the Python standard library's [`logging` module](https://docs.python.org/3/library/logging.html). +Log messages (whether originating in Python or Rust) are emitted using the Python standard library's [`logging` module](https://docs.python.org/3/library/logging.html). This module provides a lot of possibilities for customization, but the easiest way to control the log level globally is with [`logging.basicConfig`](https://docs.python.org/3/library/logging.html#logging.basicConfig): ```python @@ -130,16 +126,6 @@ import logging logging.basicConfig(level=logging.DEBUG) ``` -### Rust log messages - -Rust log messages are emitted using [`env_logger`](https://docs.rs/env_logger/latest/env_logger/). -The log level can be controlled with the `RUST_LOG` environment variable: - -```python -import os -os.environ["RUST_LOG"] = "glean_core=DEBUG" -``` - ## Linting, formatting and type checking The Glean Python bindings use the following tools: diff --git a/glean-core/ffi/Cargo.toml b/glean-core/ffi/Cargo.toml index 2e3384ff7..b79fcdaa1 100644 --- a/glean-core/ffi/Cargo.toml +++ b/glean-core/ffi/Cargo.toml @@ -29,6 +29,7 @@ crate-type = ["lib", "staticlib", "cdylib"] [dependencies] ffi-support = "0.4.0" log = "0.4.8" +serde = { version = "1.0.104", features = ["derive"] } serde_json = "1.0.44" uuid = { version = "0.8.1", features = ["v4"] } once_cell = "1.2.0" diff --git a/glean-core/ffi/glean.h b/glean-core/ffi/glean.h index a2c7db5dc..d961cd031 100644 --- a/glean-core/ffi/glean.h +++ b/glean-core/ffi/glean.h @@ -402,6 +402,20 @@ void glean_destroy_uuid_metric(uint64_t v); */ void glean_enable_logging(void); +/** + * Initialize the logging system to send JSON messages to a file descriptor + * (Unix) or file handle (Windows). + * + * Not available on Android and iOS. + * + * `fd` is a writable file descriptor (on Unix) or file handle (on Windows). + * + * # Safety + * Unsafe because the fd u64 passed in will be interpreted as either a file + * descriptor (Unix) or file handle (Windows) without any checking. + */ +void glean_enable_logging_to_fd(uint64_t fd); + void glean_event_record(uint64_t metric_id, uint64_t timestamp, RawIntArray extra_keys, diff --git a/glean-core/ffi/src/fd_logger.rs b/glean-core/ffi/src/fd_logger.rs new file mode 100644 index 000000000..ad03aff76 --- /dev/null +++ b/glean-core/ffi/src/fd_logger.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fs::File; +use std::io::Write; +use std::sync::RwLock; + +#[cfg(target_os = "windows")] +use std::os::windows::io::FromRawHandle; + +#[cfg(target_os = "windows")] +use std::ffi::c_void; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::io::FromRawFd; + +use serde::Serialize; + +/// An implementation of log::Log that writes log messages in JSON format to a +/// file descriptor/handle. The logging level is ignored in this implementation: +/// it is up to the receiver of these log messages (on the language binding +/// side) to filter the log messages based on their level. +/// The JSON payload of each message in an object with the following keys: +/// - `level` (string): One of the logging levels defined here: +/// https://docs.rs/log/0.4.11/log/enum.Level.html +/// - `message` (string): The logging message. +pub struct FdLogger { + pub file: RwLock, +} + +#[derive(Serialize)] +struct FdLoggingRecord { + level: String, + message: String, +} + +#[cfg(target_os = "windows")] +unsafe fn get_file_from_fd(fd: u64) -> File { + File::from_raw_handle(fd as *mut c_void) +} + +#[cfg(not(target_os = "windows"))] +unsafe fn get_file_from_fd(fd: u64) -> File { + File::from_raw_fd(fd as i32) +} + +impl FdLogger { + pub unsafe fn new(fd: u64) -> Self { + FdLogger { + file: RwLock::new(get_file_from_fd(fd)), + } + } +} + +impl log::Log for FdLogger { + fn enabled(&self, _metadata: &log::Metadata) -> bool { + // This logger always emits logging messages of any level, and the + // language binding consuming these messages is responsible for + // filtering and routing them. + true + } + + fn log(&self, record: &log::Record) { + // Normally, classes implementing the Log trait would filter based on + // the log level here. But in this case, we want to emit all log + // messages and let the logging system in the language binding filter + // and route them. + let payload = FdLoggingRecord { + level: record.level().to_string(), + message: record.args().to_string(), + }; + let _ = writeln!( + self.file.write().unwrap(), + "{}", + serde_json::to_string(&payload).unwrap() + ); + } + + fn flush(&self) { + let _ = self.file.write().unwrap().flush(); + } +} diff --git a/glean-core/ffi/src/lib.rs b/glean-core/ffi/src/lib.rs index 499e973a7..7f6318a56 100644 --- a/glean-core/ffi/src/lib.rs +++ b/glean-core/ffi/src/lib.rs @@ -9,6 +9,9 @@ use std::panic::UnwindSafe; use ffi_support::{define_string_destructor, ConcurrentHandleMap, FfiStr, IntoFfi}; +#[cfg(all(not(target_os = "android"), not(target_os = "ios")))] +use once_cell::sync::OnceCell; + pub use glean_core::metrics::MemoryUnit; pub use glean_core::metrics::TimeUnit; pub use glean_core::upload::ffi_upload_result::*; @@ -38,6 +41,9 @@ mod timing_distribution; pub mod upload; mod uuid; +#[cfg(all(not(target_os = "android"), not(target_os = "ios")))] +mod fd_logger; + use ffi_string_ext::FallibleToString; use from_raw::*; use handlemap_ext::HandleMapExtension; @@ -180,6 +186,39 @@ pub extern "C" fn glean_enable_logging() { } } +#[cfg(all(not(target_os = "android"), not(target_os = "ios")))] +static FD_LOGGER: OnceCell = OnceCell::new(); + +/// Initialize the logging system to send JSON messages to a file descriptor +/// (Unix) or file handle (Windows). +/// +/// Not available on Android and iOS. +/// +/// `fd` is a writable file descriptor (on Unix) or file handle (on Windows). +/// +/// # Safety +/// Unsafe because the fd u64 passed in will be interpreted as either a file +/// descriptor (Unix) or file handle (Windows) without any checking. +#[cfg(all(not(target_os = "android"), not(target_os = "ios")))] +#[no_mangle] +pub unsafe extern "C" fn glean_enable_logging_to_fd(fd: u64) { + // Set up logging to a file descriptor/handle. For this usage, the + // language binding should setup a pipe and pass in the descriptor to + // the writing side of the pipe as the `fd` parameter. Log messages are + // written as JSON to the file descriptor. + if FD_LOGGER.set(fd_logger::FdLogger::new(fd)).is_ok() { + // Set the level so everything goes through to the language + // binding side where it will be filtered by the language + // binding's logging system. + if log::set_logger(FD_LOGGER.get().unwrap()) + .map(|()| log::set_max_level(log::LevelFilter::Debug)) + .is_ok() + { + return; + } + }; +} + /// Configuration over FFI. /// /// **CAUTION**: This must match _exactly_ the definition on the Kotlin side. diff --git a/glean-core/ios/Glean/GleanFfi.h b/glean-core/ios/Glean/GleanFfi.h index a2c7db5dc..d961cd031 100644 --- a/glean-core/ios/Glean/GleanFfi.h +++ b/glean-core/ios/Glean/GleanFfi.h @@ -402,6 +402,20 @@ void glean_destroy_uuid_metric(uint64_t v); */ void glean_enable_logging(void); +/** + * Initialize the logging system to send JSON messages to a file descriptor + * (Unix) or file handle (Windows). + * + * Not available on Android and iOS. + * + * `fd` is a writable file descriptor (on Unix) or file handle (on Windows). + * + * # Safety + * Unsafe because the fd u64 passed in will be interpreted as either a file + * descriptor (Unix) or file handle (Windows) without any checking. + */ +void glean_enable_logging_to_fd(uint64_t fd); + void glean_event_record(uint64_t metric_id, uint64_t timestamp, RawIntArray extra_keys, diff --git a/glean-core/python/glean/_ffi.py b/glean-core/python/glean/_ffi.py index 6bce7301a..b65f8b920 100644 --- a/glean-core/python/glean/_ffi.py +++ b/glean-core/python/glean/_ffi.py @@ -2,8 +2,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +import json +import logging +import os from pathlib import Path import sys +import threading from typing import Any, List, Optional import weakref @@ -31,7 +35,44 @@ _global_weakkeydict: Any = weakref.WeakKeyDictionary() lib = ffi.dlopen(str(Path(__file__).parent / get_shared_object_filename())) -lib.glean_enable_logging() + + +def setup_logging(): + """ + Sets up a pipe for communication of logging messages from the Rust core to + the Python logging system. A thread is created to listen for messages on + the pipe, convert them from JSON and send them to the Python stdlib logging + module. + + Must be called after the Glean core has been dlopen'd. + """ + r, w = os.pipe() + lib.glean_enable_logging_to_fd(w) + + reader = os.fdopen(r, encoding="utf-8") + + name = "glean.rust_core" + log = logging.getLogger(name) + level_map = { + "CRITICAL": logging.CRITICAL, + "ERROR": logging.ERROR, + "WARNING": logging.WARNING, + "INFO": logging.INFO, + "DEBUG": logging.DEBUG, + } + + def log_handler(): + while True: + data = reader.readline().rstrip() + json_content = json.loads(data) + log.log(level_map.get(json_content["level"], 0), json_content["message"]) + + logging_thread = threading.Thread(target=log_handler) + logging_thread.daemon = True + logging_thread.start() + + +setup_logging() def make_config(