Skip to main content

iroh_services/
logs.rs

1//! Client-side log collection: a `tracing-subscriber` layer that writes
2//! structured log records to rolling files on the local filesystem, plus a
3//! reload handle that lets the cloud control the level filter at runtime.
4//!
5//! # The cloud is the source of truth for the level
6//!
7//! The level filter starts at `off`. No tracing events are captured until the
8//! cloud pushes a [`crate::protocol::SetLogLevel`] over the [`ClientHost`]
9//! channel. The cloud sends one immediately after authenticating a connected
10//! endpoint, derived from the per-endpoint `endpoint_log_settings` row (when
11//! present) plus the project default.
12//!
13//! [`ClientHost`]: crate::ClientHost
14//!
15//! # Files live on the device
16//!
17//! Records land in rolling JSON files under a caller-supplied directory.
18//! Operators view, ship, or aggregate them with whatever tooling they
19//! already use (`tail`, `journalctl`, `vector`, etc.).
20//!
21//! # Typical usage
22//!
23//! ```no_run
24//! use iroh_services::logs::{self, FileLoggerConfig};
25//!
26//! # fn main() -> anyhow::Result<()> {
27//! // Installs a global subscriber. The filter starts at `off`; the
28//! // Client pulls the cloud-persisted directive right after Auth and
29//! // applies it via the collector. The dashboard can also push live
30//! // overrides via ClientHost::set_log_level after that.
31//! let (collector, _guard) = logs::install(FileLoggerConfig::new("./logs"))?;
32//! # let _ = collector;
33//! # Ok(())
34//! # }
35//! ```
36//!
37//! Compose with additional layers (for example, a stderr fmt layer) via
38//! [`layer`]:
39//!
40//! ```no_run
41//! use iroh_services::logs::{self, FileLoggerConfig};
42//! use tracing_subscriber::prelude::*;
43//!
44//! # fn main() -> anyhow::Result<()> {
45//! let (collector, file_layer, _guard) = logs::layer(FileLoggerConfig::new("./logs"))?;
46//! tracing_subscriber::registry()
47//!     .with(file_layer)
48//!     .with(tracing_subscriber::fmt::layer())
49//!     .try_init()
50//!     .ok();
51//! # let _ = collector;
52//! # Ok(())
53//! # }
54//! ```
55
56use std::{
57    path::{Path, PathBuf},
58    sync::{Arc, Mutex},
59};
60
61use n0_future::{
62    task::{AbortOnDropHandle, JoinHandle},
63    time::Duration,
64};
65use tracing::{Subscriber, debug, warn};
66use tracing_subscriber::{
67    EnvFilter, Layer, Registry, layer::SubscriberExt as _, registry::LookupSpan, reload,
68    util::SubscriberInitExt as _,
69};
70
71/// Errors that can occur while installing the log collector.
72#[derive(Debug, thiserror::Error)]
73pub enum InstallError {
74    /// The default tracing dispatcher is already set; install once at startup.
75    #[error("global tracing dispatcher is already set")]
76    AlreadyInstalled,
77    /// File logger setup failed (could not create directory, open appender,
78    /// etc.).
79    #[error("file logger setup failed: {0}")]
80    FileLogger(#[from] FileLoggerError),
81}
82
/// Errors that can occur while changing the active filter at runtime.
///
/// Returned by [`LogCollector::set_filter`] and [`LogCollector::revert`].
#[derive(Debug, thiserror::Error)]
pub enum SetFilterError {
    /// The supplied directives string was rejected by `EnvFilter`. The
    /// payload is the parse error rendered with `to_string()`.
    #[error("invalid filter directives: {0}")]
    InvalidDirectives(String),
    /// The reload handle refused the new filter. The underlying reload
    /// error is discarded where this variant is built, so the exact cause
    /// is not preserved.
    #[error("reload handle is no longer valid")]
    ReloadFailed,
}
93
/// Handle to the cloud-controlled tracing filter.
///
/// Cheap to clone: the only field is an `Arc`, so every clone shares the
/// same reload handle and the same pending-revert slot.
#[derive(Clone)]
pub struct LogCollector {
    /// Shared state behind every clone of this handle.
    inner: Arc<CollectorInner>,
}
100
/// State shared by all clones of a [`LogCollector`].
struct CollectorInner {
    /// Handle used to swap the `EnvFilter` that gates the file layer.
    reload_handle: reload::Handle<EnvFilter, Registry>,
    /// The timed revert scheduled by the most recent
    /// [`LogCollector::set_filter`] call, if any. Stored as an
    /// `AbortOnDropHandle`, so clearing or replacing the slot cancels the
    /// task it held.
    revert_task: Mutex<Option<AbortOnDropHandle<()>>>,
    /// Directory where the rolling file appender writes. Read by
    /// [`LogCollector::current_log_file`] to locate the current file.
    log_dir: PathBuf,
    /// Filename prefix the rolling appender uses; the date suffix is
    /// appended for each rolled-over file.
    file_name_prefix: String,
}
111
/// Off-state directive: the filter the file layer starts with, and the
/// target [`LogCollector::revert`] falls back to when none is supplied.
/// Nothing is captured until the cloud sends a `SetLogLevel` with
/// something more permissive.
const OFF_DIRECTIVES: &str = "off";
115
116impl LogCollector {
117    /// Sets the active filter directives. When `expires_in` is set,
118    /// schedules a revert after that duration. The revert target is
119    /// `revert_to` when supplied; `None` means revert to `off`.
120    pub fn set_filter(
121        &self,
122        directives: &str,
123        expires_in: Option<Duration>,
124        revert_to: Option<&str>,
125    ) -> Result<(), SetFilterError> {
126        let filter = EnvFilter::try_new(directives)
127            .map_err(|e| SetFilterError::InvalidDirectives(e.to_string()))?;
128        self.inner
129            .reload_handle
130            .reload(filter)
131            .map_err(|_| SetFilterError::ReloadFailed)?;
132
133        let mut guard = self.inner.revert_task.lock().expect("poisoned");
134        *guard = None;
135
136        if let Some(expires_in) = expires_in {
137            let collector = self.clone();
138            let revert_to = revert_to.map(str::to_string);
139            let handle: JoinHandle<()> = n0_future::task::spawn(async move {
140                n0_future::time::sleep(expires_in).await;
141                let target = revert_to.as_deref();
142                if let Err(err) = collector.revert(target) {
143                    warn!(?err, "failed to revert log filter");
144                }
145            });
146            *guard = Some(AbortOnDropHandle::new(handle));
147        }
148        Ok(())
149    }
150
151    /// Reverts the active filter to `to`, or to the off state when `to` is
152    /// `None`.
153    pub fn revert(&self, to: Option<&str>) -> Result<(), SetFilterError> {
154        let directives = to.unwrap_or(OFF_DIRECTIVES);
155        let filter = EnvFilter::try_new(directives)
156            .map_err(|e| SetFilterError::InvalidDirectives(e.to_string()))?;
157        self.inner
158            .reload_handle
159            .reload(filter)
160            .map_err(|_| SetFilterError::ReloadFailed)
161    }
162
163    /// Locate the newest rolling file in the configured log directory
164    /// whose name starts with the configured filename prefix. Returns
165    /// `Ok(None)` when the directory exists but no matching file is
166    /// present. Used by [`crate::ClientHost`] to serve [`FetchLogs`].
167    ///
168    /// [`FetchLogs`]: crate::protocol::FetchLogs
169    pub(crate) fn current_log_file(&self) -> std::io::Result<Option<PathBuf>> {
170        let dir = &self.inner.log_dir;
171        let prefix = &self.inner.file_name_prefix;
172        let entries = match std::fs::read_dir(dir) {
173            Ok(e) => e,
174            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
175            Err(err) => return Err(err),
176        };
177        let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
178        for entry in entries.flatten() {
179            let name = entry.file_name();
180            let name = name.to_string_lossy();
181            if !name.starts_with(prefix) {
182                continue;
183            }
184            let Ok(meta) = entry.metadata() else { continue };
185            if !meta.is_file() {
186                continue;
187            }
188            let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
189            match &best {
190                Some((_, current)) if *current >= mtime => {}
191                _ => best = Some((entry.path(), mtime)),
192            }
193        }
194        Ok(best.map(|(p, _)| p))
195    }
196}
197
198impl std::fmt::Debug for LogCollector {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        f.debug_struct("LogCollector").finish_non_exhaustive()
201    }
202}
203
204/// Installs a global tracing subscriber whose only output is a rolling
205/// file appender under `config.dir`. The level filter starts at `off`; the
206/// cloud must push a `SetLogLevel` for any events to be captured.
207///
208/// Returns the [`LogCollector`] to hand to [`crate::ClientHost`] so it can
209/// apply runtime overrides, and a [`WorkerGuard`] the caller must hold for
210/// the lifetime of the process so the non-blocking writer flushes on exit.
211///
212/// Call exactly once at process start. For composition with other layers,
213/// use [`layer`].
214pub fn install(config: FileLoggerConfig) -> Result<(LogCollector, WorkerGuard), InstallError> {
215    let (collector, file_layer, guard) = layer(config)?;
216    tracing_subscriber::registry()
217        .with(file_layer)
218        .try_init()
219        .map_err(|_| InstallError::AlreadyInstalled)?;
220    debug!("iroh-services file logger installed");
221    Ok((collector, guard))
222}
223
224/// Builds the cloud-controlled file layer and its [`LogCollector`] without
225/// installing a global subscriber. Use this when composing the file layer
226/// with other layers; the returned layer is pre-wrapped in the reloadable
227/// filter so the cloud's `SetLogLevel` overrides take effect.
228pub fn layer(
229    config: FileLoggerConfig,
230) -> Result<
231    (
232        LogCollector,
233        impl Layer<Registry> + Send + Sync + 'static,
234        WorkerGuard,
235    ),
236    InstallError,
237> {
238    // `EnvFilter::try_new("off")` cannot fail; "off" is always valid.
239    let filter = EnvFilter::try_new(OFF_DIRECTIVES).expect("'off' is always a valid directive");
240    let (filter, reload_handle) = reload::Layer::new(filter);
241
242    let log_dir = config.dir.clone();
243    let file_name_prefix = config.file_name_prefix.clone();
244    let (file_layer, guard) = file_layer::<Registry>(config)?;
245    let layer = file_layer.with_filter(filter);
246
247    let inner = Arc::new(CollectorInner {
248        reload_handle,
249        revert_task: Mutex::new(None),
250        log_dir,
251        file_name_prefix,
252    });
253    let collector = LogCollector { inner };
254    Ok((collector, layer, guard))
255}
256
257/// Guard returned by [`file_layer`] / [`layer`] / [`install`] that keeps the
258/// non-blocking writer's worker thread alive. Drop this only at process
259/// shutdown; once dropped, any buffered records still in flight are flushed
260/// and the file layer stops accepting writes.
261pub use tracing_appender::non_blocking::WorkerGuard;
262/// How often the rolling file appender starts a new file.
263///
264/// Re-exported from `tracing-appender` so callers don't need to depend on it
265/// directly.
266pub use tracing_appender::rolling::Rotation;
267
/// Errors raised when constructing the file logger.
#[derive(Debug, thiserror::Error)]
pub enum FileLoggerError {
    /// A filesystem operation failed. In this file the variant is produced
    /// when creating the log directory fails.
    #[error("file logger setup failed: {0}")]
    Io(#[from] std::io::Error),
    /// `tracing-appender`'s builder failed to produce the rolling
    /// appender. Carries the builder's error rendered as a string.
    #[error("file logger builder rejected configuration: {0}")]
    Builder(String),
}
279
/// Configuration for the rolling file logger.
///
/// Use [`FileLoggerConfig::new`] to set the destination directory and tune
/// the remaining fields with the with-style setters. The defaults are
/// daily rotation, a `iroh-services` filename prefix, and a 30-file
/// retention window.
#[derive(Debug, Clone)]
pub struct FileLoggerConfig {
    /// Directory the rolling files are written to.
    dir: PathBuf,
    /// How often the appender starts a new file.
    rotation: Rotation,
    /// File name prefix handed to the rolling appender.
    file_name_prefix: String,
    /// Retention cap handed to the appender; `None` sets no cap.
    max_files: Option<usize>,
}
293
294impl FileLoggerConfig {
295    /// Build a config rooted at `dir`. The directory is created on first
296    /// write if it does not exist.
297    pub fn new<P: Into<PathBuf>>(dir: P) -> Self {
298        Self {
299            dir: dir.into(),
300            rotation: Rotation::DAILY,
301            file_name_prefix: "iroh-services".into(),
302            max_files: Some(30),
303        }
304    }
305
306    /// Override the rotation cadence. Default: [`Rotation::DAILY`].
307    pub fn with_rotation(mut self, rotation: Rotation) -> Self {
308        self.rotation = rotation;
309        self
310    }
311
312    /// Override the file name stem. Rotation appends a date suffix to this.
313    /// Default: `iroh-services`.
314    pub fn with_file_name_prefix<S: Into<String>>(mut self, prefix: S) -> Self {
315        self.file_name_prefix = prefix.into();
316        self
317    }
318
319    /// Override the retention cap. `None` keeps every file forever; `Some(n)`
320    /// keeps at most `n` files and deletes the oldest on rotation. Default:
321    /// `Some(30)`.
322    pub fn with_max_files(mut self, max_files: Option<usize>) -> Self {
323        self.max_files = max_files;
324        self
325    }
326}
327
328/// Builds an unfiltered tracing layer that writes records to a rolling
329/// file under `config.dir`. Returns the layer plus a [`WorkerGuard`] the
330/// caller must hold for the lifetime of the process — drop it at shutdown
331/// so any buffered records flush before exit.
332///
333/// Most callers want [`layer`] or [`install`] instead, which apply the
334/// cloud-controlled `EnvFilter` reload handle. Use this when you want a
335/// plain file appender with no cloud filter integration.
336pub fn file_layer<S>(
337    config: FileLoggerConfig,
338) -> Result<(impl Layer<S> + Send + Sync + 'static, WorkerGuard), FileLoggerError>
339where
340    S: Subscriber + for<'a> LookupSpan<'a>,
341{
342    let FileLoggerConfig {
343        dir,
344        rotation,
345        file_name_prefix,
346        max_files,
347    } = config;
348
349    create_dir_all(&dir)?;
350
351    let mut builder = tracing_appender::rolling::RollingFileAppender::builder()
352        .rotation(rotation)
353        .filename_prefix(file_name_prefix);
354    if let Some(max) = max_files {
355        builder = builder.max_log_files(max);
356    }
357    let appender = builder
358        .build(&dir)
359        .map_err(|e| FileLoggerError::Builder(e.to_string()))?;
360
361    let (writer, guard) = tracing_appender::non_blocking(appender);
362    let layer = tracing_subscriber::fmt::layer()
363        .with_writer(writer)
364        .with_ansi(false)
365        .json();
366    Ok((layer, guard))
367}
368
369fn create_dir_all(dir: &Path) -> Result<(), FileLoggerError> {
370    std::fs::create_dir_all(dir).map_err(FileLoggerError::Io)
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Lazily yields the entries of `dir` whose file name starts with
    /// `prefix`, panicking on any directory read error.
    fn entries_with_prefix(
        dir: &Path,
        prefix: &'static str,
    ) -> impl Iterator<Item = std::fs::DirEntry> {
        std::fs::read_dir(dir)
            .unwrap()
            .map(|entry| entry.unwrap())
            .filter(move |entry| entry.file_name().to_string_lossy().starts_with(prefix))
    }

    /// A record emitted through `file_layer` ends up in a file in the
    /// configured directory once the `WorkerGuard` has been dropped.
    #[test]
    fn file_layer_writes_to_disk() {
        use tracing::Dispatch;
        use tracing_subscriber::{Registry, layer::SubscriberExt};

        let tmp = tempfile::tempdir().unwrap();
        let config = FileLoggerConfig::new(tmp.path())
            .with_file_name_prefix("test")
            .with_max_files(Some(2));
        let (layer, guard) = file_layer::<Registry>(config).expect("file_layer setup");

        let dispatch = Dispatch::new(Registry::default().with(layer));
        tracing::dispatcher::with_default(&dispatch, || {
            tracing::info!(target: "file_layer_test", "hello from the file logger");
        });
        // Dropping the guard is what flushes the non-blocking writer.
        drop(guard);

        let found = entries_with_prefix(tmp.path(), "test").any(|entry| {
            std::fs::read_to_string(entry.path())
                .unwrap()
                .contains("hello from the file logger")
        });
        assert!(found, "expected log line to be written to a test.* file");
    }

    /// The cloud-controlled `layer` captures nothing at first and only
    /// writes once `set_filter` raises the level, which shows the reload
    /// handle is wired to the file layer end-to-end.
    #[tokio::test(flavor = "current_thread")]
    async fn cloud_filter_controls_file_writes() {
        use tracing::Dispatch;

        let tmp = tempfile::tempdir().unwrap();
        let config = FileLoggerConfig::new(tmp.path()).with_file_name_prefix("controlled");
        let (collector, log_layer, guard) = layer(config).unwrap();

        let dispatch = Dispatch::new(Registry::default().with(log_layer));
        tracing::dispatcher::with_default(&dispatch, || {
            // The filter is still "off", so this event must be dropped.
            tracing::info!(target: "logtest", "before-set");

            collector
                .set_filter("info", None, None)
                .expect("set_filter to info");
            tracing::info!(target: "logtest", "after-set");
        });
        drop(guard);

        let combined: String = entries_with_prefix(tmp.path(), "controlled")
            .map(|entry| std::fs::read_to_string(entry.path()).unwrap())
            .collect();
        assert!(
            !combined.contains("before-set"),
            "before-set should be filtered out, got: {combined}"
        );
        assert!(
            combined.contains("after-set"),
            "after-set should be written after set_filter, got: {combined}"
        );
    }
}