Skip to main content

iroh_services/
logs.rs

//! Client-side log collection: a `tracing-subscriber` layer that writes
//! structured log records to rolling files on the local filesystem, plus a
//! reload handle that lets the cloud control the level filter at runtime.
//!
//! # The cloud is the source of truth for the level
//!
//! The level filter starts at `off`. No tracing events are captured until the
//! cloud pushes a [`crate::protocol::SetLogLevel`] over the [`ClientHost`]
//! channel. The cloud sends one immediately after authenticating a connected
//! endpoint, derived from the per-endpoint `endpoint_log_settings` row (when
//! present) plus the project default.
//!
//! [`ClientHost`]: crate::ClientHost
//!
//! # Files live on the device
//!
//! Records land in rolling JSON files under a caller-supplied directory.
//! Operators view, ship, or aggregate them with whatever tooling they
//! already use (`tail`, `journalctl`, `vector`, etc.).
//!
//! # Typical usage
//!
//! ```no_run
//! use iroh_services::logs::{self, FileLoggerConfig};
//!
//! # fn main() -> anyhow::Result<()> {
//! // Installs a global subscriber. The filter starts at `off`; the
//! // Client pulls the cloud-persisted directive right after Auth and
//! // applies it via the collector. The dashboard can also push live
//! // overrides via ClientHost::set_log_level after that.
//! let (collector, _guard) = logs::install(FileLoggerConfig::new("./logs"))?;
//! # let _ = collector;
//! # Ok(())
//! # }
//! ```
//!
//! Compose with additional layers (for example, a stderr fmt layer) via
//! [`layer`]:
//!
//! ```no_run
//! use iroh_services::logs::{self, FileLoggerConfig};
//! use tracing_subscriber::prelude::*;
//!
//! # fn main() -> anyhow::Result<()> {
//! let (collector, file_layer, _guard) = logs::layer(FileLoggerConfig::new("./logs"))?;
//! tracing_subscriber::registry()
//!     .with(file_layer)
//!     .with(tracing_subscriber::fmt::layer())
//!     .try_init()
//!     .ok();
//! # let _ = collector;
//! # Ok(())
//! # }
//! ```

56use std::{
57    path::{Path, PathBuf},
58    sync::{Arc, Mutex},
59};
60
61use n0_future::{
62    task::{AbortOnDropHandle, JoinHandle},
63    time::Duration,
64};
65use tracing::{Subscriber, debug, warn};
66use tracing_subscriber::{
67    EnvFilter, Layer, Registry, layer::SubscriberExt as _, registry::LookupSpan, reload,
68    util::SubscriberInitExt as _,
69};
70
/// Errors that can occur while installing the log collector.
///
/// Returned by [`install`] and [`layer`].
#[derive(Debug, thiserror::Error)]
pub enum InstallError {
    /// The default tracing dispatcher is already set; install once at startup.
    #[error("global tracing dispatcher is already set")]
    AlreadyInstalled,
    /// File logger setup failed (could not create directory, open appender,
    /// etc.).
    #[error("file logger setup failed: {0}")]
    FileLogger(#[from] FileLoggerError),
}

/// Errors that can occur while changing the active filter at runtime.
///
/// Returned by [`LogCollector::set_filter`] and [`LogCollector::revert`].
#[derive(Debug, thiserror::Error)]
pub enum SetFilterError {
    /// The supplied directives string was rejected by `EnvFilter`.
    #[error("invalid filter directives: {0}")]
    InvalidDirectives(String),
    /// Reloading the filter failed because the subscriber went away.
    #[error("reload handle is no longer valid")]
    ReloadFailed,
}

/// Handle to the cloud-controlled tracing filter. Cheap to clone; all clones
/// share the same backing reload handle.
#[derive(Clone)]
pub struct LogCollector {
    // Shared state for every clone; see `CollectorInner` for the fields.
    inner: Arc<CollectorInner>,
}

/// State shared by every clone of a [`LogCollector`] via `Arc`.
struct CollectorInner {
    /// Handle into the reloadable `EnvFilter` layer; swapping the filter
    /// here takes effect on the already-installed subscriber.
    reload_handle: reload::Handle<EnvFilter, Registry>,
    /// Pending delayed-revert timer, if any. Overwriting the slot drops the
    /// `AbortOnDropHandle`, which cancels the previous timer task.
    revert_task: Mutex<Option<AbortOnDropHandle<()>>>,
    /// Directory where the rolling file appender writes. Used by
    /// [`LogCollector::current_log_file`] to locate the current file.
    log_dir: PathBuf,
    /// Filename prefix the rolling appender uses; the date suffix is
    /// appended for each rolled-over file.
    file_name_prefix: String,
}

/// Off-state directive. Nothing is captured until the cloud sends a
/// `SetLogLevel` with something more permissive. Also the fallback target
/// used by [`LogCollector::revert`] when no explicit target is supplied.
const OFF_DIRECTIVES: &str = "off";

116impl LogCollector {
117    /// Sets the active filter directives. When `expires_in` is set,
118    /// schedules a revert after that duration. The revert target is
119    /// `revert_to` when supplied; `None` means revert to `off`.
120    pub fn set_filter(
121        &self,
122        directives: &str,
123        expires_in: Option<Duration>,
124        revert_to: Option<&str>,
125    ) -> Result<(), SetFilterError> {
126        let filter = EnvFilter::try_new(directives)
127            .map_err(|e| SetFilterError::InvalidDirectives(e.to_string()))?;
128        self.inner
129            .reload_handle
130            .reload(filter)
131            .map_err(|_| SetFilterError::ReloadFailed)?;
132
133        let mut guard = self.inner.revert_task.lock().expect("poisoned");
134        *guard = None;
135
136        if let Some(expires_in) = expires_in {
137            let collector = self.clone();
138            let revert_to = revert_to.map(str::to_string);
139            let handle: JoinHandle<()> = n0_future::task::spawn(async move {
140                n0_future::time::sleep(expires_in).await;
141                let target = revert_to.as_deref();
142                if let Err(err) = collector.revert(target) {
143                    warn!(?err, "failed to revert log filter");
144                }
145            });
146            *guard = Some(AbortOnDropHandle::new(handle));
147        }
148        Ok(())
149    }
150
151    /// Reverts the active filter to `to`, or to the off state when `to` is
152    /// `None`.
153    pub fn revert(&self, to: Option<&str>) -> Result<(), SetFilterError> {
154        let directives = to.unwrap_or(OFF_DIRECTIVES);
155        let filter = EnvFilter::try_new(directives)
156            .map_err(|e| SetFilterError::InvalidDirectives(e.to_string()))?;
157        self.inner
158            .reload_handle
159            .reload(filter)
160            .map_err(|_| SetFilterError::ReloadFailed)
161    }
162
163    /// Locate the newest rolling file in the configured log directory
164    /// whose name starts with the configured filename prefix. Returns
165    /// `Ok(None)` when the directory exists but no matching file is
166    /// present. Used by [`crate::ClientHost`] to serve [`FetchLogs`].
167    ///
168    /// [`FetchLogs`]: crate::protocol::FetchLogs
169    pub(crate) fn current_log_file(&self) -> std::io::Result<Option<PathBuf>> {
170        let dir = &self.inner.log_dir;
171        let prefix = &self.inner.file_name_prefix;
172        let entries = match std::fs::read_dir(dir) {
173            Ok(e) => e,
174            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
175            Err(err) => return Err(err),
176        };
177        let mut best: Option<(PathBuf, std::time::SystemTime)> = None;
178        for entry in entries.flatten() {
179            let name = entry.file_name();
180            let name = name.to_string_lossy();
181            if !name.starts_with(prefix) {
182                continue;
183            }
184            let Ok(meta) = entry.metadata() else { continue };
185            if !meta.is_file() {
186                continue;
187            }
188            let mtime = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
189            match &best {
190                Some((_, current)) if *current >= mtime => {}
191                _ => best = Some((entry.path(), mtime)),
192            }
193        }
194        Ok(best.map(|(p, _)| p))
195    }
196}
197
198impl std::fmt::Debug for LogCollector {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        f.debug_struct("LogCollector").finish_non_exhaustive()
201    }
202}
203
204/// Installs a global tracing subscriber whose only output is a rolling
205/// file appender under `config.dir`. The level filter starts at `off`; the
206/// cloud must push a `SetLogLevel` for any events to be captured.
207///
208/// Returns the [`LogCollector`] to hand to [`crate::ClientHost`] so it can
209/// apply runtime overrides, and a [`WorkerGuard`] the caller must hold for
210/// the lifetime of the process so the non-blocking writer flushes on exit.
211///
212/// Call exactly once at process start. For composition with other layers,
213/// use [`layer`].
214pub fn install(config: FileLoggerConfig) -> Result<(LogCollector, WorkerGuard), InstallError> {
215    let (collector, file_layer, guard) = layer(config)?;
216    tracing_subscriber::registry()
217        .with(file_layer)
218        .try_init()
219        .map_err(|_| InstallError::AlreadyInstalled)?;
220    debug!("iroh-services file logger installed");
221    Ok((collector, guard))
222}
223
224/// Builds the cloud-controlled file layer and its [`LogCollector`] without
225/// installing a global subscriber. Use this when composing the file layer
226/// with other layers; the returned layer is pre-wrapped in the reloadable
227/// filter so the cloud's `SetLogLevel` overrides take effect.
228pub fn layer(
229    config: FileLoggerConfig,
230) -> Result<
231    (
232        LogCollector,
233        impl Layer<Registry> + Send + Sync + 'static,
234        WorkerGuard,
235    ),
236    InstallError,
237> {
238    // `EnvFilter::try_new("off")` cannot fail; "off" is always valid.
239    let filter = EnvFilter::try_new(OFF_DIRECTIVES).expect("'off' is always a valid directive");
240    let (filter, reload_handle) = reload::Layer::new(filter);
241
242    let log_dir = config.dir.clone();
243    let file_name_prefix = config.file_name_prefix.clone();
244    let (file_layer, guard) = file_layer::<Registry>(config)?;
245    let layer = file_layer.with_filter(filter);
246
247    let inner = Arc::new(CollectorInner {
248        reload_handle,
249        revert_task: Mutex::new(None),
250        log_dir,
251        file_name_prefix,
252    });
253    let collector = LogCollector { inner };
254    Ok((collector, layer, guard))
255}
256
/// How often the rolling file appender starts a new file.
///
/// Re-exported from `tracing-appender` so callers don't need to depend on it
/// directly.
pub use tracing_appender::rolling::Rotation;

/// Guard returned by [`file_layer`] / [`layer`] / [`install`] that keeps the
/// non-blocking writer's worker thread alive. Drop this only at process
/// shutdown; once dropped, any buffered records still in flight are flushed
/// and the file layer stops accepting writes.
pub use tracing_appender::non_blocking::WorkerGuard;

/// Errors raised when constructing the file logger.
///
/// Returned by [`file_layer`] and wrapped by [`InstallError::FileLogger`].
#[derive(Debug, thiserror::Error)]
pub enum FileLoggerError {
    /// Could not create the log directory or open the rolling appender.
    #[error("file logger setup failed: {0}")]
    Io(#[from] std::io::Error),
    /// `tracing-appender`'s builder rejected the configuration (for example
    /// an invalid filename prefix).
    #[error("file logger builder rejected configuration: {0}")]
    Builder(String),
}

/// Configuration for the rolling file logger.
///
/// Use [`FileLoggerConfig::new`] to set the destination directory and tune
/// the remaining fields with the with-style setters. The defaults are
/// daily rotation, a `iroh-services` filename prefix, and a 30-file
/// retention window.
#[derive(Debug, Clone)]
pub struct FileLoggerConfig {
    // Destination directory; created on demand by `file_layer`.
    dir: PathBuf,
    // How often a new file is started. Default: daily.
    rotation: Rotation,
    // File name stem; the appender adds a date suffix on each rotation.
    file_name_prefix: String,
    // `Some(n)` keeps at most `n` files; `None` disables pruning.
    max_files: Option<usize>,
}

295impl FileLoggerConfig {
296    /// Build a config rooted at `dir`. The directory is created on first
297    /// write if it does not exist.
298    pub fn new<P: Into<PathBuf>>(dir: P) -> Self {
299        Self {
300            dir: dir.into(),
301            rotation: Rotation::DAILY,
302            file_name_prefix: "iroh-services".into(),
303            max_files: Some(30),
304        }
305    }
306
307    /// Override the rotation cadence. Default: [`Rotation::DAILY`].
308    pub fn with_rotation(mut self, rotation: Rotation) -> Self {
309        self.rotation = rotation;
310        self
311    }
312
313    /// Override the file name stem. Rotation appends a date suffix to this.
314    /// Default: `iroh-services`.
315    pub fn with_file_name_prefix<S: Into<String>>(mut self, prefix: S) -> Self {
316        self.file_name_prefix = prefix.into();
317        self
318    }
319
320    /// Override the retention cap. `None` keeps every file forever; `Some(n)`
321    /// keeps at most `n` files and deletes the oldest on rotation. Default:
322    /// `Some(30)`.
323    pub fn with_max_files(mut self, max_files: Option<usize>) -> Self {
324        self.max_files = max_files;
325        self
326    }
327}
328
329/// Builds an unfiltered tracing layer that writes records to a rolling
330/// file under `config.dir`. Returns the layer plus a [`WorkerGuard`] the
331/// caller must hold for the lifetime of the process — drop it at shutdown
332/// so any buffered records flush before exit.
333///
334/// Most callers want [`layer`] or [`install`] instead, which apply the
335/// cloud-controlled `EnvFilter` reload handle. Use this when you want a
336/// plain file appender with no cloud filter integration.
337pub fn file_layer<S>(
338    config: FileLoggerConfig,
339) -> Result<(impl Layer<S> + Send + Sync + 'static, WorkerGuard), FileLoggerError>
340where
341    S: Subscriber + for<'a> LookupSpan<'a>,
342{
343    let FileLoggerConfig {
344        dir,
345        rotation,
346        file_name_prefix,
347        max_files,
348    } = config;
349
350    create_dir_all(&dir)?;
351
352    let mut builder = tracing_appender::rolling::RollingFileAppender::builder()
353        .rotation(rotation)
354        .filename_prefix(file_name_prefix);
355    if let Some(max) = max_files {
356        builder = builder.max_log_files(max);
357    }
358    let appender = builder
359        .build(&dir)
360        .map_err(|e| FileLoggerError::Builder(e.to_string()))?;
361
362    let (writer, guard) = tracing_appender::non_blocking(appender);
363    let layer = tracing_subscriber::fmt::layer()
364        .with_writer(writer)
365        .with_ansi(false)
366        .json();
367    Ok((layer, guard))
368}
369
370fn create_dir_all(dir: &Path) -> Result<(), FileLoggerError> {
371    std::fs::create_dir_all(dir).map_err(FileLoggerError::Io)
372}
373
374#[cfg(test)]
375mod tests {
376    use super::*;
377
    /// `file_layer` writes records to a file in the configured directory,
    /// and the WorkerGuard flushes pending writes on drop.
    #[test]
    fn file_layer_writes_to_disk() {
        use tracing::Dispatch;
        use tracing_subscriber::{Registry, layer::SubscriberExt};

        let tmp = tempfile::tempdir().unwrap();
        let (layer, guard) = file_layer::<Registry>(
            FileLoggerConfig::new(tmp.path())
                .with_file_name_prefix("test")
                .with_max_files(Some(2)),
        )
        .expect("file_layer setup");

        // Scope the subscriber to this closure instead of installing a
        // global one, so other tests are unaffected.
        let subscriber = Registry::default().with(layer);
        let dispatch = Dispatch::new(subscriber);
        tracing::dispatcher::with_default(&dispatch, || {
            tracing::info!(target: "file_layer_test", "hello from the file logger");
        });
        // Dropping the guard flushes the non-blocking writer's buffer.
        drop(guard);

        // The appender appends a date suffix, so scan for any file that
        // starts with the configured "test" prefix rather than a fixed name.
        let mut found = false;
        for entry in std::fs::read_dir(tmp.path()).unwrap() {
            let entry = entry.unwrap();
            if !entry.file_name().to_string_lossy().starts_with("test") {
                continue;
            }
            let contents = std::fs::read_to_string(entry.path()).unwrap();
            if contents.contains("hello from the file logger") {
                found = true;
                break;
            }
        }
        assert!(found, "expected log line to be written to a test.* file");
    }

    /// The cloud-controlled `layer` starts captured-nothing and only writes
    /// after `set_filter` raises the level. Verifies the reload handle is
    /// wired to the file layer end-to-end.
    #[tokio::test(flavor = "current_thread")]
    async fn cloud_filter_controls_file_writes() {
        use tracing::Dispatch;

        let tmp = tempfile::tempdir().unwrap();
        let (collector, log_layer, guard) =
            layer(FileLoggerConfig::new(tmp.path()).with_file_name_prefix("controlled")).unwrap();

        let subscriber = Registry::default().with(log_layer);
        let dispatch = Dispatch::new(subscriber);
        tracing::dispatcher::with_default(&dispatch, || {
            // Captured nothing yet — filter is "off".
            tracing::info!(target: "logtest", "before-set");

            // Raise the filter to `info` with no expiry, as the cloud would.
            collector
                .set_filter("info", None, None)
                .expect("set_filter to info");
            tracing::info!(target: "logtest", "after-set");
        });
        // Flush buffered records before inspecting the files on disk.
        drop(guard);

        // Concatenate every rolled file that carries the configured prefix.
        let mut combined = String::new();
        for entry in std::fs::read_dir(tmp.path()).unwrap() {
            let entry = entry.unwrap();
            if entry
                .file_name()
                .to_string_lossy()
                .starts_with("controlled")
            {
                combined.push_str(&std::fs::read_to_string(entry.path()).unwrap());
            }
        }
        assert!(
            !combined.contains("before-set"),
            "before-set should be filtered out, got: {combined}"
        );
        assert!(
            combined.contains("after-set"),
            "after-set should be written after set_filter, got: {combined}"
        );
    }
459}