iroh_relay/
server.rs

1//! A fully-fledged iroh-relay server over HTTP or HTTPS.
2//!
3//! This module provides an API to run a full fledged iroh-relay server.  It is primarily
4//! used by the `iroh-relay` binary in this crate.  It can be used to run a relay server in
5//! other locations however.
6//!
7//! This code is fully written in a form of structured-concurrency: every spawned task is
8//! always attached to a handle and when the handle is dropped the tasks abort.  So tasks
//! cannot outlive their handle.  It is also always possible to await the completion of a
//! task.  Some tasks additionally have a method to do graceful shutdown.
11//!
12//! The relay server hosts the following services:
13//!
//! - HTTPS `/relay`: The main URL endpoint to which clients connect and over which they send traffic.
15//! - HTTPS `/ping`: Used for net_report probes.
16//! - HTTPS `/generate_204`: Used for net_report probes.
17
18use std::{fmt, future::Future, net::SocketAddr, num::NonZeroU32, pin::Pin, sync::Arc};
19
20use derive_more::Debug;
21use http::{
22    HeaderMap, HeaderValue, Method, Request, Response, StatusCode, header::InvalidHeaderValue,
23    response::Builder as ResponseBuilder,
24};
25use hyper::body::Incoming;
26use iroh_base::EndpointId;
27#[cfg(feature = "test-utils")]
28use iroh_base::RelayUrl;
29use n0_error::{e, stack_error};
30use n0_future::{StreamExt, future::Boxed};
31use serde::Serialize;
32use tokio::{
33    net::TcpListener,
34    task::{JoinError, JoinSet},
35};
36use tokio_util::task::AbortOnDropHandle;
37use tracing::{Instrument, debug, error, info, info_span, instrument};
38
39use crate::{
40    defaults::DEFAULT_KEY_CACHE_CAPACITY,
41    http::RELAY_PROBE_PATH,
42    quic::server::{QuicServer, QuicSpawnError, ServerHandle as QuicServerHandle},
43};
44
45pub mod client;
46pub mod clients;
47pub mod http_server;
48mod metrics;
49pub(crate) mod resolver;
50pub mod streams;
51#[cfg(feature = "test-utils")]
52pub mod testing;
53
54pub use self::{
55    http_server::{Handlers, RelayService},
56    metrics::{Metrics, RelayMetrics},
57    resolver::{DEFAULT_CERT_RELOAD_INTERVAL, ReloadingResolver},
58};
59
/// Request header carrying the client's challenge for the `/generate_204` probe.
const NO_CONTENT_CHALLENGE_HEADER: &str = "X-Iroh-Challenge";
/// Response header in which a valid challenge is echoed back, prefixed with `response `.
const NO_CONTENT_RESPONSE_HEADER: &str = "X-Iroh-Response";
/// Body of the `404 Not Found` response sent by the captive portal service.
const NOTFOUND: &[u8] = b"Not Found";
/// Body served for `/robots.txt`.
const ROBOTS_TXT: &[u8] = b"User-agent: *\nDisallow: /\n";
/// Landing page served for `/` and `/index.html`.
const INDEX: &[u8] = br#"<html><body>
<h1>Iroh Relay</h1>
<p>
  This is an <a href="https://iroh.computer/">Iroh</a> Relay server.
</p>
"#;
/// Header name/value pairs handed to the relay HTTP server builder in [`Server::spawn`].
const TLS_HEADERS: [(&str, &str); 2] = [
    (
        "Strict-Transport-Security",
        "max-age=63072000; includeSubDomains",
    ),
    (
        "Content-Security-Policy",
        "default-src 'none'; frame-ancestors 'none'; form-action 'none'; base-uri 'self'; block-all-mixed-content; plugin-types 'none'",
    ),
];

/// Response body type produced by the request handlers in this module.
type BytesBody = http_body_util::Full<hyper::body::Bytes>;
/// Boxed, thread-safe error type returned by the request handlers.
type HyperError = Box<dyn std::error::Error + Send + Sync>;
/// Result alias used by the request handlers.
type HyperResult<T> = std::result::Result<T, HyperError>;
84
85/// Creates a new [`BytesBody`] with no content.
86fn body_empty() -> BytesBody {
87    http_body_util::Full::new(hyper::body::Bytes::new())
88}
89
/// Configuration for the full Relay.
///
/// Be aware the generic parameters are for when using the Let's Encrypt TLS configuration.
/// If not used dummy ones need to be provided, e.g. `ServerConfig::<(), ()>::default()`.
///
/// With every service left as `None` the server still spawns, but its supervisor task
/// finishes with [`SupervisorError::NoRelayServicesEnabled`].
#[derive(Debug, Default)]
pub struct ServerConfig<EC: fmt::Debug, EA: fmt::Debug = EC> {
    /// Configuration for the Relay server, disabled if `None`.
    pub relay: Option<RelayConfig<EC, EA>>,
    /// Configuration for the QUIC server, disabled if `None`.
    pub quic: Option<QuicConfig>,
    /// Socket address to serve metrics on.  No metrics server is started if `None`.
    #[cfg(feature = "metrics")]
    pub metrics_addr: Option<SocketAddr>,
}
104
/// Configuration for the Relay HTTP and HTTPS server.
///
/// This includes the HTTP services hosted by the Relay server, the Relay `/relay` HTTP
/// endpoint is only one of the services served.
#[derive(Debug)]
pub struct RelayConfig<EC: fmt::Debug, EA: fmt::Debug = EC> {
    /// The socket address on which the Relay HTTP server should bind.
    ///
    /// Normally you'd choose port `80`.  The bind address for the HTTPS server is
    /// configured in [`RelayConfig::tls`].
    ///
    /// If [`RelayConfig::tls`] is `None` then this serves all the HTTP services without
    /// TLS.
    pub http_bind_addr: SocketAddr,
    /// TLS configuration for the HTTPS server.
    ///
    /// If *None* all the HTTP services that would be served here are served from
    /// [`RelayConfig::http_bind_addr`].
    pub tls: Option<TlsConfig<EC, EA>>,
    /// Rate limits.
    ///
    /// Only [`Limits::client_rx`] is applied when the server is spawned.
    pub limits: Limits,
    /// Key cache capacity.
    ///
    /// Falls back to [`DEFAULT_KEY_CACHE_CAPACITY`] if `None`.
    pub key_cache_capacity: Option<usize>,
    /// Access configuration, controlling which endpoints may use the relay.
    pub access: AccessConfig,
}
131
/// Controls which endpoints are allowed to use the relay.
///
/// Use [`AccessConfig::is_allowed`] to evaluate the policy for a given endpoint.
#[derive(derive_more::Debug)]
pub enum AccessConfig {
    /// Every endpoint is allowed.
    Everyone,
    /// Only endpoints for which the function returns `Access::Allow`.
    ///
    /// The function is asynchronous: it returns a boxed future resolving to an
    /// [`Access`] verdict.
    #[debug("restricted")]
    Restricted(Box<dyn Fn(EndpointId) -> Boxed<Access> + Send + Sync + 'static>),
}
141
142impl AccessConfig {
143    /// Is this endpoint allowed?
144    pub async fn is_allowed(&self, endpoint: EndpointId) -> bool {
145        match self {
146            Self::Everyone => true,
147            Self::Restricted(check) => {
148                let res = check(endpoint).await;
149                matches!(res, Access::Allow)
150            }
151        }
152    }
153}
154
/// Access restriction for an endpoint.
///
/// This is the verdict produced by the callback of [`AccessConfig::Restricted`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Access {
    /// Access is allowed.
    Allow,
    /// Access is denied.
    Deny,
}
163
/// Configuration for the QUIC server.
#[derive(Debug)]
pub struct QuicConfig {
    /// The socket address on which the QUIC server should bind.
    ///
    /// Normally you'd choose port `7842`, see [`crate::defaults::DEFAULT_RELAY_QUIC_PORT`].
    pub bind_addr: SocketAddr,
    /// The TLS server configuration for the QUIC server.
    ///
    /// If this [`rustls::ServerConfig`] does not support TLS 1.3, the QUIC server will fail
    /// to spawn.
    pub server_config: rustls::ServerConfig,
}
177
/// TLS configuration for Relay server.
///
/// Normally the Relay server accepts connections on both HTTPS and HTTP.
#[derive(Debug)]
pub struct TlsConfig<EC: fmt::Debug, EA: fmt::Debug = EC> {
    /// The socket address on which to serve the HTTPS server.
    ///
    /// Since the captive portal probe has to run over plain text HTTP and TLS is used for
    /// the main relay server this has to be on a different port than
    /// [`RelayConfig::http_bind_addr`].  When TLS is not enabled the relay services are
    /// served on the [`RelayConfig::http_bind_addr`] socket address instead.
    ///
    /// Normally you'd choose port `443`.
    pub https_bind_addr: SocketAddr,
    /// The socket address on which to serve the QUIC server if QUIC is enabled.
    // NOTE(review): `Server::spawn` in this module binds the QUIC server from
    // `QuicConfig::bind_addr` and does not read this field; confirm where it is used.
    pub quic_bind_addr: SocketAddr,
    /// Mode for getting a cert.
    pub cert: CertConfig<EC, EA>,
    /// The server configuration.
    pub server_config: rustls::ServerConfig,
}
198
/// Rate limits.
///
/// The default value sets no limits at all.
// TODO: accept_conn_limit and accept_conn_burst are not currently implemented.
#[derive(Debug, Default)]
pub struct Limits {
    /// Rate limit for accepting new connection. Unlimited if not set.
    ///
    /// Not currently implemented, see the TODO on this struct.
    pub accept_conn_limit: Option<f64>,
    /// Burst limit for accepting new connection. Unlimited if not set.
    ///
    /// Not currently implemented, see the TODO on this struct.
    pub accept_conn_burst: Option<usize>,
    /// Rate limits for incoming traffic from a client connection.
    pub client_rx: Option<ClientRateLimit>,
}
210
/// Per-client rate limit configuration.
///
/// Applied to the relay server through [`Limits::client_rx`].
#[derive(Debug, Copy, Clone)]
pub struct ClientRateLimit {
    /// Max number of bytes per second to read from the client connection.
    pub bytes_per_second: NonZeroU32,
    /// Max number of bytes to read in a single burst.
    // NOTE(review): the behaviour when this is `None` is decided by the HTTP server
    // builder, which is not visible in this file.
    pub max_burst_bytes: Option<NonZeroU32>,
}
219
/// TLS certificate configuration.
#[derive(derive_more::Debug)]
pub enum CertConfig<EC: fmt::Debug, EA: fmt::Debug = EC> {
    /// Use Let's Encrypt.
    ///
    /// The server drives the ACME state in a background task and uses its acceptor for
    /// incoming TLS connections.
    LetsEncrypt {
        /// State for Let's Encrypt certificates.
        #[debug("AcmeConfig")]
        state: tokio_rustls_acme::AcmeState<EC, EA>,
    },
    /// Use a static TLS key and certificate chain.
    ///
    /// The chain is also exposed through [`Server::certificates`].
    Manual {
        /// The TLS certificate chain.
        certs: Vec<rustls::pki_types::CertificateDer<'static>>,
    },
    /// Use a TLS key and certificate chain that can be reloaded.
    ///
    /// Handled like [`CertConfig::Manual`] when spawning, except that no certificate
    /// chain is exposed through [`Server::certificates`].
    Reloading,
}
237
/// A running Relay + QAD server.
///
/// This is a full Relay server, including QAD, Relay and various associated HTTP services.
///
/// Dropping this will stop the server.
#[derive(Debug)]
pub struct Server {
    /// The address of the HTTP server, if configured.
    http_addr: Option<SocketAddr>,
    /// The address of the HTTPS server, if the relay server is using TLS.
    ///
    /// If the Relay server is not using TLS then it is served from the
    /// [`Server::http_addr`].
    https_addr: Option<SocketAddr>,
    /// The address of the QUIC server, if configured.
    quic_addr: Option<SocketAddr>,
    /// Handle to the relay server.
    relay_handle: Option<http_server::ServerHandle>,
    /// Handle to the quic server.
    quic_handle: Option<QuicServerHandle>,
    /// The main task running the server.
    ///
    /// Wrapped in an [`AbortOnDropHandle`], so dropping the [`Server`] aborts it.
    supervisor: AbortOnDropHandle<Result<(), SupervisorError>>,
    /// The certificate for the server.
    ///
    /// If the server has manual certificates configured the certificate chain will be
    /// available here, this can be used by a client to authenticate the server.
    certificates: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
    /// Metrics collected by the server, see [`Server::metrics`].
    metrics: RelayMetrics,
}
267
/// Server spawn errors
///
/// Returned by [`Server::spawn`] when one of the services fails to start.
#[allow(missing_docs)]
#[stack_error(derive, add_meta, std_sources)]
#[non_exhaustive]
pub enum SpawnError {
    // Not constructed in `Server::spawn` itself; presumably produced by the HTTP server
    // builder -- verify against `http_server`.
    #[error("Unable to get local address")]
    LocalAddr { source: std::io::Error },
    // Spawning the QUIC server failed.
    #[error("Failed to bind QAD listener")]
    QuicSpawn { source: QuicSpawnError },
    // One of the static `TLS_HEADERS` values could not be parsed as a header value.
    #[error("Failed to parse TLS header")]
    TlsHeaderParse { source: InvalidHeaderValue },
    // Binding the plain-HTTP listener that runs next to the HTTPS server failed.
    // NOTE(review): despite the name this is used for the non-TLS listener;
    // `BindTcpListener` would also carry the address.
    #[error("Failed to bind TcpListener")]
    BindTlsListener { source: std::io::Error },
    // The local address of the plain-HTTP listener could not be read.
    #[error("No local address")]
    NoLocalAddr { source: std::io::Error },
    // Not constructed in `Server::spawn` itself; presumably produced by the HTTP server
    // builder -- verify against `http_server`.
    #[error("Failed to bind server socket to {addr}")]
    BindTcpListener {
        source: std::io::Error,
        addr: SocketAddr,
    },
}
289
/// Server task errors
///
/// Returned by the supervisor task, see [`Server::shutdown`] and
/// [`Server::task_handle`].
#[allow(missing_docs)]
#[stack_error(derive, add_meta)]
#[non_exhaustive]
pub enum SupervisorError {
    // The metrics server task returned an error.
    #[error("Error starting metrics server")]
    Metrics {
        #[error(std_err)]
        source: std::io::Error,
    },
    // The Let's Encrypt event stream ended; the task driving it always ends with this.
    #[error("Acme event stream finished")]
    AcmeEventStreamFinished {},
    // Joining a task failed.
    #[error(transparent)]
    JoinError {
        #[error(from, std_err)]
        source: JoinError,
    },
    // Neither background tasks, the relay server nor the QUIC server are running.
    #[error("No relay services are enabled")]
    NoRelayServicesEnabled {},
    // A supervised task was cancelled rather than finishing or panicking.
    #[error("Task cancelled")]
    TaskCancelled {},
}
312
313impl Server {
    /// Starts the server.
    ///
    /// Depending on `config` this starts, in this order:
    ///
    /// - the metrics server, if the `metrics` feature is enabled and
    ///   [`ServerConfig::metrics_addr`] is set,
    /// - the QUIC server, if [`ServerConfig::quic`] is set,
    /// - the Relay HTTP(S) server, if [`ServerConfig::relay`] is set.  With TLS
    ///   configured an additional plain-HTTP listener serving only `/generate_204` is
    ///   bound on [`RelayConfig::http_bind_addr`]; without TLS `/generate_204` is served
    ///   by the relay server itself.
    ///
    /// All of these are handed to a supervisor task owned by the returned [`Server`].
    ///
    /// # Errors
    ///
    /// Returns a [`SpawnError`] if the QUIC server fails to spawn, a TLS header fails to
    /// parse, the plain-HTTP listener cannot be bound or queried for its address, or the
    /// relay server builder fails to spawn.
    pub async fn spawn<EC, EA>(config: ServerConfig<EC, EA>) -> Result<Self, SpawnError>
    where
        EC: fmt::Debug + 'static,
        EA: fmt::Debug + 'static,
    {
        // Background tasks supervised by `relay_supervisor`.
        let mut tasks = JoinSet::new();

        let metrics = RelayMetrics::default();

        #[cfg(feature = "metrics")]
        if let Some(addr) = config.metrics_addr {
            debug!("Starting metrics server");
            let mut registry = iroh_metrics::Registry::default();
            registry.register_all(&metrics);
            tasks.spawn(
                async move {
                    iroh_metrics::service::start_metrics_server(addr, Arc::new(registry))
                        .await
                        .map_err(|err| e!(SupervisorError::Metrics, err))
                }
                .instrument(info_span!("metrics-server")),
            );
        }

        // Start the Relay server, but first clone the certs out.
        // Only manually configured certificates are exposed via `Server::certificates`.
        let certificates = config.relay.as_ref().and_then(|relay| {
            relay.tls.as_ref().and_then(|tls| match tls.cert {
                CertConfig::LetsEncrypt { .. } => None,
                CertConfig::Manual { ref certs, .. } => Some(certs.clone()),
                CertConfig::Reloading => None,
            })
        });

        let quic_server = match config.quic {
            Some(quic_config) => {
                debug!("Starting QUIC server {}", quic_config.bind_addr);
                Some(QuicServer::spawn(quic_config).map_err(|err| e!(SpawnError::QuicSpawn, err))?)
            }
            None => None,
        };
        let quic_addr = quic_server.as_ref().map(|srv| srv.bind_addr());
        let quic_handle = quic_server.as_ref().map(|srv| srv.handle());

        let (relay_server, http_addr) = match config.relay {
            Some(relay_config) => {
                debug!("Starting Relay server");
                // The headers are handed to the builder whether or not TLS is configured.
                let mut headers = HeaderMap::new();
                for (name, value) in TLS_HEADERS.iter() {
                    headers.insert(
                        *name,
                        value
                            .parse()
                            .map_err(|err| e!(SpawnError::TlsHeaderParse, err))?,
                    );
                }
                // With TLS the relay server binds the HTTPS address, otherwise the HTTP one.
                let relay_bind_addr = match relay_config.tls {
                    Some(ref tls) => tls.https_bind_addr,
                    None => relay_config.http_bind_addr,
                };
                let key_cache_capacity = relay_config
                    .key_cache_capacity
                    .unwrap_or(DEFAULT_KEY_CACHE_CAPACITY);
                let mut builder = http_server::ServerBuilder::new(relay_bind_addr)
                    .metrics(metrics.server.clone())
                    .headers(headers)
                    .key_cache_capacity(key_cache_capacity)
                    .access(relay_config.access)
                    .request_handler(Method::GET, "/", Box::new(root_handler))
                    .request_handler(Method::GET, "/index.html", Box::new(root_handler))
                    .request_handler(Method::GET, RELAY_PROBE_PATH, Box::new(probe_handler))
                    .request_handler(Method::GET, "/robots.txt", Box::new(robots_handler))
                    .request_handler(Method::GET, "/healthz", Box::new(healthz_handler));
                if let Some(cfg) = relay_config.limits.client_rx {
                    builder = builder.client_rx_ratelimit(cfg);
                }
                // `http_addr` is the address of the separate plain-HTTP listener; it is
                // only `Some` when TLS is configured.
                let http_addr = match relay_config.tls {
                    Some(tls_config) => {
                        let server_tls_config = match tls_config.cert {
                            CertConfig::LetsEncrypt { mut state } => {
                                let acceptor =
                                    http_server::TlsAcceptor::LetsEncrypt(state.acceptor());
                                // Drive the ACME state machine by polling its event stream.
                                // The stream ending is reported as an error to the
                                // supervisor.
                                tasks.spawn(
                                    async move {
                                        while let Some(event) = state.next().await {
                                            match event {
                                                Ok(ok) => debug!("acme event: {ok:?}"),
                                                Err(err) => error!("error: {err:?}"),
                                            }
                                        }
                                        Err(e!(SupervisorError::AcmeEventStreamFinished))
                                    }
                                    .instrument(info_span!("acme")),
                                );
                                Some(http_server::TlsConfig {
                                    config: Arc::new(tls_config.server_config),
                                    acceptor,
                                })
                            }
                            CertConfig::Manual { .. } | CertConfig::Reloading => {
                                let server_config = Arc::new(tls_config.server_config);
                                let acceptor =
                                    tokio_rustls::TlsAcceptor::from(server_config.clone());
                                let acceptor = http_server::TlsAcceptor::Manual(acceptor);
                                Some(http_server::TlsConfig {
                                    config: server_config,
                                    acceptor,
                                })
                            }
                        };
                        builder = builder.tls_config(server_tls_config);

                        // Some services always need to be served over HTTP without TLS.  Run
                        // these standalone.
                        let http_listener = TcpListener::bind(&relay_config.http_bind_addr)
                            .await
                            .map_err(|err| e!(SpawnError::BindTlsListener, err))?;
                        let http_addr = http_listener
                            .local_addr()
                            .map_err(|err| e!(SpawnError::NoLocalAddr, err))?;
                        tasks.spawn(
                            async move {
                                run_captive_portal_service(http_listener).await;
                                Ok(())
                            }
                            .instrument(info_span!("http-service", addr = %http_addr)),
                        );
                        Some(http_addr)
                    }
                    None => {
                        // If running Relay without TLS add the plain HTTP server directly
                        // to the Relay server.
                        builder = builder.request_handler(
                            Method::GET,
                            "/generate_204",
                            Box::new(serve_no_content_handler),
                        );
                        None
                    }
                };
                let relay_server = builder.spawn().await?;
                (Some(relay_server), http_addr)
            }
            None => (None, None),
        };
        // If http_addr is Some then relay_server is serving HTTPS.  If http_addr is None
        // relay_server is serving HTTP, including the /generate_204 service.
        let relay_addr = relay_server.as_ref().map(|srv| srv.addr());
        let relay_handle = relay_server.as_ref().map(|srv| srv.handle());
        let task = tokio::spawn(relay_supervisor(tasks, relay_server, quic_server));

        Ok(Self {
            // Plain-HTTP listener address if there is one, else the relay server's address.
            http_addr: http_addr.or(relay_addr),
            // The relay server's address, but only if a separate plain-HTTP listener exists,
            // i.e. only when TLS is configured.
            https_addr: http_addr.and(relay_addr),
            quic_addr,
            relay_handle,
            quic_handle,
            supervisor: AbortOnDropHandle::new(task),
            certificates,
            metrics,
        })
    }
476
477    /// Requests graceful shutdown.
478    ///
479    /// Returns once all server tasks have stopped.
480    pub async fn shutdown(self) -> Result<(), SupervisorError> {
481        // Only the Relay server and QUIC server need shutting down, the supervisor will abort the tasks in
482        // the JoinSet when the server terminates.
483        if let Some(handle) = self.relay_handle {
484            handle.shutdown();
485        }
486        if let Some(handle) = self.quic_handle {
487            handle.shutdown();
488        }
489        self.supervisor.await?
490    }
491
    /// Returns the handle for the task.
    ///
    /// This allows waiting for the server's supervisor task to finish.  Can be useful in
    /// case there is an error in the server before it is shut down.
    ///
    /// Awaiting the handle yields the join result wrapping the supervisor's own
    /// `Result<(), SupervisorError>`.
    pub fn task_handle(&mut self) -> &mut AbortOnDropHandle<Result<(), SupervisorError>> {
        &mut self.supervisor
    }
499
    /// The socket address the HTTPS server is listening on.
    ///
    /// `None` unless the Relay server was configured with TLS.
    pub fn https_addr(&self) -> Option<SocketAddr> {
        self.https_addr
    }
504
    /// The socket address the HTTP server is listening on.
    ///
    /// With TLS configured this is the separate plain-HTTP listener, otherwise it is the
    /// address of the Relay server itself.  `None` if the Relay server is disabled.
    pub fn http_addr(&self) -> Option<SocketAddr> {
        self.http_addr
    }
509
    /// The socket address the QUIC server is listening on.
    ///
    /// `None` if the QUIC server is disabled.
    pub fn quic_addr(&self) -> Option<SocketAddr> {
        self.quic_addr
    }
514
515    /// The certificates chain if configured with manual TLS certificates.
516    pub fn certificates(&self) -> Option<Vec<rustls::pki_types::CertificateDer<'static>>> {
517        self.certificates.clone()
518    }
519
520    /// Get the server's https [`RelayUrl`].
521    ///
522    /// This uses [`Self::https_addr`] so it's mostly useful for local development.
523    #[cfg(feature = "test-utils")]
524    pub fn https_url(&self) -> Option<RelayUrl> {
525        self.https_addr.map(|addr| {
526            url::Url::parse(&format!("https://{addr}"))
527                .expect("valid url")
528                .into()
529        })
530    }
531
532    /// Get the server's http [`RelayUrl`].
533    ///
534    /// This uses [`Self::http_addr`] so it's mostly useful for local development.
535    #[cfg(feature = "test-utils")]
536    pub fn http_url(&self) -> Option<RelayUrl> {
537        self.http_addr.map(|addr| {
538            url::Url::parse(&format!("http://{addr}"))
539                .expect("valid url")
540                .into()
541        })
542    }
543
    /// Returns the metrics collected in the relay server.
    ///
    /// These are the same metrics that are registered with the metrics server when one
    /// is configured.
    pub fn metrics(&self) -> &RelayMetrics {
        &self.metrics
    }
548}
549
/// Supervisor for the relay server tasks.
///
/// As soon as one of the tasks exits, all other tasks are stopped and the server stops.
/// The supervisor finishes once all tasks are finished.
///
/// Returns the result of the first task that finished.  If nothing is running at all
/// (no background tasks, no relay server, no QUIC server) this returns
/// [`SupervisorError::NoRelayServicesEnabled`].  A cancelled task is reported as
/// [`SupervisorError::TaskCancelled`].
///
/// # Panics
///
/// If the first task to finish panicked, the panic is resumed on the supervisor task.
#[instrument(skip_all)]
async fn relay_supervisor(
    mut tasks: JoinSet<Result<(), SupervisorError>>,
    mut relay_http_server: Option<http_server::Server>,
    mut quic_server: Option<QuicServer>,
) -> Result<(), SupervisorError> {
    // A disabled server is represented by a future that never resolves; its select
    // branch is additionally guarded by the `*_enabled` flag.
    let quic_enabled = quic_server.is_some();
    let mut quic_fut = match quic_server {
        Some(ref mut server) => n0_future::Either::Left(server.task_handle()),
        None => n0_future::Either::Right(n0_future::future::pending()),
    };
    let relay_enabled = relay_http_server.is_some();
    let mut relay_fut = match relay_http_server {
        Some(ref mut server) => n0_future::Either::Left(server.task_handle()),
        None => n0_future::Either::Right(n0_future::future::pending()),
    };
    // Wait for the first task or server to finish.  The `else` branch only runs when
    // every other branch is disabled: the JoinSet is empty and both servers are off.
    let res = tokio::select! {
        biased;
        Some(ret) = tasks.join_next() => ret,
        ret = &mut quic_fut, if quic_enabled => ret.map(Ok),
        ret = &mut relay_fut, if relay_enabled => ret.map(Ok),
        else => Ok(Err(e!(SupervisorError::NoRelayServicesEnabled))),
    };
    // Outer `Result` is the join result, inner `Result` is the task's own outcome.
    let ret = match res {
        Ok(Ok(())) => {
            debug!("Task exited");
            Ok(())
        }
        Ok(Err(err)) => {
            error!(%err, "Task failed");
            Err(err)
        }
        Err(err) => {
            if let Ok(panic) = err.try_into_panic() {
                error!("Task panicked");
                std::panic::resume_unwind(panic);
            }
            debug!("Task cancelled");
            Err(e!(SupervisorError::TaskCancelled))
        }
    };

    // Ensure the HTTP server terminated, there is no harm in calling this after it is
    // already shut down.
    if let Some(server) = relay_http_server {
        server.shutdown();
    }

    // Ensure the QUIC server is closed
    if let Some(server) = quic_server {
        server.shutdown().await;
    }

    // Stop all remaining tasks
    tasks.shutdown().await;

    ret
}
612
613fn root_handler(
614    _r: Request<Incoming>,
615    response: ResponseBuilder,
616) -> HyperResult<Response<BytesBody>> {
617    response
618        .status(StatusCode::OK)
619        .header("Content-Type", "text/html; charset=utf-8")
620        .body(INDEX.into())
621        .map_err(|err| Box::new(err) as HyperError)
622}
623
624/// HTTP latency queries
625fn probe_handler(
626    _r: Request<Incoming>,
627    response: ResponseBuilder,
628) -> HyperResult<Response<BytesBody>> {
629    response
630        .status(StatusCode::OK)
631        .header("Access-Control-Allow-Origin", "*")
632        .body(body_empty())
633        .map_err(|err| Box::new(err) as HyperError)
634}
635
636fn robots_handler(
637    _r: Request<Incoming>,
638    response: ResponseBuilder,
639) -> HyperResult<Response<BytesBody>> {
640    response
641        .status(StatusCode::OK)
642        .body(ROBOTS_TXT.into())
643        .map_err(|err| Box::new(err) as HyperError)
644}
645
646/// For captive portal detection.
647fn serve_no_content_handler<B: hyper::body::Body>(
648    r: Request<B>,
649    mut response: ResponseBuilder,
650) -> HyperResult<Response<BytesBody>> {
651    let check = |c: &HeaderValue| {
652        !c.is_empty() && c.len() < 64 && c.as_bytes().iter().all(|c| is_challenge_char(*c as char))
653    };
654
655    if let Some(challenge) = r.headers().get(NO_CONTENT_CHALLENGE_HEADER)
656        && check(challenge)
657    {
658        response = response.header(
659            NO_CONTENT_RESPONSE_HEADER,
660            format!("response {}", challenge.to_str()?),
661        );
662    }
663
664    response
665        .status(StatusCode::NO_CONTENT)
666        .body(body_empty())
667        .map_err(|err| Box::new(err) as HyperError)
668}
669
/// Reports whether `c` may appear in a captive portal challenge.
///
/// Accepted are ASCII letters, ASCII digits and the characters `.`, `-` and `_`.
fn is_challenge_char(c: char) -> bool {
    // Semi-randomly chosen as a limited set of valid characters
    c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_')
}
679
/// Health check response
///
/// Serialized to JSON as the body of the `/healthz` endpoint.
#[derive(Serialize)]
struct Health {
    /// Health status; the handler always reports `"ok"`.
    status: &'static str,
    /// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
    version: &'static str,
    /// Value of `VERGEN_GIT_SHA` at compile time, or `"unknown"` if it was not set.
    git_hash: &'static str,
}
687
688fn healthz_handler(
689    _r: Request<Incoming>,
690    response: ResponseBuilder,
691) -> HyperResult<Response<BytesBody>> {
692    let health = Health {
693        status: "ok",
694        version: env!("CARGO_PKG_VERSION"),
695        git_hash: option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"),
696    };
697    let body = serde_json::to_string(&health).unwrap_or_else(|_| r#"{"status":"error"}"#.into());
698    response
699        .status(StatusCode::OK)
700        .header("Content-Type", "application/json")
701        .body(body.into())
702        .map_err(|err| Box::new(err) as HyperError)
703}
704
/// Serves the captive portal service over plain HTTP on `http_listener`.
///
/// This is a future that never returns, drop it to cancel/abort.
///
/// Every accepted connection is served in its own task using [`CaptivePortalService`].
/// Errors from accepting or serving a connection are logged and do not stop the loop.
///
/// # Panics
///
/// Panics if one of the connection tasks panicked.
async fn run_captive_portal_service(http_listener: TcpListener) {
    info!("serving");

    // If this future is cancelled, this is dropped and all tasks are aborted.
    let mut tasks = JoinSet::new();

    loop {
        tokio::select! {
            biased;

            // Reap finished connection tasks first, so the JoinSet does not grow without
            // bound and panics are propagated.
            Some(res) = tasks.join_next() => {
                if let Err(err) = res
                    && err.is_panic()
                {
                    panic!("task panicked: {err:#?}");
                }
            }

            res = http_listener.accept() => {
                match res {
                    Ok((stream, peer_addr)) => {
                        debug!(%peer_addr, "Connection opened",);
                        let handler = CaptivePortalService;

                        tasks.spawn(async move {
                            let stream = crate::server::streams::MaybeTlsStream::Plain(stream);
                            let stream = hyper_util::rt::TokioIo::new(stream);
                            if let Err(err) = hyper::server::conn::http1::Builder::new()
                                .serve_connection(stream, handler)
                                .with_upgrades()
                                .await
                            {
                                error!("Failed to serve connection: {err:?}");
                            }
                        });
                    }
                    Err(err) => {
                        error!(
                            "[CaptivePortalService] failed to accept connection: {:#?}",
                            err
                        );
                    }
                }
            }
        }
    }
}
753
/// Stateless HTTP service answering `GET /generate_204` and rejecting everything else
/// with `404 Not Found`.
#[derive(Clone)]
struct CaptivePortalService;
756
757impl hyper::service::Service<Request<Incoming>> for CaptivePortalService {
758    type Response = Response<BytesBody>;
759    type Error = HyperError;
760    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
761
762    fn call(&self, req: Request<Incoming>) -> Self::Future {
763        match (req.method(), req.uri().path()) {
764            // Captive Portal checker
765            (&Method::GET, "/generate_204") => {
766                Box::pin(async move { serve_no_content_handler(req, Response::builder()) })
767            }
768            _ => {
769                // Return 404 not found response.
770                let r = Response::builder()
771                    .status(StatusCode::NOT_FOUND)
772                    .body(NOTFOUND.into())
773                    .map_err(|err| Box::new(err) as HyperError);
774                Box::pin(async move { r })
775            }
776        }
777    }
778}
779
780#[cfg(test)]
781mod tests {
782    use std::{net::Ipv4Addr, time::Duration};
783
784    use http::StatusCode;
785    use iroh_base::{EndpointId, RelayUrl, SecretKey};
786    use n0_error::Result;
787    use n0_future::{FutureExt, SinkExt, StreamExt};
788    use n0_tracing_test::traced_test;
789    use rand::SeedableRng;
790    use tracing::{info, instrument};
791
792    use super::{
793        Access, AccessConfig, NO_CONTENT_CHALLENGE_HEADER, NO_CONTENT_RESPONSE_HEADER, RelayConfig,
794        Server, ServerConfig, SpawnError,
795    };
796    use crate::{
797        client::{ClientBuilder, ConnectError},
798        dns::DnsResolver,
799        protos::{
800            handshake,
801            relay::{ClientToRelayMsg, Datagrams, RelayToClientMsg},
802        },
803    };
804
    /// Spawns a relay server for tests: plain HTTP on an OS-assigned localhost port, no
    /// TLS, no QUIC, no metrics server, default limits and unrestricted access.
    async fn spawn_local_relay() -> std::result::Result<Server, SpawnError> {
        Server::spawn(ServerConfig::<(), ()> {
            relay: Some(RelayConfig::<(), ()> {
                // Port 0 lets the OS pick a free port.
                http_bind_addr: (Ipv4Addr::LOCALHOST, 0).into(),
                tls: None,
                limits: Default::default(),
                key_cache_capacity: Some(1024),
                access: AccessConfig::Everyone,
            }),
            quic: None,
            metrics_addr: None,
        })
        .await
    }
819
820    #[instrument]
821    async fn try_send_recv(
822        client_a: &mut crate::client::Client,
823        client_b: &mut crate::client::Client,
824        b_key: EndpointId,
825        msg: Datagrams,
826    ) -> Result<RelayToClientMsg> {
827        // try resend 10 times
828        for _ in 0..10 {
829            client_a
830                .send(ClientToRelayMsg::Datagrams {
831                    dst_endpoint_id: b_key,
832                    datagrams: msg.clone(),
833                })
834                .await?;
835            let Ok(res) = tokio::time::timeout(Duration::from_millis(500), client_b.next()).await
836            else {
837                continue;
838            };
839            let res = res.expect("stream finished")?;
840            return Ok(res);
841        }
842        panic!("failed to send and recv message");
843    }
844
    /// Returns a fresh [`DnsResolver`] created with `DnsResolver::new()`.
    ///
    /// The relay-client tests in this module pass it to `ClientBuilder::new`.
    fn dns_resolver() -> DnsResolver {
        DnsResolver::new()
    }
848
849    #[tokio::test]
850    #[traced_test]
851    async fn test_no_services() {
852        let mut server = Server::spawn(ServerConfig::<(), ()>::default())
853            .await
854            .unwrap();
855        let res = tokio::time::timeout(Duration::from_secs(5), server.task_handle())
856            .await
857            .expect("timeout, server not finished")
858            .expect("server task JoinError");
859        assert!(res.is_err());
860    }
861
862    #[tokio::test]
863    #[traced_test]
864    async fn test_conflicting_bind() {
865        let mut server = Server::spawn(ServerConfig::<(), ()> {
866            relay: Some(RelayConfig {
867                http_bind_addr: (Ipv4Addr::LOCALHOST, 1234).into(),
868                tls: None,
869                limits: Default::default(),
870                key_cache_capacity: Some(1024),
871                access: AccessConfig::Everyone,
872            }),
873            quic: None,
874            metrics_addr: Some((Ipv4Addr::LOCALHOST, 1234).into()),
875        })
876        .await
877        .unwrap();
878        let res = tokio::time::timeout(Duration::from_secs(5), server.task_handle())
879            .await
880            .expect("timeout, server not finished")
881            .expect("server task JoinError");
882        assert!(res.is_err()); // AddrInUse
883    }
884
885    #[tokio::test]
886    #[traced_test]
887    async fn test_root_handler() {
888        let server = spawn_local_relay().await.unwrap();
889        let url = format!("http://{}", server.http_addr().unwrap());
890
891        let client = reqwest::Client::builder().use_rustls_tls().build().unwrap();
892        let response = client.get(&url).send().await.unwrap();
893        assert_eq!(response.status(), 200);
894        let body = response.text().await.unwrap();
895        assert!(body.contains("iroh.computer"));
896    }
897
898    #[tokio::test]
899    #[traced_test]
900    async fn test_captive_portal_service() {
901        let server = spawn_local_relay().await.unwrap();
902        let url = format!("http://{}/generate_204", server.http_addr().unwrap());
903        let challenge = "123az__.";
904
905        let client = reqwest::Client::builder().use_rustls_tls().build().unwrap();
906        let response = client
907            .get(&url)
908            .header(NO_CONTENT_CHALLENGE_HEADER, challenge)
909            .send()
910            .await
911            .unwrap();
912        assert_eq!(response.status(), StatusCode::NO_CONTENT);
913        let header = response.headers().get(NO_CONTENT_RESPONSE_HEADER).unwrap();
914        assert_eq!(header.to_str().unwrap(), format!("response {challenge}"));
915        let body = response.text().await.unwrap();
916        assert!(body.is_empty());
917    }
918
919    #[tokio::test]
920    #[traced_test]
921    async fn test_relay_clients() -> Result<()> {
922        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64);
923        let server = spawn_local_relay().await?;
924
925        let relay_url = format!("http://{}", server.http_addr().unwrap());
926        let relay_url: RelayUrl = relay_url.parse()?;
927
928        // set up client a
929        let a_secret_key = SecretKey::generate(&mut rng);
930        let a_key = a_secret_key.public();
931        let resolver = dns_resolver();
932        info!("client a build & connect");
933        let mut client_a = ClientBuilder::new(relay_url.clone(), a_secret_key, resolver.clone())
934            .connect()
935            .await?;
936
937        // set up client b
938        let b_secret_key = SecretKey::generate(&mut rng);
939        let b_key = b_secret_key.public();
940        info!("client b build & connect");
941        let mut client_b = ClientBuilder::new(relay_url.clone(), b_secret_key, resolver.clone())
942            .connect()
943            .await?;
944
945        info!("sending a -> b");
946
947        // send message from a to b
948        let msg = Datagrams::from("hello, b");
949        let res = try_send_recv(&mut client_a, &mut client_b, b_key, msg.clone()).await?;
950        let RelayToClientMsg::Datagrams {
951            remote_endpoint_id,
952            datagrams,
953        } = res
954        else {
955            panic!("client_b received unexpected message {res:?}");
956        };
957
958        assert_eq!(a_key, remote_endpoint_id);
959        assert_eq!(msg, datagrams);
960
961        info!("sending b -> a");
962        // send message from b to a
963        let msg = Datagrams::from("howdy, a");
964        let res = try_send_recv(&mut client_b, &mut client_a, a_key, msg.clone()).await?;
965
966        let RelayToClientMsg::Datagrams {
967            remote_endpoint_id,
968            datagrams,
969        } = res
970        else {
971            panic!("client_a received unexpected message {res:?}");
972        };
973
974        assert_eq!(b_key, remote_endpoint_id);
975        assert_eq!(msg, datagrams);
976
977        Ok(())
978    }
979
980    #[tokio::test]
981    #[traced_test]
982    async fn test_relay_access_control() -> Result<()> {
983        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64);
984        let current_span = tracing::info_span!("this is a test");
985        let _guard = current_span.enter();
986
987        let a_secret_key = SecretKey::generate(&mut rng);
988        let a_key = a_secret_key.public();
989
990        let server = Server::spawn(ServerConfig::<(), ()> {
991            relay: Some(RelayConfig::<(), ()> {
992                http_bind_addr: (Ipv4Addr::LOCALHOST, 0).into(),
993                tls: None,
994                limits: Default::default(),
995                key_cache_capacity: Some(1024),
996                access: AccessConfig::Restricted(Box::new(move |endpoint_id| {
997                    async move {
998                        info!("checking {}", endpoint_id);
999                        // reject endpoint a
1000                        if endpoint_id == a_key {
1001                            Access::Deny
1002                        } else {
1003                            Access::Allow
1004                        }
1005                    }
1006                    .boxed()
1007                })),
1008            }),
1009            quic: None,
1010            metrics_addr: None,
1011        })
1012        .await?;
1013
1014        let relay_url = format!("http://{}", server.http_addr().unwrap());
1015        let relay_url: RelayUrl = relay_url.parse()?;
1016
1017        // set up client a
1018        let resolver = dns_resolver();
1019        let result = ClientBuilder::new(relay_url.clone(), a_secret_key, resolver)
1020            .connect()
1021            .await;
1022
1023        assert!(
1024            matches!(result, Err(ConnectError::Handshake { source: handshake::Error::ServerDeniedAuth { reason, .. }, .. }) if reason == "not authorized")
1025        );
1026
1027        // test that another client has access
1028
1029        // set up client b
1030        let b_secret_key = SecretKey::generate(&mut rng);
1031        let b_key = b_secret_key.public();
1032
1033        let resolver = dns_resolver();
1034        let mut client_b = ClientBuilder::new(relay_url.clone(), b_secret_key, resolver)
1035            .connect()
1036            .await?;
1037
1038        // set up client c
1039        let c_secret_key = SecretKey::generate(&mut rng);
1040        let c_key = c_secret_key.public();
1041
1042        let resolver = dns_resolver();
1043        let mut client_c = ClientBuilder::new(relay_url.clone(), c_secret_key, resolver)
1044            .connect()
1045            .await?;
1046
1047        // send message from b to c
1048        let msg = Datagrams::from("hello, c");
1049        let res = try_send_recv(&mut client_b, &mut client_c, c_key, msg.clone()).await?;
1050
1051        if let RelayToClientMsg::Datagrams {
1052            remote_endpoint_id,
1053            datagrams,
1054        } = res
1055        {
1056            assert_eq!(b_key, remote_endpoint_id);
1057            assert_eq!(msg, datagrams);
1058        } else {
1059            panic!("client_c received unexpected message {res:?}");
1060        }
1061
1062        Ok(())
1063    }
1064
1065    #[tokio::test]
1066    #[traced_test]
1067    async fn test_relay_clients_full() -> Result<()> {
1068        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64);
1069        let server = spawn_local_relay().await.unwrap();
1070        let relay_url = format!("http://{}", server.http_addr().unwrap());
1071        let relay_url: RelayUrl = relay_url.parse().unwrap();
1072
1073        // set up client a
1074        let a_secret_key = SecretKey::generate(&mut rng);
1075        let resolver = dns_resolver();
1076        let mut client_a = ClientBuilder::new(relay_url.clone(), a_secret_key, resolver.clone())
1077            .connect()
1078            .await?;
1079
1080        // set up client b
1081        let b_secret_key = SecretKey::generate(&mut rng);
1082        let b_key = b_secret_key.public();
1083        let _client_b = ClientBuilder::new(relay_url.clone(), b_secret_key, resolver.clone())
1084            .connect()
1085            .await?;
1086
1087        // send messages from a to b, without b receiving anything.
1088        // we should still keep succeeding to send, even if the packet won't be forwarded
1089        // by the relay server because the server's send queue for b fills up.
1090        let msg = Datagrams::from("hello, b");
1091        for _i in 0..1000 {
1092            client_a
1093                .send(ClientToRelayMsg::Datagrams {
1094                    dst_endpoint_id: b_key,
1095                    datagrams: msg.clone(),
1096                })
1097                .await?;
1098        }
1099        Ok(())
1100    }
1101}