noq_udp/
lib.rs

1//! Uniform interface to send and receive UDP packets with advanced features useful for QUIC
2//!
3//! This crate exposes kernel UDP stack features available on most modern systems which are required
4//! for an efficient and conformant QUIC implementation. As of this writing, these are not available
5//! in std or major async runtimes, and their niche character and complexity are a barrier to adding
6//! them. Hence, a dedicated crate.
7//!
8//! Exposed features include:
9//!
10//! - Segmentation offload for bulk send and receive operations, reducing CPU load.
11//! - Reporting the exact destination address of received packets and specifying explicit source
12//!   addresses for sent packets, allowing responses to be sent from the address that the peer
13//!   expects when there are multiple possibilities. This is common when bound to a wildcard address
14//!   in IPv6 due to [RFC 8981] temporary addresses.
15//! - [Explicit Congestion Notification], which is required by QUIC to prevent packet loss and reduce
16//!   latency on congested links when supported by the network path.
17//! - Disabled IP-layer fragmentation, which allows the true physical MTU to be detected and reduces
18//!   risk of QUIC packet loss.
19//!
20//! Some features are unavailable in some environments. This can be due to an outdated operating
21//! system or drivers. Some operating systems may not implement desired features at all, or may not
22//! yet be supported by the crate. When support is unavailable, functionality will gracefully
23//! degrade.
24//!
25//! [RFC 8981]: https://www.rfc-editor.org/rfc/rfc8981.html
26//! [Explicit Congestion Notification]: https://www.rfc-editor.org/rfc/rfc3168.html
27#![warn(unreachable_pub)]
28#![warn(clippy::use_self)]
29
30use std::net::{IpAddr, Ipv6Addr, SocketAddr};
31#[cfg(unix)]
32use std::os::unix::io::AsFd;
33#[cfg(windows)]
34use std::os::windows::io::AsSocket;
35#[cfg(not(wasm_browser))]
36use std::{
37    sync::Mutex,
38    time::{Duration, Instant},
39};
40
41#[cfg(any(all(unix, not(posix_minimal)), windows))]
42mod cmsg;
43
44#[cfg(all(unix, not(posix_minimal)))]
45#[path = "unix.rs"]
46mod imp;
47
48#[cfg(windows)]
49#[path = "windows.rs"]
50mod imp;
51
52// Minimal POSIX UDP for platforms without advanced socket APIs (cmsg, GSO, GRO)
53#[cfg(posix_minimal)]
54#[path = "posix_minimal.rs"]
55mod imp;
56
// Which logging backend is compiled in depends on the enabled features, so some of the
// imports and macros below are unused in any given build.
#[allow(unused_imports, unused_macros)]
mod log {
    // No logging feature enabled: every logging macro swallows its arguments and expands to
    // an empty block.
    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    mod disabled {
        macro_rules! trace {
            ($($args:tt)*) => {{}};
        }
        macro_rules! debug {
            ($($args:tt)*) => {{}};
        }
        macro_rules! info {
            ($($args:tt)*) => {{}};
        }
        // Defined under another name and re-exported as `warn` below; presumably this avoids
        // a clash with the built-in `warn` attribute.
        macro_rules! log_warn {
            ($($args:tt)*) => {{}};
        }
        macro_rules! error {
            ($($args:tt)*) => {{}};
        }

        pub(crate) use {debug, error, info, log_warn as warn, trace};
    }

    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    pub(crate) use disabled::*;

    // `tracing-log` takes precedence when both logging features are enabled.
    #[cfg(feature = "tracing-log")]
    pub(crate) use tracing::{debug, error, info, trace, warn};

    #[cfg(all(feature = "log", not(feature = "tracing-log")))]
    pub(crate) use log::{debug, error, info, trace, warn};
}
79
80#[cfg(not(wasm_browser))]
81pub use imp::UdpSocketState;
82
83/// Number of UDP packets to send/receive at a time
84#[cfg(not(wasm_browser))]
85pub const BATCH_SIZE: usize = imp::BATCH_SIZE;
86/// Number of UDP packets to send/receive at a time
87#[cfg(wasm_browser)]
88pub const BATCH_SIZE: usize = 1;
89
90/// Metadata for a single buffer filled with bytes received from the network
91///
92/// This associated buffer can contain one or more datagrams, see [`stride`].
93///
94/// [`stride`]: RecvMeta::stride
95#[derive(Debug, Copy, Clone)]
96#[non_exhaustive]
97pub struct RecvMeta {
98    /// The source address of the datagram(s) contained in the buffer
99    pub addr: SocketAddr,
100    /// The number of bytes the associated buffer has
101    pub len: usize,
102    /// The size of a single datagram in the associated buffer
103    ///
104    /// When GRO (Generic Receive Offload) is used this indicates the size of a single
105    /// datagram inside the buffer. If the buffer is larger, that is if [`len`] is greater
106    /// then this value, then the individual datagrams contained have their boundaries at
107    /// `stride` increments from the start. The last datagram could be smaller than
108    /// `stride`.
109    ///
110    /// [`len`]: RecvMeta::len
111    pub stride: usize,
112    /// The Explicit Congestion Notification bits for the datagram(s) in the buffer
113    pub ecn: Option<EcnCodepoint>,
114    /// The destination IP address which was encoded in this datagram
115    ///
116    /// Populated on platforms: Windows (except under Wine), Linux, Android
117    /// (API level > 25), FreeBSD, OpenBSD, NetBSD, macOS, and iOS.
118    pub dst_ip: Option<IpAddr>,
119    /// The interface index of the interface on which the datagram was received
120    pub interface_index: Option<u32>,
121}
122
123impl Default for RecvMeta {
124    /// Constructs a value with arbitrary fields, intended to be overwritten
125    fn default() -> Self {
126        Self {
127            addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0),
128            len: 0,
129            stride: 0,
130            ecn: None,
131            dst_ip: None,
132            interface_index: None,
133        }
134    }
135}
136
137/// An outgoing packet
138#[derive(Debug, Clone)]
139pub struct Transmit<'a> {
140    /// The socket this datagram should be sent to
141    pub destination: SocketAddr,
142    /// Explicit congestion notification bits to set on the packet
143    pub ecn: Option<EcnCodepoint>,
144    /// Contents of the datagram
145    pub contents: &'a [u8],
146    /// The segment size if this transmission contains multiple datagrams.
147    /// This is `None` if the transmit only contains a single datagram
148    pub segment_size: Option<usize>,
149    /// Optional source IP address for the datagram
150    pub src_ip: Option<IpAddr>,
151}
152
153#[cfg(not(posix_minimal))]
154impl Transmit<'_> {
155    /// Computes the effective segment-size of the packet.
156    ///
157    /// Some (older) network drivers don't like being told to do GSO even if
158    /// there is effectively only a single segment.
159    /// (i.e. `segment_size == contents.len()`)
160    /// Additionally, a `segment_size` that is greater than the content also
161    /// means there is effectively only a single segment.
162    /// This case is actually quite common when splitting up a prepared GSO batch
163    /// again after GSO has been disabled because the last datagram in a GSO
164    /// batch is allowed to be smaller than the segment size.
165    #[cfg_attr(apple_fast, allow(dead_code))] // Used by prepare_msg, which is unused when apple_fast
166    fn effective_segment_size(&self) -> Option<usize> {
167        match self.segment_size? {
168            size if size >= self.contents.len() => None,
169            size => Some(size),
170        }
171    }
172}
173
/// Minimum spacing between two logged IO errors: one minute
#[cfg(not(wasm_browser))]
const IO_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(60);
177
/// Logs a warning message when sendmsg fails
///
/// Logging is rate limited: a message is only emitted if more than
/// [`IO_ERROR_LOG_INTERVAL`] has elapsed since the time stored in `last_send_error`, which
/// is then updated to the current time.
///
/// - `last_send_error`: time at which the last error was logged
/// - `err`: the error to report, formatted with `Debug`
/// - `transmit`: the transmit that failed; its payload length is logged, not its contents
///
/// This function never panics on a poisoned `last_send_error` mutex.
#[cfg(all(not(wasm_browser), any(feature = "tracing-log", feature = "log")))]
fn log_sendmsg_error(
    last_send_error: &Mutex<Instant>,
    err: impl core::fmt::Debug,
    transmit: &Transmit<'_>,
) {
    let now = Instant::now();
    // The mutex only guards a timestamp, so a poisoned lock cannot hold a broken invariant.
    // Recover the guard instead of panicking: a diagnostic helper should not fail just
    // because another thread panicked while holding the lock.
    let mut last_logged = last_send_error
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if now.saturating_duration_since(*last_logged) > IO_ERROR_LOG_INTERVAL {
        *last_logged = now;
        log::warn!(
            "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, ecn: {:?}, len: {:?}, segment_size: {:?} }}",
            err,
            transmit.destination,
            transmit.src_ip,
            transmit.ecn,
            transmit.contents.len(),
            transmit.segment_size
        );
    }
}
203
204// No-op
205#[cfg(not(any(wasm_browser, feature = "tracing-log", feature = "log")))]
206fn log_sendmsg_error(_: &Mutex<Instant>, _: impl core::fmt::Debug, _: &Transmit<'_>) {}
207
208/// A borrowed UDP socket
209///
210/// On Unix, constructible via `From<T: AsFd>`. On Windows, constructible via `From<T:
211/// AsSocket>`.
212// Wrapper around socket2 to avoid making it a public dependency and incurring stability risk
213#[cfg(not(wasm_browser))]
214pub struct UdpSockRef<'a>(socket2::SockRef<'a>);
215
216#[cfg(unix)]
217impl<'s, S> From<&'s S> for UdpSockRef<'s>
218where
219    S: AsFd,
220{
221    fn from(socket: &'s S) -> Self {
222        Self(socket.into())
223    }
224}
225
/// Borrows any Windows socket type that implements `AsSocket`
#[cfg(windows)]
impl<'s, S: AsSocket> From<&'s S> for UdpSockRef<'s> {
    fn from(socket: &'s S) -> Self {
        Self(socket2::SockRef::from(socket))
    }
}
235
/// Explicit congestion notification codepoint
///
/// Each variant's discriminant is its two-bit ECN value.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EcnCodepoint {
    /// The ECT(0) codepoint, indicating that an endpoint is ECN-capable
    Ect0 = 0b10,
    /// The ECT(1) codepoint, indicating that an endpoint is ECN-capable
    Ect1 = 0b01,
    /// The CE codepoint, signalling that congestion was experienced
    Ce = 0b11,
}

impl EcnCodepoint {
    /// Decodes a codepoint from the two least significant bits of `x`
    ///
    /// All higher bits are ignored. Returns `None` when both low bits are zero, since no
    /// variant has that value.
    pub fn from_bits(x: u8) -> Option<Self> {
        match x & 0b11 {
            0b10 => Some(Self::Ect0),
            0b01 => Some(Self::Ect1),
            0b11 => Some(Self::Ce),
            _ => None,
        }
    }
}
262
#[cfg(all(test, not(posix_minimal)))]
mod tests {
    use std::net::Ipv4Addr;

    use super::*;

    /// Checks `effective_segment_size` for each relation between the requested segment size
    /// and the length of the contents.
    #[test]
    fn effective_segment_size() {
        let payload = [0u8; 10];
        // (requested segment size, expected effective segment size, failure message)
        let cases = [
            (
                Some(15),
                None,
                "segment_size > content_len should yield no effective segment_size",
            ),
            (
                Some(10),
                None,
                "segment_size == content_len should yield no effective segment_size",
            ),
            (
                None,
                None,
                "no segment_size should yield no effective segment_size",
            ),
            (
                Some(5),
                Some(5),
                "segment_size < content_len should yield effective segment_size",
            ),
        ];

        for (segment_size, expected, message) in cases {
            let transmit = make_transmit(&payload, segment_size);
            assert_eq!(transmit.effective_segment_size(), expected, "{}", message);
        }
    }

    /// Builds a transmit with the given contents and segment size; all other fields are
    /// fixed placeholder values.
    fn make_transmit(contents: &[u8], segment_size: Option<usize>) -> Transmit<'_> {
        let destination: SocketAddr = (Ipv4Addr::UNSPECIFIED, 1).into();
        Transmit {
            destination,
            ecn: None,
            contents,
            segment_size,
            src_ip: None,
        }
    }
}