iroh_quinn_udp/lib.rs
1//! Uniform interface to send and receive UDP packets with advanced features useful for QUIC
2//!
3//! This crate exposes kernel UDP stack features available on most modern systems which are required
4//! for an efficient and conformant QUIC implementation. As of this writing, these are not available
5//! in std or major async runtimes, and their niche character and complexity are a barrier to adding
6//! them. Hence, a dedicated crate.
7//!
8//! Exposed features include:
9//!
10//! - Segmentation offload for bulk send and receive operations, reducing CPU load.
11//! - Reporting the exact destination address of received packets and specifying explicit source
12//! addresses for sent packets, allowing responses to be sent from the address that the peer
13//! expects when there are multiple possibilities. This is common when bound to a wildcard address
14//! in IPv6 due to [RFC 8981] temporary addresses.
15//! - [Explicit Congestion Notification], which is required by QUIC to prevent packet loss and reduce
16//! latency on congested links when supported by the network path.
17//! - Disabled IP-layer fragmentation, which allows the true physical MTU to be detected and reduces
18//! risk of QUIC packet loss.
19//!
20//! Some features are unavailable in some environments. This can be due to an outdated operating
21//! system or drivers. Some operating systems may not implement desired features at all, or may not
22//! yet be supported by the crate. When support is unavailable, functionality will gracefully
23//! degrade.
24//!
25//! [RFC 8981]: https://www.rfc-editor.org/rfc/rfc8981.html
26//! [Explicit Congestion Notification]: https://www.rfc-editor.org/rfc/rfc3168.html
27#![warn(unreachable_pub)]
28#![warn(clippy::use_self)]
29
30use std::net::{IpAddr, Ipv6Addr, SocketAddr};
31#[cfg(unix)]
32use std::os::unix::io::AsFd;
33#[cfg(windows)]
34use std::os::windows::io::AsSocket;
35#[cfg(not(wasm_browser))]
36use std::{
37 sync::Mutex,
38 time::{Duration, Instant},
39};
40
41#[cfg(any(unix, windows))]
42mod cmsg;
43
44#[cfg(unix)]
45#[path = "unix.rs"]
46mod imp;
47
48#[cfg(windows)]
49#[path = "windows.rs"]
50mod imp;
51
52// No ECN support
53#[cfg(not(any(wasm_browser, unix, windows)))]
54#[path = "fallback.rs"]
55mod imp;
56
// Crate-internal logging facade.
//
// Re-exports the `debug`/`error`/`info`/`trace`/`warn` macros of whichever logging backend
// feature is enabled. When neither feature is enabled, macros of the same names are provided
// that swallow their arguments and expand to an empty block.
#[allow(unused_imports, unused_macros)]
mod log {
    // `tracing-log` wins whenever it is enabled, even if `log` is enabled as well.
    #[cfg(feature = "tracing-log")]
    pub(crate) use tracing::{debug, error, info, trace, warn};

    // `log` is only used when `tracing-log` is not enabled.
    #[cfg(all(feature = "log", not(feature = "tracing-log")))]
    pub(crate) use log::{debug, error, info, trace, warn};

    // No backend: expose the no-op replacements under the same names.
    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    pub(crate) use no_op::*;

    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    mod no_op {
        macro_rules! error {
            ($($tt:tt)*) => {{}};
        }
        // NOTE(review): defined as `log_warn` and renamed to `warn` on re-export, presumably
        // because a macro literally named `warn` would clash with the built-in `warn`
        // attribute — confirm.
        macro_rules! log_warn {
            ($($tt:tt)*) => {{}};
        }
        macro_rules! info {
            ($($tt:tt)*) => {{}};
        }
        macro_rules! debug {
            ($($tt:tt)*) => {{}};
        }
        macro_rules! trace {
            ($($tt:tt)*) => {{}};
        }

        pub(crate) use {debug, error, info, log_warn as warn, trace};
    }
}
79
80#[cfg(not(wasm_browser))]
81pub use imp::UdpSocketState;
82
83/// Number of UDP packets to send/receive at a time
84#[cfg(not(wasm_browser))]
85pub const BATCH_SIZE: usize = imp::BATCH_SIZE;
86/// Number of UDP packets to send/receive at a time
87#[cfg(wasm_browser)]
88pub const BATCH_SIZE: usize = 1;
89
90/// Metadata for a single buffer filled with bytes received from the network
91///
92/// This associated buffer can contain one or more datagrams, see [`stride`].
93///
94/// [`stride`]: RecvMeta::stride
95#[derive(Debug, Copy, Clone)]
96#[non_exhaustive]
97pub struct RecvMeta {
98 /// The source address of the datagram(s) contained in the buffer
99 pub addr: SocketAddr,
100 /// The number of bytes the associated buffer has
101 pub len: usize,
102 /// The size of a single datagram in the associated buffer
103 ///
104 /// When GRO (Generic Receive Offload) is used this indicates the size of a single
105 /// datagram inside the buffer. If the buffer is larger, that is if [`len`] is greater
106 /// then this value, then the individual datagrams contained have their boundaries at
107 /// `stride` increments from the start. The last datagram could be smaller than
108 /// `stride`.
109 ///
110 /// [`len`]: RecvMeta::len
111 pub stride: usize,
112 /// The Explicit Congestion Notification bits for the datagram(s) in the buffer
113 pub ecn: Option<EcnCodepoint>,
114 /// The destination IP address which was encoded in this datagram
115 ///
116 /// Populated on platforms: Windows, Linux, Android (API level > 25),
117 /// FreeBSD, OpenBSD, NetBSD, macOS, and iOS.
118 pub dst_ip: Option<IpAddr>,
119 /// The interface index of the interface on which the datagram was received
120 pub interface_index: Option<u32>,
121}
122
123impl Default for RecvMeta {
124 /// Constructs a value with arbitrary fields, intended to be overwritten
125 fn default() -> Self {
126 Self {
127 addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0),
128 len: 0,
129 stride: 0,
130 ecn: None,
131 dst_ip: None,
132 interface_index: None,
133 }
134 }
135}
136
137/// An outgoing packet
138#[derive(Debug, Clone)]
139pub struct Transmit<'a> {
140 /// The socket this datagram should be sent to
141 pub destination: SocketAddr,
142 /// Explicit congestion notification bits to set on the packet
143 pub ecn: Option<EcnCodepoint>,
144 /// Contents of the datagram
145 pub contents: &'a [u8],
146 /// The segment size if this transmission contains multiple datagrams.
147 /// This is `None` if the transmit only contains a single datagram
148 pub segment_size: Option<usize>,
149 /// Optional source IP address for the datagram
150 pub src_ip: Option<IpAddr>,
151}
152
153impl Transmit<'_> {
154 /// Computes the effective segment-size of the packet.
155 ///
156 /// Some (older) network drivers don't like being told to do GSO even if
157 /// there is effectively only a single segment.
158 /// (i.e. `segment_size == contents.len()`)
159 /// Additionally, a `segment_size` that is greater than the content also
160 /// means there is effectively only a single segment.
161 /// This case is actually quite common when splitting up a prepared GSO batch
162 /// again after GSO has been disabled because the last datagram in a GSO
163 /// batch is allowed to be smaller than the segment size.
164 fn effective_segment_size(&self) -> Option<usize> {
165 match self.segment_size? {
166 size if size >= self.contents.len() => None,
167 size => Some(size),
168 }
169 }
170}
171
/// Minimum spacing between logged IO errors: at most 1 is logged per minute
#[cfg(not(wasm_browser))]
const IO_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(60);
175
/// Logs a rate-limited warning message when sendmsg fails
///
/// Logging is only performed if more than [`IO_ERROR_LOG_INTERVAL`] has elapsed since the
/// instant stored in `last_send_error`; when a warning is emitted, that instant is reset to
/// the current time.
///
/// The logged message contains `err` and every field of `transmit` except the payload
/// itself, for which only the length is reported.
///
/// A poisoned `last_send_error` lock is recovered instead of causing a panic: the guarded
/// value is a plain timestamp that cannot be left in an inconsistent state, and a
/// best-effort logging helper should not bring down the caller.
#[cfg(all(not(wasm_browser), any(feature = "tracing-log", feature = "log")))]
fn log_sendmsg_error(
    last_send_error: &Mutex<Instant>,
    err: impl core::fmt::Debug,
    transmit: &Transmit<'_>,
) {
    let now = Instant::now();
    // Take the guard even if another thread panicked while holding the lock.
    let mut last_send_error = last_send_error
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if now.saturating_duration_since(*last_send_error) > IO_ERROR_LOG_INTERVAL {
        *last_send_error = now;
        log::warn!(
            "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, ecn: {:?}, len: {:?}, segment_size: {:?} }}",
            err,
            transmit.destination,
            transmit.src_ip,
            transmit.ecn,
            transmit.contents.len(),
            transmit.segment_size
        );
    }
}
201
202// No-op
203#[cfg(not(any(wasm_browser, feature = "tracing-log", feature = "log")))]
204fn log_sendmsg_error(_: &Mutex<Instant>, _: impl core::fmt::Debug, _: &Transmit<'_>) {}
205
206/// A borrowed UDP socket
207///
208/// On Unix, constructible via `From<T: AsFd>`. On Windows, constructible via `From<T:
209/// AsSocket>`.
210// Wrapper around socket2 to avoid making it a public dependency and incurring stability risk
211#[cfg(not(wasm_browser))]
212pub struct UdpSockRef<'a>(socket2::SockRef<'a>);
213
214#[cfg(unix)]
215impl<'s, S> From<&'s S> for UdpSockRef<'s>
216where
217 S: AsFd,
218{
219 fn from(socket: &'s S) -> Self {
220 Self(socket.into())
221 }
222}
223
/// Borrows any Windows type exposing a socket handle as a [`UdpSockRef`]
#[cfg(windows)]
impl<'s, S: AsSocket> From<&'s S> for UdpSockRef<'s> {
    fn from(socket: &'s S) -> Self {
        let inner = socket2::SockRef::from(socket);
        Self(inner)
    }
}
233
/// Explicit congestion notification codepoint
///
/// Each variant's discriminant is its two-bit encoding.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EcnCodepoint {
    /// The ECT(0) codepoint, indicating that an endpoint is ECN-capable
    Ect0 = 0b10,
    /// The ECT(1) codepoint, indicating that an endpoint is ECN-capable
    Ect1 = 0b01,
    /// The CE codepoint, signalling that congestion was experienced
    Ce = 0b11,
}

impl EcnCodepoint {
    /// Decodes a codepoint from the two least significant bits of `x`
    ///
    /// All higher bits are ignored. Returns `None` when both low bits are zero, as no
    /// variant has that encoding.
    pub fn from_bits(x: u8) -> Option<Self> {
        match x & 0b11 {
            0b01 => Some(Self::Ect1),
            0b10 => Some(Self::Ect0),
            0b11 => Some(Self::Ce),
            _ => None,
        }
    }
}
260
#[cfg(test)]
mod tests {
    use std::net::Ipv4Addr;

    use super::*;

    /// Builds a [`Transmit`] carrying `contents` with the given `segment_size`; the remaining
    /// fields are fixed filler values.
    fn make_transmit(contents: &[u8], segment_size: Option<usize>) -> Transmit<'_> {
        Transmit {
            destination: SocketAddr::from((Ipv4Addr::UNSPECIFIED, 1)),
            ecn: None,
            contents,
            segment_size,
            src_ip: None,
        }
    }

    /// Checks `Transmit::effective_segment_size` against a 10-byte payload for a segment
    /// size that is larger, equal, absent and smaller.
    #[test]
    fn effective_segment_size() {
        let payload = [0u8; 10];
        // (configured segment_size, expected effective size, failure message)
        let cases: [(Option<usize>, Option<usize>, &str); 4] = [
            (
                Some(15),
                None,
                "segment_size > content_len should yield no effective segment_size",
            ),
            (
                Some(10),
                None,
                "segment_size == content_len should yield no effective segment_size",
            ),
            (
                None,
                None,
                "no segment_size should yield no effective segment_size",
            ),
            (
                Some(5),
                Some(5),
                "segment_size < content_len should yield effective segment_size",
            ),
        ];

        for (segment_size, expected, message) in cases {
            let transmit = make_transmit(&payload, segment_size);
            assert_eq!(transmit.effective_segment_size(), expected, "{}", message);
        }
    }
}