noq_udp/lib.rs
1//! Uniform interface to send and receive UDP packets with advanced features useful for QUIC
2//!
3//! This crate exposes kernel UDP stack features available on most modern systems which are required
4//! for an efficient and conformant QUIC implementation. As of this writing, these are not available
5//! in std or major async runtimes, and their niche character and complexity are a barrier to adding
6//! them. Hence, a dedicated crate.
7//!
8//! Exposed features include:
9//!
10//! - Segmentation offload for bulk send and receive operations, reducing CPU load.
11//! - Reporting the exact destination address of received packets and specifying explicit source
12//! addresses for sent packets, allowing responses to be sent from the address that the peer
13//! expects when there are multiple possibilities. This is common when bound to a wildcard address
14//! in IPv6 due to [RFC 8981] temporary addresses.
15//! - [Explicit Congestion Notification], which is required by QUIC to prevent packet loss and reduce
16//! latency on congested links when supported by the network path.
17//! - Disabled IP-layer fragmentation, which allows the true physical MTU to be detected and reduces
18//! risk of QUIC packet loss.
19//!
20//! Some features are unavailable in some environments. This can be due to an outdated operating
21//! system or drivers. Some operating systems may not implement desired features at all, or may not
22//! yet be supported by the crate. When support is unavailable, functionality will gracefully
23//! degrade.
24//!
25//! [RFC 8981]: https://www.rfc-editor.org/rfc/rfc8981.html
26//! [Explicit Congestion Notification]: https://www.rfc-editor.org/rfc/rfc3168.html
27#![warn(unreachable_pub)]
28#![warn(clippy::use_self)]
29
30use std::net::{IpAddr, Ipv6Addr, SocketAddr};
31#[cfg(unix)]
32use std::os::unix::io::AsFd;
33#[cfg(windows)]
34use std::os::windows::io::AsSocket;
35#[cfg(not(wasm_browser))]
36use std::{
37 sync::Mutex,
38 time::{Duration, Instant},
39};
40
// `cmsg` is only compiled for the full Unix backend and for Windows; the minimal POSIX
// backend below is built without it.
#[cfg(any(all(unix, not(posix_minimal)), windows))]
mod cmsg;

// Platform backend selection: each `imp` declaration below is gated by a different cfg and
// loads a different source file. The selected module supplies `UdpSocketState` and
// `BATCH_SIZE`, which are re-exported from this file.

// Unix targets that are not flagged `posix_minimal`
#[cfg(all(unix, not(posix_minimal)))]
#[path = "unix.rs"]
mod imp;

// Windows targets
#[cfg(windows)]
#[path = "windows.rs"]
mod imp;

// Minimal POSIX UDP for platforms without advanced socket APIs (cmsg, GSO, GRO)
#[cfg(posix_minimal)]
#[path = "posix_minimal.rs"]
mod imp;
56
// Crate-internal logging facade. It re-exports `debug!`, `error!`, `info!`, `trace!` and
// `warn!` from whichever backend is enabled by cargo features, so call sites can write
// `log::warn!(...)` regardless of configuration.
//
// NOTE(review): the `allow` is presumably needed because not every macro is used in every
// build configuration — confirm before removing.
#[allow(unused_imports, unused_macros)]
mod log {
    // `log` crate backend: only selected when `tracing-log` is not enabled as well.
    #[cfg(all(feature = "log", not(feature = "tracing-log")))]
    pub(crate) use log::{debug, error, info, trace, warn};

    // `tracing` backend: wins whenever the `tracing-log` feature is enabled.
    #[cfg(feature = "tracing-log")]
    pub(crate) use tracing::{debug, error, info, trace, warn};

    // No logging feature enabled: every macro accepts arbitrary tokens and expands to an
    // empty block, so call sites still compile.
    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    mod no_op {
        macro_rules! trace ( ($($tt:tt)*) => {{}} );
        macro_rules! debug ( ($($tt:tt)*) => {{}} );
        macro_rules! info ( ($($tt:tt)*) => {{}} );
        // NOTE(review): declared as `log_warn` and renamed to `warn` on re-export —
        // presumably to avoid a clash with the built-in `warn` attribute; confirm.
        macro_rules! log_warn ( ($($tt:tt)*) => {{}} );
        macro_rules! error ( ($($tt:tt)*) => {{}} );

        // Must come after the `macro_rules!` definitions it re-exports.
        pub(crate) use {debug, error, info, log_warn as warn, trace};
    }

    #[cfg(not(any(feature = "log", feature = "tracing-log")))]
    pub(crate) use no_op::*;
}
79
// `UdpSocketState` is provided by the selected `imp` backend; it is not exported for
// `wasm_browser` builds.
#[cfg(not(wasm_browser))]
pub use imp::UdpSocketState;

/// Number of UDP packets to send/receive at a time
///
/// Outside of `wasm_browser` builds this forwards the value defined by the platform backend.
#[cfg(not(wasm_browser))]
pub const BATCH_SIZE: usize = imp::BATCH_SIZE;
/// Number of UDP packets to send/receive at a time
///
/// Fixed to `1` for `wasm_browser` builds.
#[cfg(wasm_browser)]
pub const BATCH_SIZE: usize = 1;
89
90/// Metadata for a single buffer filled with bytes received from the network
91///
92/// This associated buffer can contain one or more datagrams, see [`stride`].
93///
94/// [`stride`]: RecvMeta::stride
95#[derive(Debug, Copy, Clone)]
96#[non_exhaustive]
97pub struct RecvMeta {
98 /// The source address of the datagram(s) contained in the buffer
99 pub addr: SocketAddr,
100 /// The number of bytes the associated buffer has
101 pub len: usize,
102 /// The size of a single datagram in the associated buffer
103 ///
104 /// When GRO (Generic Receive Offload) is used this indicates the size of a single
105 /// datagram inside the buffer. If the buffer is larger, that is if [`len`] is greater
106 /// then this value, then the individual datagrams contained have their boundaries at
107 /// `stride` increments from the start. The last datagram could be smaller than
108 /// `stride`.
109 ///
110 /// [`len`]: RecvMeta::len
111 pub stride: usize,
112 /// The Explicit Congestion Notification bits for the datagram(s) in the buffer
113 pub ecn: Option<EcnCodepoint>,
114 /// The destination IP address which was encoded in this datagram
115 ///
116 /// Populated on platforms: Windows (except under Wine), Linux, Android
117 /// (API level > 25), FreeBSD, OpenBSD, NetBSD, macOS, and iOS.
118 pub dst_ip: Option<IpAddr>,
119 /// The interface index of the interface on which the datagram was received
120 pub interface_index: Option<u32>,
121}
122
123impl Default for RecvMeta {
124 /// Constructs a value with arbitrary fields, intended to be overwritten
125 fn default() -> Self {
126 Self {
127 addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0),
128 len: 0,
129 stride: 0,
130 ecn: None,
131 dst_ip: None,
132 interface_index: None,
133 }
134 }
135}
136
137/// An outgoing packet
138#[derive(Debug, Clone)]
139pub struct Transmit<'a> {
140 /// The socket this datagram should be sent to
141 pub destination: SocketAddr,
142 /// Explicit congestion notification bits to set on the packet
143 pub ecn: Option<EcnCodepoint>,
144 /// Contents of the datagram
145 pub contents: &'a [u8],
146 /// The segment size if this transmission contains multiple datagrams.
147 /// This is `None` if the transmit only contains a single datagram
148 pub segment_size: Option<usize>,
149 /// Optional source IP address for the datagram
150 pub src_ip: Option<IpAddr>,
151}
152
153#[cfg(not(posix_minimal))]
154impl Transmit<'_> {
155 /// Computes the effective segment-size of the packet.
156 ///
157 /// Some (older) network drivers don't like being told to do GSO even if
158 /// there is effectively only a single segment.
159 /// (i.e. `segment_size == contents.len()`)
160 /// Additionally, a `segment_size` that is greater than the content also
161 /// means there is effectively only a single segment.
162 /// This case is actually quite common when splitting up a prepared GSO batch
163 /// again after GSO has been disabled because the last datagram in a GSO
164 /// batch is allowed to be smaller than the segment size.
165 #[cfg_attr(apple_fast, allow(dead_code))] // Used by prepare_msg, which is unused when apple_fast
166 fn effective_segment_size(&self) -> Option<usize> {
167 match self.segment_size? {
168 size if size >= self.contents.len() => None,
169 size => Some(size),
170 }
171 }
172}
173
/// Minimum spacing between logged IO errors: at most one per minute
#[cfg(not(wasm_browser))]
const IO_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(60);
177
/// Logs a warning message when sendmsg fails
///
/// Logging is rate limited: a message is only emitted if more than
/// [`IO_ERROR_LOG_INTERVAL`] has elapsed since the instant stored in `last_send_error`,
/// which is then updated to the current time.
///
/// A poisoned `last_send_error` lock is recovered instead of panicking: the guarded value
/// is a plain timestamp that cannot be left in an inconsistent state, and a best-effort
/// logging helper should never take down the send path.
#[cfg(all(not(wasm_browser), any(feature = "tracing-log", feature = "log")))]
fn log_sendmsg_error(
    last_send_error: &Mutex<Instant>,
    err: impl core::fmt::Debug,
    transmit: &Transmit<'_>,
) {
    let now = Instant::now();
    {
        let mut last_logged = last_send_error
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        if now.saturating_duration_since(*last_logged) <= IO_ERROR_LOG_INTERVAL {
            // Still inside the quiet period; drop this error silently.
            return;
        }
        *last_logged = now;
        // The guard is released at the end of this scope so the lock is not held while
        // the log message is formatted and emitted.
    }

    log::warn!(
        "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, ecn: {:?}, len: {:?}, segment_size: {:?} }}",
        err,
        transmit.destination,
        transmit.src_ip,
        transmit.ecn,
        transmit.contents.len(),
        transmit.segment_size
    );
}
203
204// No-op
205#[cfg(not(any(wasm_browser, feature = "tracing-log", feature = "log")))]
206fn log_sendmsg_error(_: &Mutex<Instant>, _: impl core::fmt::Debug, _: &Transmit<'_>) {}
207
208/// A borrowed UDP socket
209///
210/// On Unix, constructible via `From<T: AsFd>`. On Windows, constructible via `From<T:
211/// AsSocket>`.
212// Wrapper around socket2 to avoid making it a public dependency and incurring stability risk
213#[cfg(not(wasm_browser))]
214pub struct UdpSockRef<'a>(socket2::SockRef<'a>);
215
216#[cfg(unix)]
217impl<'s, S> From<&'s S> for UdpSockRef<'s>
218where
219 S: AsFd,
220{
221 fn from(socket: &'s S) -> Self {
222 Self(socket.into())
223 }
224}
225
226#[cfg(windows)]
227impl<'s, S> From<&'s S> for UdpSockRef<'s>
228where
229 S: AsSocket,
230{
231 fn from(socket: &'s S) -> Self {
232 Self(socket.into())
233 }
234}
235
/// Explicit congestion notification codepoint
///
/// The discriminants are the two-bit values of the ECN field.
#[repr(u8)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum EcnCodepoint {
    /// The ECT(0) codepoint, indicating that an endpoint is ECN-capable
    Ect0 = 0b10,
    /// The ECT(1) codepoint, indicating that an endpoint is ECN-capable
    Ect1 = 0b01,
    /// The CE codepoint, signalling that congestion was experienced
    Ce = 0b11,
}

impl EcnCodepoint {
    /// Decodes a codepoint from the two least-significant bits of `x`
    ///
    /// Any higher bits of `x` are ignored. Returns `None` when the two low bits are `0b00`,
    /// which has no corresponding variant.
    pub fn from_bits(x: u8) -> Option<Self> {
        match x & 0b11 {
            0b01 => Some(Self::Ect1),
            0b10 => Some(Self::Ect0),
            0b11 => Some(Self::Ce),
            _ => None,
        }
    }
}
262
#[cfg(all(test, not(posix_minimal)))]
mod tests {
    use std::net::Ipv4Addr;

    use super::*;

    /// Builds a `Transmit` over `contents` with the given `segment_size`; all other fields
    /// are fixed placeholders that the tests do not look at.
    fn make_transmit(contents: &[u8], segment_size: Option<usize>) -> Transmit<'_> {
        let destination = SocketAddr::from((Ipv4Addr::UNSPECIFIED, 1));
        Transmit {
            destination,
            ecn: None,
            contents,
            segment_size,
            src_ip: None,
        }
    }

    /// Checks `effective_segment_size` against a 10-byte payload for every relation
    /// between `segment_size` and the payload length.
    #[test]
    fn effective_segment_size() {
        let payload = [0u8; 10];
        // (requested segment_size, expected effective size, failure message)
        let cases = [
            (
                Some(15),
                None,
                "segment_size > content_len should yield no effective segment_size",
            ),
            (
                Some(10),
                None,
                "segment_size == content_len should yield no effective segment_size",
            ),
            (
                None,
                None,
                "no segment_size should yield no effective segment_size",
            ),
            (
                Some(5),
                Some(5),
                "segment_size < content_len should yield effective segment_size",
            ),
        ];

        for (segment_size, expected, reason) in cases {
            let actual = make_transmit(&payload, segment_size).effective_segment_size();
            assert_eq!(actual, expected, "{}", reason);
        }
    }
}