io_uring/
types.rs

1//! Common Linux types not provided by libc.
2
3pub(crate) mod sealed {
4    use super::{Fd, Fixed};
5    use std::os::unix::io::RawFd;
6
7    #[derive(Debug)]
8    pub enum Target {
9        Fd(RawFd),
10        Fixed(u32),
11    }
12
13    pub trait UseFd: Sized {
14        fn into(self) -> RawFd;
15    }
16
17    pub trait UseFixed: Sized {
18        fn into(self) -> Target;
19    }
20
21    impl UseFd for Fd {
22        #[inline]
23        fn into(self) -> RawFd {
24            self.0
25        }
26    }
27
28    impl UseFixed for Fd {
29        #[inline]
30        fn into(self) -> Target {
31            Target::Fd(self.0)
32        }
33    }
34
35    impl UseFixed for Fixed {
36        #[inline]
37        fn into(self) -> Target {
38            Target::Fixed(self.0)
39        }
40    }
41}
42
43use crate::sys;
44use crate::util::{cast_ptr, unwrap_nonzero, unwrap_u32};
45use bitflags::bitflags;
46use std::convert::TryFrom;
47use std::marker::PhantomData;
48use std::num::NonZeroU32;
49use std::os::unix::io::RawFd;
50
51pub use sys::__kernel_rwf_t as RwFlags;
52
/// Opaque placeholder type; use [`statx`](struct@libc::statx) instead.
#[allow(non_camel_case_types)]
#[repr(C)]
pub struct statx {
    _priv: (),
}
59
/// Opaque placeholder type; use [`epoll_event`](libc::epoll_event) instead.
#[allow(non_camel_case_types)]
#[repr(C)]
pub struct epoll_event {
    _priv: (),
}
66
/// A plain file descriptor, i.e. one that has not been registered with io_uring.
///
/// This is a transparent wrapper around the raw descriptor value.
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub struct Fd(pub RawFd);
71
/// A file descriptor that has been registered with io_uring, through either
/// [`Submitter::register_files`](crate::Submitter::register_files) or
/// [`Submitter::register_files_sparse`](crate::Submitter::register_files_sparse).
///
/// In some cases this has lower overhead than using [`Fd`].
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub struct Fixed(pub u32);
78
79bitflags! {
80    /// Options for [`Timeout`](super::Timeout).
81    ///
82    /// The default behavior is to treat the timespec as a relative time interval. `flags` may
83    /// contain [`types::TimeoutFlags::ABS`] to indicate the timespec represents an absolute
84    /// time. When an absolute time is being specified, the kernel will use its monotonic clock
85    /// unless one of the following flags is set (they may not both be set):
86    /// [`types::TimeoutFlags::BOOTTIME`] or [`types::TimeoutFlags::REALTIME`].
87    ///
88    /// The default behavior when the timeout expires is to return a CQE with -libc::ETIME in
89    /// the res field. To change this behavior to have zero returned, include
90    /// [`types::TimeoutFlags::ETIME_SUCCESS`].
91    pub struct TimeoutFlags: u32 {
92        const ABS = sys::IORING_TIMEOUT_ABS;
93
94        const BOOTTIME = sys::IORING_TIMEOUT_BOOTTIME;
95
96        const REALTIME = sys::IORING_TIMEOUT_REALTIME;
97
98        const LINK_TIMEOUT_UPDATE = sys::IORING_LINK_TIMEOUT_UPDATE;
99
100        const ETIME_SUCCESS = sys::IORING_TIMEOUT_ETIME_SUCCESS;
101    }
102}
103
104bitflags! {
105    /// Options for [`Fsync`](super::Fsync).
106    pub struct FsyncFlags: u32 {
107        const DATASYNC = sys::IORING_FSYNC_DATASYNC;
108    }
109}
110
111bitflags! {
112    /// Options for [`AsyncCancel`](super::AsyncCancel) and
113    /// [`Submitter::register_sync_cancel`](super::Submitter::register_sync_cancel).
114    pub(crate) struct AsyncCancelFlags: u32 {
115        /// Cancel all requests that match the given criteria, rather
116        /// than just canceling the first one found.
117        ///
118        /// Available since 5.19.
119        const ALL = sys::IORING_ASYNC_CANCEL_ALL;
120
121        /// Match based on the file descriptor used in the original
122        /// request rather than the user_data.
123        ///
124        /// Available since 5.19.
125        const FD = sys::IORING_ASYNC_CANCEL_FD;
126
127        /// Match any request in the ring, regardless of user_data or
128        /// file descriptor.  Can be used to cancel any pending
129        /// request in the ring.
130        ///
131        /// Available since 5.19.
132        const ANY = sys::IORING_ASYNC_CANCEL_ANY;
133
134        /// Match based on the fixed file descriptor used in the original
135        /// request rather than the user_data.
136        ///
137        /// Available since 6.0
138        const FD_FIXED = sys::IORING_ASYNC_CANCEL_FD_FIXED;
139    }
140}
141
142/// Wrapper around `open_how` as used in [the `openat2(2)` system
143/// call](https://man7.org/linux/man-pages/man2/openat2.2.html).
144#[derive(Default, Debug, Clone, Copy)]
145#[repr(transparent)]
146pub struct OpenHow(sys::open_how);
147
148impl OpenHow {
149    pub const fn new() -> Self {
150        OpenHow(sys::open_how {
151            flags: 0,
152            mode: 0,
153            resolve: 0,
154        })
155    }
156
157    pub const fn flags(mut self, flags: u64) -> Self {
158        self.0.flags = flags;
159        self
160    }
161
162    pub const fn mode(mut self, mode: u64) -> Self {
163        self.0.mode = mode;
164        self
165    }
166
167    pub const fn resolve(mut self, resolve: u64) -> Self {
168        self.0.resolve = resolve;
169        self
170    }
171}
172
173#[derive(Default, Debug, Clone, Copy)]
174#[repr(transparent)]
175pub struct Timespec(pub(crate) sys::__kernel_timespec);
176
177impl Timespec {
178    #[inline]
179    pub const fn new() -> Self {
180        Timespec(sys::__kernel_timespec {
181            tv_sec: 0,
182            tv_nsec: 0,
183        })
184    }
185
186    #[inline]
187    pub const fn sec(mut self, sec: u64) -> Self {
188        self.0.tv_sec = sec as _;
189        self
190    }
191
192    #[inline]
193    pub const fn nsec(mut self, nsec: u32) -> Self {
194        self.0.tv_nsec = nsec as _;
195        self
196    }
197}
198
199impl From<std::time::Duration> for Timespec {
200    fn from(value: std::time::Duration) -> Self {
201        Timespec::new()
202            .sec(value.as_secs())
203            .nsec(value.subsec_nanos())
204    }
205}
206
207/// Submit arguments
208///
209/// Note that arguments that exceed their lifetime will fail to compile.
210///
211/// ```compile_fail
212/// use io_uring::types::{ SubmitArgs, Timespec };
213///
214/// let sigmask: libc::sigset_t = unsafe { std::mem::zeroed() };
215///
216/// let mut args = SubmitArgs::new();
217///
218/// {
219///     let ts = Timespec::new();
220///     args = args.timespec(&ts);
221///     args = args.sigmask(&sigmask);
222/// }
223///
224/// drop(args);
225/// ```
226#[derive(Default, Debug, Clone, Copy)]
227pub struct SubmitArgs<'prev: 'now, 'now> {
228    pub(crate) args: sys::io_uring_getevents_arg,
229    prev: PhantomData<&'prev ()>,
230    now: PhantomData<&'now ()>,
231}
232
233impl<'prev, 'now> SubmitArgs<'prev, 'now> {
234    #[inline]
235    pub const fn new() -> SubmitArgs<'static, 'static> {
236        let args = sys::io_uring_getevents_arg {
237            sigmask: 0,
238            sigmask_sz: 0,
239            pad: 0,
240            ts: 0,
241        };
242
243        SubmitArgs {
244            args,
245            prev: PhantomData,
246            now: PhantomData,
247        }
248    }
249
250    #[inline]
251    pub fn sigmask<'new>(mut self, sigmask: &'new libc::sigset_t) -> SubmitArgs<'now, 'new> {
252        self.args.sigmask = cast_ptr(sigmask) as _;
253        self.args.sigmask_sz = std::mem::size_of::<libc::sigset_t>() as _;
254
255        SubmitArgs {
256            args: self.args,
257            prev: self.now,
258            now: PhantomData,
259        }
260    }
261
262    #[inline]
263    pub fn timespec<'new>(mut self, timespec: &'new Timespec) -> SubmitArgs<'now, 'new> {
264        self.args.ts = cast_ptr(timespec) as _;
265
266        SubmitArgs {
267            args: self.args,
268            prev: self.now,
269            now: PhantomData,
270        }
271    }
272}
273
274#[repr(transparent)]
275pub struct BufRingEntry(sys::io_uring_buf);
276
277/// An entry in a buf_ring that allows setting the address, length and buffer id.
278#[allow(clippy::len_without_is_empty)]
279impl BufRingEntry {
280    /// Sets the entry addr.
281    pub fn set_addr(&mut self, addr: u64) {
282        self.0.addr = addr;
283    }
284
285    /// Returns the entry addr.
286    pub fn addr(&self) -> u64 {
287        self.0.addr
288    }
289
290    /// Sets the entry len.
291    pub fn set_len(&mut self, len: u32) {
292        self.0.len = len;
293    }
294
295    /// Returns the entry len.
296    pub fn len(&self) -> u32 {
297        self.0.len
298    }
299
300    /// Sets the entry bid.
301    pub fn set_bid(&mut self, bid: u16) {
302        self.0.bid = bid;
303    }
304
305    /// Returns the entry bid.
306    pub fn bid(&self) -> u16 {
307        self.0.bid
308    }
309
310    /// The offset to the ring's tail field given the ring's base address.
311    ///
312    /// The caller should ensure the ring's base address is aligned with the system's page size,
313    /// per the uring interface requirements.
314    ///
315    /// # Safety
316    ///
317    /// The ptr will be dereferenced in order to determine the address of the resv field,
318    /// so the caller is responsible for passing in a valid pointer. And not just
319    /// a valid pointer type, but also the argument must be the address to the first entry
320    /// of the buf_ring for the resv field to even be considered the tail field of the ring.
321    /// The entry must also be properly initialized.
322    pub unsafe fn tail(ring_base: *const BufRingEntry) -> *const u16 {
323        &(*ring_base).0.resv
324    }
325}
326
327/// A destination slot for sending fixed resources
328/// (e.g. [`opcode::MsgRingSendFd`](crate::opcode::MsgRingSendFd)).
329#[derive(Debug, Clone, Copy)]
330pub struct DestinationSlot {
331    /// Fixed slot as indexed by the kernel (target+1).
332    dest: NonZeroU32,
333}
334
335impl DestinationSlot {
336    // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always > 0.
337    const AUTO_ALLOC: NonZeroU32 =
338        unwrap_nonzero(NonZeroU32::new(sys::IORING_FILE_INDEX_ALLOC as u32));
339
340    /// Use an automatically allocated target slot.
341    pub const fn auto_target() -> Self {
342        Self {
343            dest: DestinationSlot::AUTO_ALLOC,
344        }
345    }
346
347    /// Try to use a given target slot.
348    ///
349    /// Valid slots are in the range from `0` to `u32::MAX - 2` inclusive.
350    pub fn try_from_slot_target(target: u32) -> Result<Self, u32> {
351        // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always >= 2.
352        const MAX_INDEX: u32 = unwrap_u32(DestinationSlot::AUTO_ALLOC.get().checked_sub(2));
353
354        if target > MAX_INDEX {
355            return Err(target);
356        }
357
358        let kernel_index = target.saturating_add(1);
359        // SAFETY: by construction, always clamped between 1 and IORING_FILE_INDEX_ALLOC-1.
360        debug_assert!(0 < kernel_index && kernel_index < DestinationSlot::AUTO_ALLOC.get());
361        let dest = NonZeroU32::new(kernel_index).unwrap();
362
363        Ok(Self { dest })
364    }
365
366    pub(crate) fn kernel_index_arg(&self) -> u32 {
367        self.dest.get()
368    }
369}
370
371/// Helper structure for parsing the result of a multishot [`opcode::RecvMsg`](crate::opcode::RecvMsg).
372#[derive(Debug)]
373pub struct RecvMsgOut<'buf> {
374    header: sys::io_uring_recvmsg_out,
375    /// The fixed length of the name field, in bytes.
376    ///
377    /// If the incoming name data is larger than this, it gets truncated to this.
378    /// If it is smaller, it gets 0-padded to fill the whole field. In either case,
379    /// this fixed amount of space is reserved in the result buffer.
380    msghdr_name_len: usize,
381
382    name_data: &'buf [u8],
383    control_data: &'buf [u8],
384    payload_data: &'buf [u8],
385}
386
387impl<'buf> RecvMsgOut<'buf> {
388    const DATA_START: usize = std::mem::size_of::<sys::io_uring_recvmsg_out>();
389
390    /// Parse the data buffered upon completion of a `RecvMsg` multishot operation.
391    ///
392    /// `buffer` is the whole buffer previously provided to the ring, while `msghdr`
393    /// is the same content provided as input to the corresponding SQE
394    /// (only `msg_namelen` and `msg_controllen` fields are relevant).
395    #[allow(clippy::result_unit_err)]
396    pub fn parse(buffer: &'buf [u8], msghdr: &libc::msghdr) -> Result<Self, ()> {
397        let msghdr_name_len = usize::try_from(msghdr.msg_namelen).unwrap();
398        let msghdr_control_len = usize::try_from(msghdr.msg_controllen).unwrap();
399
400        if Self::DATA_START
401            .checked_add(msghdr_name_len)
402            .and_then(|acc| acc.checked_add(msghdr_control_len))
403            .map(|header_len| buffer.len() < header_len)
404            .unwrap_or(true)
405        {
406            return Err(());
407        }
408        // SAFETY: buffer (minimum) length is checked here above.
409        let header = unsafe {
410            buffer
411                .as_ptr()
412                .cast::<sys::io_uring_recvmsg_out>()
413                .read_unaligned()
414        };
415
416        // min is used because the header may indicate the true size of the data
417        // while what we received was truncated.
418        let (name_data, control_start) = {
419            let name_start = Self::DATA_START;
420            let name_data_end =
421                name_start + usize::min(usize::try_from(header.namelen).unwrap(), msghdr_name_len);
422            let name_field_end = name_start + msghdr_name_len;
423            (&buffer[name_start..name_data_end], name_field_end)
424        };
425        let (control_data, payload_start) = {
426            let control_data_end = control_start
427                + usize::min(
428                    usize::try_from(header.controllen).unwrap(),
429                    msghdr_control_len,
430                );
431            let control_field_end = control_start + msghdr_control_len;
432            (&buffer[control_start..control_data_end], control_field_end)
433        };
434        let payload_data = {
435            let payload_data_end = payload_start
436                + usize::min(
437                    usize::try_from(header.payloadlen).unwrap(),
438                    buffer.len() - payload_start,
439                );
440            &buffer[payload_start..payload_data_end]
441        };
442
443        Ok(Self {
444            header,
445            msghdr_name_len,
446            name_data,
447            control_data,
448            payload_data,
449        })
450    }
451
452    /// Return the length of the incoming `name` data.
453    ///
454    /// This may be larger than the size of the content returned by
455    /// `name_data()`, if the kernel could not fit all the incoming
456    /// data in the provided buffer size. In that case, name data in
457    /// the result buffer gets truncated.
458    pub fn incoming_name_len(&self) -> u32 {
459        self.header.namelen
460    }
461
462    /// Return whether the incoming name data was larger than the provided limit/buffer.
463    ///
464    /// When `true`, data returned by `name_data()` is truncated and
465    /// incomplete.
466    pub fn is_name_data_truncated(&self) -> bool {
467        self.header.namelen as usize > self.msghdr_name_len
468    }
469
470    /// Message control data, with the same semantics as `msghdr.msg_control`.
471    pub fn name_data(&self) -> &[u8] {
472        self.name_data
473    }
474
475    /// Return the length of the incoming `control` data.
476    ///
477    /// This may be larger than the size of the content returned by
478    /// `control_data()`, if the kernel could not fit all the incoming
479    /// data in the provided buffer size. In that case, control data in
480    /// the result buffer gets truncated.
481    pub fn incoming_control_len(&self) -> u32 {
482        self.header.controllen
483    }
484
485    /// Return whether the incoming control data was larger than the provided limit/buffer.
486    ///
487    /// When `true`, data returned by `control_data()` is truncated and
488    /// incomplete.
489    pub fn is_control_data_truncated(&self) -> bool {
490        (self.header.flags & u32::try_from(libc::MSG_CTRUNC).unwrap()) != 0
491    }
492
493    /// Message control data, with the same semantics as `msghdr.msg_control`.
494    pub fn control_data(&self) -> &[u8] {
495        self.control_data
496    }
497
498    /// Return whether the incoming payload was larger than the provided limit/buffer.
499    ///
500    /// When `true`, data returned by `payload_data()` is truncated and
501    /// incomplete.
502    pub fn is_payload_truncated(&self) -> bool {
503        (self.header.flags & u32::try_from(libc::MSG_TRUNC).unwrap()) != 0
504    }
505
506    /// Message payload, as buffered by the kernel.
507    pub fn payload_data(&self) -> &[u8] {
508        self.payload_data
509    }
510
511    /// Return the length of the incoming `payload` data.
512    ///
513    /// This may be larger than the size of the content returned by
514    /// `payload_data()`, if the kernel could not fit all the incoming
515    /// data in the provided buffer size. In that case, payload data in
516    /// the result buffer gets truncated.
517    pub fn incoming_payload_len(&self) -> u32 {
518        self.header.payloadlen
519    }
520
521    /// Message flags, with the same semantics as `msghdr.msg_flags`.
522    pub fn flags(&self) -> u32 {
523        self.header.flags
524    }
525}
526
527/// [CancelBuilder] constructs match criteria for request cancellation.
528///
529/// The [CancelBuilder] can be used to selectively cancel one or more requests
530/// by user_data, fd, fixed fd, or unconditionally.
531///
532/// ### Examples
533///
534/// ```
535/// use io_uring::types::{CancelBuilder, Fd, Fixed};
536///
537/// // Match all in-flight requests.
538/// CancelBuilder::any();
539///
540/// // Match a single request with user_data = 42.
541/// CancelBuilder::user_data(42);
542///
543/// // Match a single request with fd = 42.
544/// CancelBuilder::fd(Fd(42));
545///
546/// // Match a single request with fixed fd = 42.
547/// CancelBuilder::fd(Fixed(42));
548///
549/// // Match all in-flight requests with user_data = 42.
550/// CancelBuilder::user_data(42).all();
551/// ```
552#[derive(Debug)]
553pub struct CancelBuilder {
554    pub(crate) flags: AsyncCancelFlags,
555    pub(crate) user_data: Option<u64>,
556    pub(crate) fd: Option<sealed::Target>,
557}
558
559impl CancelBuilder {
560    /// Create a new [CancelBuilder] which will match any in-flight request.
561    ///
562    /// This will cancel every in-flight request in the ring.
563    ///
564    /// Async cancellation matching any requests is only available since 5.19.
565    pub const fn any() -> Self {
566        Self {
567            flags: AsyncCancelFlags::ANY,
568            user_data: None,
569            fd: None,
570        }
571    }
572
573    /// Create a new [CancelBuilder] which will match in-flight requests
574    /// with the given `user_data` value.
575    ///
576    /// The first request with the given `user_data` value will be canceled.
577    /// [CancelBuilder::all](#method.all) can be called to instead match every
578    /// request with the provided `user_data` value.
579    pub const fn user_data(user_data: u64) -> Self {
580        Self {
581            flags: AsyncCancelFlags::empty(),
582            user_data: Some(user_data),
583            fd: None,
584        }
585    }
586
587    /// Create a new [CancelBuilder] which will match in-flight requests with
588    /// the given `fd` value.
589    ///
590    /// The first request with the given `fd` value will be canceled. [CancelBuilder::all](#method.all)
591    /// can be called to instead match every request with the provided `fd` value.
592    ///
593    /// FD async cancellation is only available since 5.19.
594    pub fn fd(fd: impl sealed::UseFixed) -> Self {
595        let mut flags = AsyncCancelFlags::FD;
596        let target = fd.into();
597        if matches!(target, sealed::Target::Fixed(_)) {
598            flags.insert(AsyncCancelFlags::FD_FIXED);
599        }
600        Self {
601            flags,
602            user_data: None,
603            fd: Some(target),
604        }
605    }
606
607    /// Modify the [CancelBuilder] match criteria to match all in-flight requests
608    /// rather than just the first one.
609    ///
610    /// This has no effect when combined with [CancelBuilder::any](#method.any).
611    ///
612    /// Async cancellation matching all requests is only available since 5.19.
613    pub fn all(mut self) -> Self {
614        self.flags.insert(AsyncCancelFlags::ALL);
615        self
616    }
617
618    pub(crate) fn to_fd(&self) -> i32 {
619        self.fd
620            .as_ref()
621            .map(|target| match *target {
622                sealed::Target::Fd(fd) => fd,
623                sealed::Target::Fixed(idx) => idx as i32,
624            })
625            .unwrap_or(-1)
626    }
627}
628
629/// Wrapper around `futex_waitv` as used in [`futex_waitv` system
630/// call](https://www.kernel.org/doc/html/latest/userspace-api/futex2.html).
631#[derive(Default, Debug, Clone, Copy)]
632#[repr(transparent)]
633pub struct FutexWaitV(sys::futex_waitv);
634
635impl FutexWaitV {
636    pub const fn new() -> Self {
637        Self(sys::futex_waitv {
638            val: 0,
639            uaddr: 0,
640            flags: 0,
641            __reserved: 0,
642        })
643    }
644
645    pub const fn val(mut self, val: u64) -> Self {
646        self.0.val = val;
647        self
648    }
649
650    pub const fn uaddr(mut self, uaddr: u64) -> Self {
651        self.0.uaddr = uaddr;
652        self
653    }
654
655    pub const fn flags(mut self, flags: u32) -> Self {
656        self.0.flags = flags;
657        self
658    }
659}
660
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::sealed::Target;
    use std::time::Duration;

    /// Seconds and sub-second nanoseconds must survive the conversion.
    #[test]
    fn timespec_from_duration_converts_correctly() {
        let duration = Duration::new(2, 500);
        let Timespec(raw) = Timespec::from(duration);

        assert_eq!(raw.tv_sec as u64, duration.as_secs());
        assert_eq!(raw.tv_nsec as u32, duration.subsec_nanos());
    }

    /// Each constructor sets the expected flags and fields, and `all()`
    /// only adds the `ALL` flag on top.
    #[test]
    fn test_cancel_builder_flags() {
        // Match anything.
        let any = CancelBuilder::any();
        assert_eq!(any.flags, AsyncCancelFlags::ANY);

        // Match by user_data.
        let by_user_data = CancelBuilder::user_data(42);
        assert_eq!(by_user_data.flags, AsyncCancelFlags::empty());
        assert_eq!(by_user_data.user_data, Some(42));
        assert!(by_user_data.fd.is_none());
        let by_user_data = by_user_data.all();
        assert_eq!(by_user_data.flags, AsyncCancelFlags::ALL);

        // Match by regular file descriptor.
        let by_fd = CancelBuilder::fd(Fd(42));
        assert_eq!(by_fd.flags, AsyncCancelFlags::FD);
        assert!(matches!(by_fd.fd, Some(Target::Fd(42))));
        assert!(by_fd.user_data.is_none());
        let by_fd = by_fd.all();
        assert_eq!(by_fd.flags, AsyncCancelFlags::FD | AsyncCancelFlags::ALL);

        // Match by fixed file descriptor.
        let by_fixed = CancelBuilder::fd(Fixed(42));
        assert_eq!(
            by_fixed.flags,
            AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED
        );
        assert!(matches!(by_fixed.fd, Some(Target::Fixed(42))));
        assert!(by_fixed.user_data.is_none());
        let by_fixed = by_fixed.all();
        assert_eq!(
            by_fixed.flags,
            AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED | AsyncCancelFlags::ALL
        );
    }
}
707}