// Syd: rock-solid application kernel
// src/kernel/net/recvmsg.rs: recvmsg(2), recvmmsg(2), and recvmmsg_time64 handlers
//
// Copyright (c) 2025 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{
    borrow::Cow,
    os::{
        fd::{AsRawFd, FromRawFd, OwnedFd, RawFd},
        unix::ffi::OsStrExt,
    },
};

use libc::c_uint;
use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    sys::{
        socket::{MsgFlags, SockaddrLike, SockaddrStorage},
        time::TimeSpec,
    },
};
use zeroize::Zeroizing;

use crate::{
    compat::{
        cmsg_len_32, cmsg_space_32, cmsghdr, cmsghdr32, iovec32, mmsghdr, mmsghdr32, msghdr,
        msghdr32,
    },
    config::MMAP_MIN_ADDR,
    confine::scmp_arch_bits,
    fs::{get_nonblock, has_recv_timeout, peer_inode},
    hook::UNotifyEventRequest,
    kernel::net::to_msgflags,
    path::XPath,
    sandbox::Flags,
};

#[expect(clippy::cognitive_complexity)]
pub(crate) fn handle_recvmsg(
    fd: OwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    flags: Flags,
) -> Result<ScmpNotifResp, Errno> {
    // SAFETY: Reject undefined/invalid flags.
    let call_flags = to_msgflags(args[2])?;

    // SAFETY: Reject MSG_OOB as necessary.
    if !flags.allow_unsafe_oob() && call_flags.contains(MsgFlags::MSG_OOB) {
        // Signal no support to let the sandbox process
        // handle the error gracefully. This is consistent
        // with the Linux kernel.
        return Err(Errno::EOPNOTSUPP);
    }

    // Bitness of the sandbox process.
    let req = request.scmpreq;
    let is32 = scmp_arch_bits(req.data.arch) == 32;

    // Read user msghdr.
    let hdr_sz = if is32 {
        size_of::<msghdr32>()
    } else {
        size_of::<msghdr>()
    };
    let mut hdr_buf = zeroize::Zeroizing::new(Vec::new());
    hdr_buf.try_reserve(hdr_sz).or(Err(Errno::ENOMEM))?;
    hdr_buf.resize(hdr_sz, 0);
    request.read_mem(&mut hdr_buf, args[1])?;

    let mut hdr: msghdr = if is32 {
        // SAFETY: Unaligned read of POD; size validated above.
        let m32: msghdr32 = unsafe { std::ptr::read_unaligned(hdr_buf.as_ptr() as *const _) };
        msghdr::from(m32)
    } else {
        // SAFETY: Unaligned read of POD; size validated above.
        unsafe { std::ptr::read_unaligned(hdr_buf.as_ptr() as *const _) }
    };

    // Mirror sandbox process iovecs to local, bounded buffers.
    #[expect(clippy::type_complexity)]
    let mut msg_bufs: Vec<(Zeroizing<Vec<u8>>, u64)> = Vec::new();
    let mut msg_iovs: Vec<libc::iovec> = Vec::new();
    let mut nam_buf: Vec<u8> = Vec::new();
    let mut ctl_buf: Vec<u8> = Vec::new();

    // Handle msg_iov.
    let user_iov_base = process_msghdr_iov(request, &mut hdr, &mut msg_bufs, &mut msg_iovs)?;

    // Handle msg_name.
    let (user_nam_base, user_nam_size) = process_msghdr_name(&mut hdr, &mut nam_buf)?;

    // Handle msg_control.
    let (user_ctl_base, user_ctl_size) = process_msghdr_ctl(is32, &mut hdr, &mut ctl_buf)?;

    // Track blocking call for invalidation semantics.
    let is_blocking = !call_flags.contains(MsgFlags::MSG_DONTWAIT) && !get_nonblock(&fd)?;
    let ignore_restart = if is_blocking {
        has_recv_timeout(&fd)?
    } else {
        false
    };
    if is_blocking {
        request.cache.add_sys_block(req, ignore_restart)?;
    }

    // SAFETY: Perform recvmsg(2).
    let result = Errno::result(unsafe {
        libc::recvmsg(
            fd.as_raw_fd(),
            &raw mut hdr as *mut libc::msghdr,
            call_flags.bits(),
        )
    });

    if is_blocking {
        request
            .cache
            .del_sys_block(req.id, matches!(result, Err(Errno::EINTR)))?;
    }

    // Check result after critical block.
    #[expect(clippy::cast_sign_loss)]
    let r_bytes = result? as usize;

    // Replace msg_iov pointer with the sandbox process pointer.
    hdr.msg_iov = user_iov_base as *mut libc::iovec;

    // Scatter payload back into the sandbox process.
    if r_bytes > 0 {
        let mut remaining = r_bytes;
        for (buf, ptr) in &msg_bufs {
            if remaining == 0 {
                break;
            }
            let take = remaining.min(buf.len());
            request.write_mem(&buf[..take], *ptr)?;
            remaining = remaining.checked_sub(take).ok_or(Errno::EOVERFLOW)?;
        }
    }

    // Handle peer address logic.
    if !hdr.msg_name.is_null() && hdr.msg_namelen > 0 {
        let r_addr = if !hdr.msg_name.is_null() && hdr.msg_namelen > 0 {
            // SAFETY: `hdr` is returned by the host kernel.
            unsafe {
                SockaddrStorage::from_raw(
                    hdr.msg_name as *const libc::sockaddr,
                    Some(hdr.msg_namelen),
                )
            }
        } else {
            None
        };

        // Modify source address if needed.
        if let Some(mut addr) = r_addr {
            let hdr_namelen = if let Some(peer_addr) = addr
                .as_unix_addr()
                .and_then(|u| u.path())
                .map(|p| XPath::from_bytes(p.as_os_str().as_bytes()))
                .filter(|p| p.starts_with(b"./"))
                .map(|p| p.split().1)
                .and_then(|base| request.find_unix_addr(base).ok())
                .and_then(|sa| {
                    // SAFETY: `sa` originates from our bookkeeping; valid UnixAddr.
                    unsafe { SockaddrStorage::from_raw(sa.as_ptr().cast(), Some(sa.len())) }
                }) {
                addr = peer_addr;
                addr.len()
            } else {
                hdr.msg_namelen
            };

            // Write back truncated address.
            // SAFETY: Convert SockaddrStorage to byte slice late, then truncate.
            let bytes = unsafe {
                std::slice::from_raw_parts(addr.as_ptr().cast::<u8>(), addr.len() as usize)
            };
            #[expect(clippy::cast_possible_truncation)]
            let out_len = addr
                .len()
                .min(hdr_namelen)
                .min(user_nam_size as libc::socklen_t);
            // This write may fail if `user_nam_base` is not writable.
            // Therefore, we should handle EFAULT gracefully.
            // `process_msghdr_name` has already validated that
            // this pointer is not below mmap_min_addr.
            match request.write_mem(&bytes[..out_len as usize], user_nam_base) {
                Ok(_) | Err(Errno::EFAULT) => {}
                Err(errno) => return Err(errno),
            }
            hdr.msg_namelen = out_len;
        } else {
            hdr.msg_namelen = 0;
        }
    }

    // Handle SCM_RIGHTS and SCM_CREDENTIALS in the control message for each result.
    let mut out_cmsg = Vec::new();
    if !hdr.msg_control.is_null() && hdr.msg_controllen > 0 {
        let cmsg_len = hdr.msg_controllen;
        // SAFETY: Casting from `*const c_void` to a byte slice.
        let cmsg_buf =
            unsafe { std::slice::from_raw_parts(hdr.msg_control as *const u8, cmsg_len) };
        let cmsgs = parse_cmsgs(cmsg_buf)?;

        let close_on_exec =
            flags.force_cloexec() || call_flags.contains(MsgFlags::MSG_CMSG_CLOEXEC);
        let rand_fd = flags.force_rand_fd();

        for (hdr, data) in cmsgs {
            let ctrl_buf = if hdr.cmsg_level == libc::SOL_SOCKET
                && hdr.cmsg_type == libc::SCM_RIGHTS
            {
                // Handle SCM_RIGHTS logic.
                let numfds = data
                    .len()
                    .checked_div(size_of::<RawFd>())
                    .ok_or(Errno::EINVAL)?;
                let mut newfds = Vec::<u8>::new();
                newfds.try_reserve(data.len()).or(Err(Errno::ENOMEM))?;
                for idx in 0..numfds {
                    let off = idx
                        .checked_mul(size_of::<RawFd>())
                        .ok_or(Errno::EOVERFLOW)?;
                    // SAFETY: within-bounds read of RawFd.
                    #[expect(clippy::cast_ptr_alignment)]
                    let rfd = unsafe { *(data[off..].as_ptr() as *const RawFd) };

                    // SAFETY: rfd returned by kernel is a valid FD.
                    let rfd = unsafe { OwnedFd::from_raw_fd(rfd) };
                    let newfd = request.add_fd(rfd, close_on_exec, rand_fd)?;
                    newfds.extend_from_slice(&newfd.to_ne_bytes());
                }
                Cow::Owned(newfds)
            } else if hdr.cmsg_level == libc::SOL_SOCKET && hdr.cmsg_type == libc::SCM_CREDENTIALS {
                // Handle SCM_CREDENTIALS logic.
                if data.len() != size_of::<libc::ucred>() {
                    return Err(Errno::EINVAL);
                }

                // SAFETY: Bounded read of POD `libc::ucred` from kernel-filled cmsg payload.
                #[expect(clippy::cast_ptr_alignment)]
                let mut uc: libc::ucred = unsafe { *(data.as_ptr() as *const libc::ucred) };

                // Get socket inode, and lookup pid by inode in sandbox unix map.
                if let Some(pid) = peer_inode(&fd)
                    .ok()
                    .and_then(|inode| request.get_unix(inode))
                    .map(|unix| unix.pid.as_raw())
                {
                    uc.pid = pid;
                }

                // Serialize possibly-updated credentials back into a payload buffer.
                let mut creds = Vec::<u8>::new();
                creds.try_reserve(data.len()).or(Err(Errno::ENOMEM))?;

                // SAFETY: `libc::ucred` is POD; transmute to a byte array of equal size.
                let bytes: [u8; size_of::<libc::ucred>()] = unsafe { std::mem::transmute(uc) };
                creds.extend_from_slice(&bytes);

                Cow::Owned(creds)
            } else {
                // Pass-through other control messages without modification.
                Cow::Borrowed(data)
            };

            // Serialize cmsghdr for the target task (32-bit aware).
            #[expect(clippy::cast_possible_truncation)]
            if !is32 {
                // SAFETY: libc macros; sizes computed before writing.
                let hdr_len = unsafe { libc::CMSG_LEN(ctrl_buf.len() as u32) } as usize;
                // SAFETY: ditto.
                let cspace = unsafe { libc::CMSG_SPACE(ctrl_buf.len() as u32) } as usize;
                let hdr = cmsghdr {
                    cmsg_len: hdr_len as libc::size_t,
                    cmsg_level: hdr.cmsg_level,
                    cmsg_type: hdr.cmsg_type,
                };
                // SAFETY: cmsghdr is POD; serialize as bytes.
                let hbytes: [u8; size_of::<cmsghdr>()] = unsafe { std::mem::transmute(hdr) };
                out_cmsg.extend_from_slice(&hbytes);
                out_cmsg.extend_from_slice(&ctrl_buf);
                if cspace > hdr_len {
                    let pad = cspace.checked_sub(hdr_len).ok_or(Errno::EOVERFLOW)?;
                    let start = out_cmsg.len();
                    out_cmsg.try_reserve(pad).or(Err(Errno::ENOMEM))?;
                    out_cmsg.resize(start.checked_add(pad).ok_or(Errno::EOVERFLOW)?, 0);
                }
            } else {
                let hdr_len = cmsg_len_32(ctrl_buf.len() as u32);
                let cspace = cmsg_space_32(ctrl_buf.len() as u32);
                let hdr = cmsghdr32 {
                    cmsg_len: hdr_len as u32,
                    cmsg_level: hdr.cmsg_level,
                    cmsg_type: hdr.cmsg_type,
                };
                // SAFETY: cmsghdr32 is POD; serialize as bytes.
                let hbytes: [u8; size_of::<cmsghdr32>()] = unsafe { std::mem::transmute(hdr) };
                out_cmsg.extend_from_slice(&hbytes);
                out_cmsg.extend_from_slice(&ctrl_buf);
                if cspace > hdr_len {
                    let pad = cspace.checked_sub(hdr_len).ok_or(Errno::EOVERFLOW)?;
                    let start = out_cmsg.len();
                    out_cmsg.try_reserve(pad).or(Err(Errno::ENOMEM))?;
                    out_cmsg.resize(start.checked_add(pad).ok_or(Errno::EOVERFLOW)?, 0);
                }
            }
        }

        // Write the control message back into sandbox process memory.
        let cmsg_len = out_cmsg.len().min(user_ctl_size);
        if out_cmsg.len() > cmsg_len {
            hdr.msg_flags |= MsgFlags::MSG_CTRUNC.bits();
            let truncated = &out_cmsg[..cmsg_len];
            request.write_mem(truncated, user_ctl_base)?;
        } else {
            request.write_mem(&out_cmsg, user_ctl_base)?;
        }

        hdr.msg_control = user_ctl_base as *mut libc::c_void;
        hdr.msg_controllen = cmsg_len;
    } else {
        hdr.msg_controllen = 0;
    }

    // Write-back msghdr handling 32-bit as necessary.
    if is32 {
        let m32: msghdr32 = hdr.into();
        // SAFETY: POD -> bytes conversion.
        let bytes: [u8; size_of::<msghdr32>()] = unsafe { std::mem::transmute(m32) };
        request.write_mem(&bytes, args[1])?;
    } else {
        // SAFETY: POD -> bytes conversion.
        let bytes: [u8; size_of::<msghdr>()] = unsafe { std::mem::transmute(hdr) };
        request.write_mem(&bytes, args[1])?;
    }

    // Return number of payload bytes received.
    #[expect(clippy::cast_possible_wrap)]
    Ok(request.return_syscall(r_bytes as i64))
}

/// Entry point for recvmmsg(2).
///
/// Reads the optional timeout referenced by `args[4]` from sandbox
/// memory, using the 32-bit timespec reader for 32-bit sandbox
/// processes and the 64-bit reader otherwise, then delegates to
/// `handle_recvmmsg_internal`.
///
/// # Errors
///
/// Propagates any error from reading the timeout or from the
/// internal handler.
pub(crate) fn handle_recvmmsg(
    fd: OwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    flags: Flags,
) -> Result<ScmpNotifResp, Errno> {
    // The timespec reader to use follows the bitness of the sandbox process.
    let is32 = scmp_arch_bits(request.scmpreq.data.arch) == 32;

    // A zero timeout address means that no timeout was supplied.
    let timeout = match args[4] {
        0 => None,
        addr if is32 => Some(request.remote_timespec32(addr)?),
        addr => Some(request.remote_timespec64(addr)?),
    };

    // Hand over to the shared implementation.
    handle_recvmmsg_internal(fd, request, args, flags, timeout)
}

/// Entry point for the explicit 64-bit time variant of recvmmsg(2).
///
/// Reads the optional timeout referenced by `args[4]` from sandbox
/// memory with the 64-bit timespec reader, regardless of the bitness
/// of the sandbox process, then delegates to `handle_recvmmsg_internal`.
///
/// # Errors
///
/// Propagates any error from reading the timeout or from the
/// internal handler.
pub(crate) fn handle_recvmmsg64(
    fd: OwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    flags: Flags,
) -> Result<ScmpNotifResp, Errno> {
    // A zero timeout address means that no timeout was supplied.
    let timeout = match args[4] {
        0 => None,
        addr => Some(request.remote_timespec64(addr)?),
    };

    // Hand over to the shared implementation.
    handle_recvmmsg_internal(fd, request, args, flags, timeout)
}

// Internal function to handle both `recvmmsg` and `recvmmsg64` syscalls.
#[expect(clippy::cognitive_complexity)]
fn handle_recvmmsg_internal(
    fd: OwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    flags: Flags,
    mut timeout: Option<TimeSpec>,
) -> Result<ScmpNotifResp, Errno> {
    // SAFETY: Reject undefined/invalid flags.
    let call_flags = to_msgflags(args[3])?;

    // SAFETY: Reject MSG_OOB as necessary.
    if !flags.allow_unsafe_oob() && call_flags.contains(MsgFlags::MSG_OOB) {
        // Signal no support to let the sandbox process
        // handle the error gracefully. This is consistent
        // with the Linux kernel.
        return Err(Errno::EOPNOTSUPP);
    }

    // Bitness of the sandbox process.
    let req = request.scmpreq;
    let is32 = scmp_arch_bits(req.data.arch) == 32;

    // Read the user mmsghdr array.
    //
    // 1. Validate message count.
    let msg_count = usize::try_from(args[2]).or(Err(Errno::EINVAL))?;
    if msg_count > 1_000_000 {
        // Invalid message count: Too large.
        return Err(Errno::EMSGSIZE);
    }
    let msgs_offset = args[1];

    // 2. Preallocate memory for mmsghdr array.
    let hdr_sz = if is32 {
        size_of::<mmsghdr32>()
    } else {
        size_of::<mmsghdr>()
    };

    let mut hdr_buf = Zeroizing::new(Vec::new());
    let total_sz = hdr_sz.checked_mul(msg_count).ok_or(Errno::EOVERFLOW)?;
    hdr_buf.try_reserve(total_sz).or(Err(Errno::ENOMEM))?;
    hdr_buf.resize(total_sz, 0);
    request.read_mem(&mut hdr_buf, msgs_offset)?;

    // 3. Convert to native format to pass to recvmmsg(2).
    let mut msgs = Vec::new();
    #[expect(clippy::type_complexity)]
    let mut msg_bufs: Vec<Option<Vec<(Zeroizing<Vec<u8>>, u64)>>> = Vec::new();
    let mut nam_bufs: Vec<Option<Vec<u8>>> = Vec::new();
    let mut ctl_bufs: Vec<Option<Vec<u8>>> = Vec::new();
    let mut msg_iovs: Vec<Vec<libc::iovec>> = Vec::new();
    let mut user_iov_bases: Vec<Option<u64>> = Vec::new();
    let mut user_nam_bases: Vec<Option<(u64, usize)>> = Vec::new();
    let mut user_ctl_bases: Vec<Option<(u64, usize)>> = Vec::new();
    msgs.try_reserve(msg_count).or(Err(Errno::ENOMEM))?;
    msg_bufs.try_reserve(msg_count).or(Err(Errno::ENOMEM))?;
    nam_bufs.try_reserve(msg_count).or(Err(Errno::ENOMEM))?;
    ctl_bufs.try_reserve(msg_count).or(Err(Errno::ENOMEM))?;
    msg_iovs.try_reserve(msg_count).or(Err(Errno::ENOMEM))?;
    user_iov_bases
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))?;
    user_nam_bases
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))?;
    user_ctl_bases
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))?;

    for chunk in hdr_buf.chunks(hdr_sz) {
        let mut hdr: libc::mmsghdr = if is32 {
            // SAFETY: We know that hdr_buf was allocated with enough space to hold mmsghdr32.
            let m32: mmsghdr32 = unsafe { std::ptr::read_unaligned(chunk.as_ptr() as *const _) };
            mmsghdr::from(m32).into()
        } else {
            // SAFETY: We know that hdr_buf was allocated with enough space to hold mmsghdr.
            let m64: mmsghdr = unsafe { std::ptr::read_unaligned(chunk.as_ptr() as *const _) };
            m64.into()
        };

        // Handle hdr.msg_iov.
        process_mmsghdr_iov(
            request,
            &mut hdr,
            &mut msg_bufs,
            &mut msg_iovs,
            &mut user_iov_bases,
        )?;

        // Handle hdr.msg_name.
        process_mmsghdr_name(&mut hdr, &mut nam_bufs, &mut user_nam_bases)?;

        // Handle hdr.msg_control.
        process_mmsghdr_ctl(is32, &mut hdr, &mut ctl_bufs, &mut user_ctl_bases)?;

        msgs.push(hdr);
    }

    let timeout_ptr = timeout
        .as_mut()
        .map_or_else(std::ptr::null_mut, |t| t as *mut _ as *mut libc::timespec);

    // Track blocking call for invalidation semantics.
    let is_blocking = !call_flags.contains(MsgFlags::MSG_DONTWAIT) && !get_nonblock(&fd)?;
    let ignore_restart = if is_blocking {
        timeout.is_some() || has_recv_timeout(&fd)?
    } else {
        false
    };
    if is_blocking {
        request.cache.add_sys_block(req, ignore_restart)?;
    }

    // SAFETY: Perform recvmmsg(2).
    //
    // Flags conversion is necessary on musl.
    #[expect(clippy::useless_conversion)]
    let result = Errno::result(unsafe {
        libc::recvmmsg(
            fd.as_raw_fd(),
            msgs.as_mut_ptr(),
            c_uint::try_from(msg_count).or(Err(Errno::EMSGSIZE))?,
            call_flags.bits().try_into().or(Err(Errno::EINVAL))?,
            timeout_ptr,
        )
    });

    if is_blocking {
        request
            .cache
            .del_sys_block(req.id, matches!(result, Err(Errno::EINTR)))?;
    }

    // Check result after critical block.
    #[expect(clippy::cast_sign_loss)]
    let msg_count = result? as usize;

    // Iterate over the raw `mmsghdr` results.
    for index in 0..msg_count {
        // SAFETY: Access the raw message header.
        let mmsg_hdr = unsafe { &mut *(msgs.as_mut_ptr().add(index)) };

        // Replace msg_iov pointer with the sandbox process pointer.
        if let Some(Some(iov_ptr)) = user_iov_bases.get(index) {
            mmsg_hdr.msg_hdr.msg_iov = (*iov_ptr) as *mut libc::iovec;
        }

        // Scatter payload back into the sandbox process.
        if mmsg_hdr.msg_len > 0 {
            if let Some(Some(bufs)) = msg_bufs.get(index) {
                let mut remaining = mmsg_hdr.msg_len as usize;
                for (buf, ptr) in bufs {
                    if remaining == 0 {
                        break;
                    }
                    let take = remaining.min(buf.len());
                    request.write_mem(&buf[..take], *ptr)?;
                    remaining = remaining.checked_sub(take).ok_or(Errno::EOVERFLOW)?;
                }
            }
        }

        // Handle peer address logic.
        let r_addr = if !mmsg_hdr.msg_hdr.msg_name.is_null() && mmsg_hdr.msg_hdr.msg_namelen > 0 {
            // SAFETY: `mmsg_hdr` is returned by the host kernel.
            unsafe {
                SockaddrStorage::from_raw(
                    mmsg_hdr.msg_hdr.msg_name as *const libc::sockaddr,
                    Some(mmsg_hdr.msg_hdr.msg_namelen),
                )
            }
        } else {
            None
        };

        // Modify source address if needed.
        if let Some(mut addr) = r_addr {
            if let Some(peer_addr) = addr
                .as_unix_addr()
                .and_then(|u| u.path())
                .map(|p| XPath::from_bytes(p.as_os_str().as_bytes()))
                .filter(|p| p.starts_with(b"./"))
                .map(|p| p.split().1)
                .and_then(|base| request.find_unix_addr(base).ok())
                .and_then(|sa| {
                    // SAFETY: `sa` originates from our bookkeeping; valid UnixAddr.
                    unsafe { SockaddrStorage::from_raw(sa.as_ptr().cast(), Some(sa.len())) }
                })
            {
                addr = peer_addr;
            }

            // Write the address back into sandbox process memory.
            if let Some(Some((nam_ptr, nam_len))) = user_nam_bases.get(index) {
                // SAFETY: SockaddrStorage is a POD and we use the correct length.
                let addr_bytes = unsafe {
                    std::slice::from_raw_parts(addr.as_ptr().cast::<u8>(), addr.len() as usize)
                };
                #[expect(clippy::cast_possible_truncation)]
                let out_len = addr
                    .len()
                    .min(mmsg_hdr.msg_hdr.msg_namelen)
                    .min(*nam_len as libc::socklen_t);
                // This write may fail if `nam_ptr` is not writable.
                // Therefore, we should handle EFAULT gracefully.
                // `process_mmsghdr_name` has already validated that
                // this pointer is not below mmap_min_addr.
                match request.write_mem(&addr_bytes[..out_len as usize], *nam_ptr) {
                    Ok(_) | Err(Errno::EFAULT) => {}
                    Err(errno) => return Err(errno),
                }
                mmsg_hdr.msg_hdr.msg_name = (*nam_ptr) as *mut libc::c_void;
                mmsg_hdr.msg_hdr.msg_namelen = out_len;
            } else {
                mmsg_hdr.msg_hdr.msg_namelen = 0;
            }
        }

        // Handle SCM_RIGHTS and SCM_CREDENTIALS in the control message for each result.
        let mut out_cmsg = Vec::new();
        if !mmsg_hdr.msg_hdr.msg_control.is_null() && mmsg_hdr.msg_hdr.msg_controllen > 0 {
            #[expect(clippy::unnecessary_cast)]
            let cmsg_len = mmsg_hdr.msg_hdr.msg_controllen as usize;
            // SAFETY: Casting from `*const c_void` to a byte slice.
            let cmsg_buf = unsafe {
                std::slice::from_raw_parts(mmsg_hdr.msg_hdr.msg_control as *const u8, cmsg_len)
            };
            let cmsgs = parse_cmsgs(cmsg_buf)?;

            let close_on_exec =
                flags.force_cloexec() || call_flags.contains(MsgFlags::MSG_CMSG_CLOEXEC);
            let rand_fd = flags.force_rand_fd();

            for (hdr, data) in cmsgs {
                let ctrl_buf = if hdr.cmsg_level == libc::SOL_SOCKET
                    && hdr.cmsg_type == libc::SCM_RIGHTS
                {
                    // Handle SCM_RIGHTS logic.
                    let numfds = data
                        .len()
                        .checked_div(size_of::<RawFd>())
                        .ok_or(Errno::EINVAL)?;
                    let mut newfds = Vec::<u8>::new();
                    newfds.try_reserve(data.len()).or(Err(Errno::ENOMEM))?;
                    for idx in 0..numfds {
                        let off = idx
                            .checked_mul(size_of::<RawFd>())
                            .ok_or(Errno::EOVERFLOW)?;
                        // SAFETY: within-bounds read of RawFd.
                        #[expect(clippy::cast_ptr_alignment)]
                        let rfd = unsafe { *(data[off..].as_ptr() as *const RawFd) };

                        // SAFETY: rfd returned by kernel is a valid FD.
                        let rfd = unsafe { OwnedFd::from_raw_fd(rfd) };
                        let newfd = request.add_fd(rfd, close_on_exec, rand_fd)?;
                        newfds.extend_from_slice(&newfd.to_ne_bytes());
                    }
                    Cow::Owned(newfds)
                } else if hdr.cmsg_level == libc::SOL_SOCKET
                    && hdr.cmsg_type == libc::SCM_CREDENTIALS
                {
                    // Handle SCM_CREDENTIALS logic.
                    if data.len() != size_of::<libc::ucred>() {
                        return Err(Errno::EINVAL);
                    }

                    // SAFETY: Bounded read of POD `libc::ucred` from kernel-filled cmsg payload.
                    #[expect(clippy::cast_ptr_alignment)]
                    let mut uc: libc::ucred = unsafe { *(data.as_ptr() as *const libc::ucred) };

                    // Get socket inode, and lookup pid by inode in sandbox unix map.
                    if let Some(pid) = peer_inode(&fd)
                        .ok()
                        .and_then(|inode| request.get_unix(inode))
                        .map(|unix| unix.pid.as_raw())
                    {
                        uc.pid = pid;
                    }

                    // Serialize possibly-updated credentials back into a payload buffer.
                    let mut creds = Vec::<u8>::new();
                    creds.try_reserve(data.len()).or(Err(Errno::ENOMEM))?;

                    // SAFETY: `libc::ucred` is POD; transmute to a byte array of equal size.
                    let bytes: [u8; size_of::<libc::ucred>()] = unsafe { std::mem::transmute(uc) };
                    creds.extend_from_slice(&bytes);

                    Cow::Owned(creds)
                } else {
                    // Pass-through other control messages without modification.
                    Cow::Borrowed(data)
                };

                // Serialize cmsghdr for the target task (32-bit aware).
                #[expect(clippy::cast_possible_truncation)]
                if !is32 {
                    // SAFETY: libc macros; sizes computed before writing.
                    let hdr_len = unsafe { libc::CMSG_LEN(ctrl_buf.len() as u32) } as usize;
                    // SAFETY: ditto.
                    let cspace = unsafe { libc::CMSG_SPACE(ctrl_buf.len() as u32) } as usize;
                    let hdr = cmsghdr {
                        cmsg_len: hdr_len as libc::size_t,
                        cmsg_level: hdr.cmsg_level,
                        cmsg_type: hdr.cmsg_type,
                    };
                    // SAFETY: cmsghdr is POD; serialize as bytes.
                    let hbytes: [u8; size_of::<cmsghdr>()] = unsafe { std::mem::transmute(hdr) };
                    out_cmsg.extend_from_slice(&hbytes);
                    out_cmsg.extend_from_slice(&ctrl_buf);
                    if cspace > hdr_len {
                        let pad = cspace.checked_sub(hdr_len).ok_or(Errno::EOVERFLOW)?;
                        let start = out_cmsg.len();
                        out_cmsg.try_reserve(pad).or(Err(Errno::ENOMEM))?;
                        out_cmsg.resize(start.checked_add(pad).ok_or(Errno::EOVERFLOW)?, 0);
                    }
                } else {
                    let hdr_len = cmsg_len_32(ctrl_buf.len() as u32);
                    let cspace = cmsg_space_32(ctrl_buf.len() as u32);
                    let hdr = cmsghdr32 {
                        cmsg_len: hdr_len as u32,
                        cmsg_level: hdr.cmsg_level,
                        cmsg_type: hdr.cmsg_type,
                    };
                    // SAFETY: cmsghdr32 is POD; serialize as bytes.
                    let hbytes: [u8; size_of::<cmsghdr32>()] = unsafe { std::mem::transmute(hdr) };
                    out_cmsg.extend_from_slice(&hbytes);
                    out_cmsg.extend_from_slice(&ctrl_buf);
                    if cspace > hdr_len {
                        let pad = cspace.checked_sub(hdr_len).ok_or(Errno::EOVERFLOW)?;
                        let start = out_cmsg.len();
                        out_cmsg.try_reserve(pad).or(Err(Errno::ENOMEM))?;
                        out_cmsg.resize(start.checked_add(pad).ok_or(Errno::EOVERFLOW)?, 0);
                    }
                }
            }
        }

        // Write the control message back into sandbox process memory.
        #[expect(clippy::disallowed_methods)]
        #[expect(clippy::useless_conversion)]
        if let Some(Some((ctl_ptr, ctl_len))) = user_ctl_bases.get(index) {
            // Handle control message truncation.
            let cmsg_len = out_cmsg.len().min(*ctl_len);
            if out_cmsg.len() > cmsg_len {
                mmsg_hdr.msg_hdr.msg_flags |= MsgFlags::MSG_CTRUNC.bits();
                let truncated = &out_cmsg[..cmsg_len];
                request.write_mem(truncated, *ctl_ptr)?;
            } else {
                request.write_mem(&out_cmsg, *ctl_ptr)?;
            }

            mmsg_hdr.msg_hdr.msg_control = (*ctl_ptr) as *mut libc::c_void;
            // SAFETY: unwrap is for musl compat.
            mmsg_hdr.msg_hdr.msg_controllen = cmsg_len.try_into().unwrap();
        } else {
            mmsg_hdr.msg_hdr.msg_controllen = 0;
        }

        // Write back mmsghdr for each result in the array.
        let m32: mmsghdr32;
        let m64: mmsghdr;
        let msg_header = if is32 {
            m32 = mmsghdr32::from(*mmsg_hdr);
            let ptr = &raw const m32 as *const u8;
            // SAFETY: Writing directly from the raw memory of mmsghdr32.
            unsafe { std::slice::from_raw_parts(ptr, size_of::<mmsghdr32>()) }
        } else {
            m64 = (*mmsg_hdr).into();
            let ptr = &raw const m64 as *const u8;
            // SAFETY: Writing directly from the raw memory of mmsghdr.
            unsafe { std::slice::from_raw_parts(ptr, size_of::<mmsghdr>()) }
        };

        let msg_header_size = msg_header.len() as u64;
        let msg_header_offs = (index as u64)
            .checked_mul(msg_header_size)
            .ok_or(Errno::EOVERFLOW)?;
        let offset = msgs_offset
            .checked_add(msg_header_offs)
            .ok_or(Errno::EOVERFLOW)?;
        request.write_mem(msg_header, offset)?;
    }

    // Return the number of messages received.
    #[expect(clippy::cast_possible_wrap)]
    Ok(request.return_syscall(msg_count as i64))
}

// Prepare the scatter/gather array of one mmsghdr entry for the host call.
//
// Reads the `msg_iov` array referenced by `hdr` from sandbox process
// memory (using the `iovec32` layout when the sandbox process is 32-bit),
// allocates a zeroed, zeroize-on-drop local buffer for every entry with a
// non-null base and non-zero length (capped at 1_000_000 bytes each), and
// rewrites `hdr.msg_hdr.msg_iov`/`msg_iovlen` to point at the local copies.
//
// On success exactly one element is pushed to both `msg_bufs` and
// `user_iov_bases`: `Some(..)` when an iovec array was processed, `None`
// when the header carried no usable iovec array. Each `msg_bufs` entry pairs
// a local buffer with the sandbox address of the `iov_base` it stands in for.
// `msg_iovs` receives the local iovec array so that the pointer stored in
// `hdr` stays valid.
//
// Errors:
// - EFAULT: null or below-MMAP_MIN_ADDR array/base pointer with data expected.
// - EMSGSIZE: more than 1024 iovec entries.
// - EINVAL: size computation overflow.
// - ENOMEM: allocation failure.
// - Any error returned by `request.read_mem`.
#[expect(clippy::type_complexity)]
fn process_mmsghdr_iov(
    request: &UNotifyEventRequest,
    hdr: &mut libc::mmsghdr,
    msg_bufs: &mut Vec<Option<Vec<(Zeroizing<Vec<u8>>, u64)>>>,
    msg_iovs: &mut Vec<Vec<libc::iovec>>,
    user_iov_bases: &mut Vec<Option<u64>>,
) -> Result<(), Errno> {
    // Size of one iovec entry depends on the bitness of the sandbox process.
    let req = request.scmpreq;
    let is32 = scmp_arch_bits(req.data.arch) == 32;
    let vec_siz = if is32 {
        size_of::<iovec32>()
    } else {
        size_of::<libc::iovec>()
    };

    // Validate msg_iovlen and msg_iov.
    let msg_iov = hdr.msg_hdr.msg_iov as *mut u8;
    #[expect(clippy::unnecessary_cast)]
    let iov_len = hdr.msg_hdr.msg_iovlen as usize;
    // Validate msg_iov is non-null if msg_iovlen > 0.
    if msg_iov.is_null() && iov_len > 0 {
        // Invalid iovec buffer.
        return Err(Errno::EFAULT);
    }
    if iov_len > 0 && (msg_iov as u64) < *MMAP_MIN_ADDR {
        // Invalid iovec buffer.
        return Err(Errno::EFAULT);
    }
    if !msg_iov.is_null() && iov_len > 1024 {
        // Invalid iovec count: Too large or negative.
        return Err(Errno::EMSGSIZE);
    }

    // Handle msg_iov.
    if !msg_iov.is_null() && iov_len > 0 {
        // Copy the raw iovec array out of sandbox memory.
        // The scratch copy is zeroized on drop, consistent with
        // process_msghdr_iov.
        let iov_siz = iov_len.checked_mul(vec_siz).ok_or(Errno::EINVAL)?;
        let mut iov_raw = Zeroizing::new(Vec::new());
        iov_raw.try_reserve(iov_siz).or(Err(Errno::ENOMEM))?;
        iov_raw.resize(iov_siz, 0);
        request.read_mem(&mut iov_raw, msg_iov as u64)?;

        #[expect(clippy::type_complexity)]
        let mut bufs: Vec<(Zeroizing<Vec<u8>>, u64)> = Vec::new();
        let mut iovs: Vec<libc::iovec> = Vec::new();
        bufs.try_reserve(iov_len).or(Err(Errno::ENOMEM))?;
        iovs.try_reserve(iov_len).or(Err(Errno::ENOMEM))?;

        // `iov_siz` is an exact multiple of `vec_siz`, so every chunk
        // holds exactly one complete entry.
        for chunk in iov_raw.chunks_exact(vec_siz) {
            let iov: libc::iovec = if is32 {
                // SAFETY: POD, chunk is exactly `vec_siz` bytes.
                let iov32: iovec32 =
                    unsafe { std::ptr::read_unaligned(chunk.as_ptr() as *const _) };
                iov32.into()
            } else {
                // SAFETY: POD, chunk is exactly `vec_siz` bytes.
                unsafe { std::ptr::read_unaligned(chunk.as_ptr() as *const _) }
            };

            if iov.iov_base.is_null() && iov.iov_len > 0 {
                return Err(Errno::EFAULT);
            }
            if !iov.iov_base.is_null() && (iov.iov_base as u64) < *MMAP_MIN_ADDR {
                return Err(Errno::EFAULT);
            }

            if !iov.iov_base.is_null() && iov.iov_len > 0 {
                // SAFETY: Cap untrusted `iov_len`.
                let len = iov.iov_len.min(1_000_000);
                let mut buf = Vec::new();
                buf.try_reserve(len).or(Err(Errno::ENOMEM))?;
                buf.resize(len, 0);

                // Remember the sandbox address this local buffer replaces.
                let ptr = iov.iov_base as u64;
                let mut buf = Zeroizing::new(buf);
                let iov = libc::iovec {
                    iov_base: buf.as_mut_ptr().cast(),
                    iov_len: buf.len(),
                };
                bufs.push((buf, ptr));
                iovs.push(iov);
            } else {
                // Empty entry: keep the slot so the entry count is unchanged.
                let iov = libc::iovec {
                    iov_base: std::ptr::null_mut(),
                    iov_len: 0,
                };
                iovs.push(iov);
            }
        }
        // SAFETY: Conversion is required for musl.
        #[expect(clippy::disallowed_methods)]
        #[expect(clippy::useless_conversion)]
        {
            hdr.msg_hdr.msg_iovlen = iovs.len().try_into().unwrap();
        }
        if hdr.msg_hdr.msg_iovlen > 0 {
            hdr.msg_hdr.msg_iov = iovs.as_mut_ptr();
            msg_iovs.push(iovs); // Keep the reference alive.
        } else {
            hdr.msg_hdr.msg_iov = std::ptr::null_mut();
        }
        msg_bufs.push(Some(bufs));
        user_iov_bases.push(Some(msg_iov as u64));
    } else {
        // No iovec array: clear the header fields and record the absence.
        hdr.msg_hdr.msg_iov = std::ptr::null_mut();
        hdr.msg_hdr.msg_iovlen = 0;
        msg_bufs.push(None);
        user_iov_bases.push(None);
    }

    Ok(())
}

// Redirect the address buffer of one mmsghdr entry to a local buffer.
//
// When the header carries a non-null `msg_name` with a non-zero
// `msg_namelen`, a zeroed local buffer of `size_of::<SockaddrStorage>()`
// bytes is allocated, the header is pointed at it, and the original
// sandbox pointer together with the original length is recorded in
// `user_nam_bases`. Otherwise the name fields are cleared and `None`
// is recorded. Exactly one element is pushed to both `nam_bufs` and
// `user_nam_bases` on success.
//
// Errors:
// - EFAULT: null pointer with non-zero length, or a non-null pointer
//   below MMAP_MIN_ADDR.
// - EINVAL: `msg_namelen` does not fit into `usize`.
// - ENOMEM: allocation failure.
fn process_mmsghdr_name(
    hdr: &mut libc::mmsghdr,
    nam_bufs: &mut Vec<Option<Vec<u8>>>,
    user_nam_bases: &mut Vec<Option<(u64, usize)>>,
) -> Result<(), Errno> {
    let user_name = hdr.msg_hdr.msg_name;
    let user_len = hdr.msg_hdr.msg_namelen;

    // Reject inconsistent or implausible pointers up front.
    if user_name.is_null() {
        if user_len > 0 {
            return Err(Errno::EFAULT);
        }
    } else if (user_name as u64) < *MMAP_MIN_ADDR {
        return Err(Errno::EFAULT);
    }

    // Nothing to receive the address into: clear the fields and bail out.
    if user_name.is_null() || user_len == 0 {
        hdr.msg_hdr.msg_name = std::ptr::null_mut();
        hdr.msg_hdr.msg_namelen = 0;
        user_nam_bases.push(None);
        nam_bufs.push(None);
        return Ok(());
    }

    let storage_len = size_of::<SockaddrStorage>();
    let requested = usize::try_from(user_len).or(Err(Errno::EINVAL))?;
    #[expect(clippy::cast_possible_truncation)]
    let host_len = storage_len as libc::socklen_t;

    let mut storage = Vec::new();
    storage.try_reserve(storage_len).or(Err(Errno::ENOMEM))?;
    storage.resize(storage_len, 0);

    // Record where the sandbox process expects the address and how much
    // room it offered, then swap in the local buffer.
    user_nam_bases.push(Some((user_name as u64, requested)));
    hdr.msg_hdr.msg_name = storage.as_mut_ptr().cast();
    hdr.msg_hdr.msg_namelen = host_len;
    nam_bufs.push(Some(storage));

    Ok(())
}

// Redirect the control buffer of one mmsghdr entry to a local buffer.
//
// When the header carries a non-null `msg_control` with a non-zero
// `msg_controllen`, a zeroed local buffer is allocated (requested length
// capped at 1_000_000 bytes; for 32-bit sandbox processes the capped
// length is additionally passed through `CMSG_SPACE`), the header is
// pointed at it, and the original sandbox pointer together with the
// original, uncapped length is recorded in `user_ctl_bases`. Otherwise
// the control fields are cleared and `None` is recorded.
//
// Exactly one element is pushed to both `ctl_bufs` and `user_ctl_bases`
// on success, so both vectors stay aligned with the message index.
//
// Errors:
// - EFAULT: null pointer with non-zero length, or a non-null pointer
//   below MMAP_MIN_ADDR.
// - ENOMEM: allocation failure.
fn process_mmsghdr_ctl(
    is32: bool,
    hdr: &mut libc::mmsghdr,
    ctl_bufs: &mut Vec<Option<Vec<u8>>>,
    user_ctl_bases: &mut Vec<Option<(u64, usize)>>,
) -> Result<(), Errno> {
    if hdr.msg_hdr.msg_control.is_null() && hdr.msg_hdr.msg_controllen > 0 {
        return Err(Errno::EFAULT);
    }
    if !hdr.msg_hdr.msg_control.is_null() && (hdr.msg_hdr.msg_control as u64) < *MMAP_MIN_ADDR {
        return Err(Errno::EFAULT);
    }
    #[expect(clippy::cast_possible_truncation)]
    #[expect(clippy::disallowed_methods)]
    #[expect(clippy::unnecessary_cast)]
    #[expect(clippy::useless_conversion)]
    if !hdr.msg_hdr.msg_control.is_null() && hdr.msg_hdr.msg_controllen > 0 {
        let mut ctl = Vec::new();
        // SAFETY: Cap length at 1 mio.
        let mut len = hdr.msg_hdr.msg_controllen.min(1_000_000) as usize;
        if is32 {
            // SAFETY: CMSG_SPACE is always safe.
            len = unsafe { libc::CMSG_SPACE(len as u32) } as usize;
        }

        ctl.try_reserve(len).or(Err(Errno::ENOMEM))?;
        ctl.resize(len, 0);

        // Record the sandbox pointer and the length the sandbox process
        // offered before the header is redirected to the local buffer.
        #[expect(clippy::unnecessary_cast)]
        user_ctl_bases.push(Some((
            hdr.msg_hdr.msg_control as u64,
            hdr.msg_hdr.msg_controllen as usize,
        )));
        hdr.msg_hdr.msg_control = ctl.as_mut_ptr().cast();
        // SAFETY: unwrap is for musl compat.
        hdr.msg_hdr.msg_controllen = len.try_into().unwrap();
        ctl_bufs.push(Some(ctl));
    } else {
        hdr.msg_hdr.msg_control = std::ptr::null_mut();
        hdr.msg_hdr.msg_controllen = 0;
        ctl_bufs.push(None);
        // Keep `user_ctl_bases` aligned with the message index: it is
        // looked up by index when results are written back, so a message
        // without control data must still occupy a slot.
        user_ctl_bases.push(None);
    }

    Ok(())
}

// Prepare the scatter/gather array of a msghdr for the host call.
//
// Reads the `msg_iov` array referenced by `hdr` from sandbox process
// memory (using the `iovec32` layout when the sandbox process is 32-bit),
// allocates a zeroed, zeroize-on-drop local buffer for every entry with a
// non-null base and non-zero length (capped at 1_000_000 bytes each), and
// rewrites `hdr.msg_iov`/`hdr.msg_iovlen` to refer to `msg_iovs`.
//
// Each element pushed to `msg_bufs` pairs a local buffer with the sandbox
// address of the `iov_base` it stands in for. Entries that are empty get a
// null/zero iovec in `msg_iovs` and no buffer.
//
// Returns the original value of `hdr.msg_iov` as an integer address.
//
// Errors:
// - EFAULT: null or below-MMAP_MIN_ADDR array/base pointer with data expected.
// - EMSGSIZE: more than 1024 iovec entries.
// - EINVAL: size computation overflow.
// - ENOMEM: allocation failure.
// - Any error returned by `request.read_mem`.
#[expect(clippy::type_complexity)]
fn process_msghdr_iov(
    request: &UNotifyEventRequest,
    hdr: &mut msghdr,
    msg_bufs: &mut Vec<(Zeroizing<Vec<u8>>, u64)>,
    msg_iovs: &mut Vec<libc::iovec>,
) -> Result<u64, Errno> {
    // Width of one iovec entry as laid out by the sandbox process.
    let notif = request.scmpreq;
    let is32 = scmp_arch_bits(notif.data.arch) == 32;
    let entry_size = if is32 {
        size_of::<iovec32>()
    } else {
        size_of::<libc::iovec>()
    };

    let user_iov = hdr.msg_iov as *mut u8;
    let user_addr = user_iov as u64;
    let count = hdr.msg_iovlen;

    // No entries requested: nothing to read, just clear the fields.
    if count == 0 {
        hdr.msg_iov = std::ptr::null_mut();
        hdr.msg_iovlen = 0;
        return Ok(user_addr);
    }

    // Entries requested: the array pointer must be plausible.
    if user_iov.is_null() || user_addr < *MMAP_MIN_ADDR {
        return Err(Errno::EFAULT);
    }
    // Invalid iovec count: Too large or negative.
    if count > 1024 {
        return Err(Errno::EMSGSIZE);
    }

    // Copy the raw iovec array out of sandbox memory.
    let raw_len = count.checked_mul(entry_size).ok_or(Errno::EINVAL)?;
    let mut raw = Zeroizing::new(Vec::new());
    raw.try_reserve(raw_len).or(Err(Errno::ENOMEM))?;
    raw.resize(raw_len, 0);
    request.read_mem(&mut raw, user_addr)?;

    msg_bufs.try_reserve(count).or(Err(Errno::ENOMEM))?;
    msg_iovs.try_reserve(count).or(Err(Errno::ENOMEM))?;

    // `raw_len` is an exact multiple of `entry_size`, so every chunk
    // holds exactly one complete entry.
    for entry in raw.chunks_exact(entry_size) {
        let user: libc::iovec = if is32 {
            // SAFETY: POD, chunk is exactly `entry_size` bytes.
            let narrow: iovec32 = unsafe { std::ptr::read_unaligned(entry.as_ptr() as *const _) };
            narrow.into()
        } else {
            // SAFETY: POD, chunk is exactly `entry_size` bytes.
            unsafe { std::ptr::read_unaligned(entry.as_ptr() as *const _) }
        };
        let base = user.iov_base;
        let want = user.iov_len;

        // Start from an empty entry; fill it in only when there is
        // something to receive into.
        let mut host_iov = libc::iovec {
            iov_base: std::ptr::null_mut(),
            iov_len: 0,
        };

        if base.is_null() {
            if want > 0 {
                return Err(Errno::EFAULT);
            }
        } else {
            if (base as u64) < *MMAP_MIN_ADDR {
                return Err(Errno::EFAULT);
            }
            if want > 0 {
                // SAFETY: Cap untrusted `iov_len`.
                let capped = want.min(1_000_000);
                let mut bytes = Vec::new();
                bytes.try_reserve(capped).or(Err(Errno::ENOMEM))?;
                bytes.resize(capped, 0);

                let mut bytes = Zeroizing::new(bytes);
                host_iov.iov_base = bytes.as_mut_ptr().cast();
                host_iov.iov_len = bytes.len();
                msg_bufs.push((bytes, base as u64));
            }
        }

        msg_iovs.push(host_iov);
    }

    // Point the header at the local iovec array.
    hdr.msg_iovlen = msg_iovs.len();
    hdr.msg_iov = if msg_iovs.is_empty() {
        std::ptr::null_mut()
    } else {
        msg_iovs.as_mut_ptr()
    };

    Ok(user_addr)
}

// Redirect the address buffer of a msghdr to a local buffer.
//
// When the header carries a non-null `msg_name` with a non-zero
// `msg_namelen`, a zeroed buffer of `size_of::<SockaddrStorage>()` bytes
// is stored into `nam_buf`, the header is pointed at it, and the original
// sandbox pointer and length are returned as `(base, len)`. Otherwise the
// name fields are cleared, `nam_buf` is left untouched and `(0, 0)` is
// returned.
//
// Errors:
// - EFAULT: null pointer with non-zero length, or a non-null pointer
//   below MMAP_MIN_ADDR.
// - EINVAL: `msg_namelen` does not fit into `usize`.
// - ENOMEM: allocation failure.
fn process_msghdr_name(hdr: &mut msghdr, nam_buf: &mut Vec<u8>) -> Result<(u64, usize), Errno> {
    let user_name = hdr.msg_name;
    let user_len = hdr.msg_namelen;

    // Reject inconsistent or implausible pointers up front.
    if user_name.is_null() {
        if user_len > 0 {
            return Err(Errno::EFAULT);
        }
    } else if (user_name as u64) < *MMAP_MIN_ADDR {
        return Err(Errno::EFAULT);
    }

    // Nothing to receive the address into.
    if user_name.is_null() || user_len == 0 {
        hdr.msg_name = std::ptr::null_mut();
        hdr.msg_namelen = 0;
        return Ok((0, 0));
    }

    let storage_len = size_of::<SockaddrStorage>();
    let requested = usize::try_from(user_len).or(Err(Errno::EINVAL))?;
    #[expect(clippy::cast_possible_truncation)]
    let host_len = storage_len as libc::socklen_t;

    let mut storage = Vec::new();
    storage.try_reserve(storage_len).or(Err(Errno::ENOMEM))?;
    storage.resize(storage_len, 0);

    // Swap in the local buffer and hand its ownership to the caller.
    hdr.msg_name = storage.as_mut_ptr().cast();
    hdr.msg_namelen = host_len;
    *nam_buf = storage;

    Ok((user_name as u64, requested))
}

// Redirect the control buffer of a msghdr to a local buffer.
//
// When the header carries a non-null `msg_control` with a non-zero
// `msg_controllen`, a zeroed buffer is stored into `ctl_buf` (requested
// length capped at 1_000_000 bytes; for 32-bit sandbox processes the
// capped length is additionally passed through `CMSG_SPACE`), the header
// is pointed at it, and the original sandbox pointer and original,
// uncapped length are returned as `(base, len)`. Otherwise the control
// fields are cleared, `ctl_buf` is left untouched and `(0, 0)` is returned.
//
// Errors:
// - EFAULT: null pointer with non-zero length, or a non-null pointer
//   below MMAP_MIN_ADDR.
// - ENOMEM: allocation failure.
fn process_msghdr_ctl(
    is32: bool,
    hdr: &mut msghdr,
    ctl_buf: &mut Vec<u8>,
) -> Result<(u64, usize), Errno> {
    let user_ctl = hdr.msg_control;
    let user_len = hdr.msg_controllen;

    // Reject inconsistent or implausible pointers up front.
    if user_ctl.is_null() {
        if user_len > 0 {
            return Err(Errno::EFAULT);
        }
    } else if (user_ctl as u64) < *MMAP_MIN_ADDR {
        return Err(Errno::EFAULT);
    }

    // No control buffer supplied.
    if user_ctl.is_null() || user_len == 0 {
        hdr.msg_control = std::ptr::null_mut();
        hdr.msg_controllen = 0;
        return Ok((0, 0));
    }

    // SAFETY: Cap length at 1 mio.
    let capped = user_len.min(1_000_000);
    #[expect(clippy::cast_possible_truncation)]
    let alloc_len = if is32 {
        // SAFETY: CMSG_SPACE is always safe.
        let padded = unsafe { libc::CMSG_SPACE(capped as u32) };
        padded as usize
    } else {
        capped
    };

    let mut scratch = Vec::new();
    scratch.try_reserve(alloc_len).or(Err(Errno::ENOMEM))?;
    scratch.resize(alloc_len, 0);

    // Swap in the local buffer and hand its ownership to the caller.
    hdr.msg_control = scratch.as_mut_ptr().cast();
    hdr.msg_controllen = alloc_len;
    *ctl_buf = scratch;

    Ok((user_ctl as u64, user_len))
}

// Parse native cmsgs vector, return [(header, data),...]
//
// Walks `buf` as a sequence of native control messages. Each element of
// the result pairs a copy of the message header with a slice of `buf`
// covering that message's payload (`cmsg_len - CMSG_LEN(0)` bytes). The
// walk advances by `CMSG_SPACE(payload length)` per message and stops
// once the offset reaches the end of `buf`.
//
// Errors:
// - EINVAL: fewer bytes remain than a header needs, `cmsg_len` is smaller
//   than an empty message or does not fit into `usize`, or the payload
//   would extend past the end of `buf`.
// - ENOMEM: allocation failure while growing the result vector.
#[expect(clippy::type_complexity)]
fn parse_cmsgs(buf: &[u8]) -> Result<Vec<(cmsghdr, &[u8])>, Errno> {
    let mut cmsgs = Vec::new();
    let mut offset = 0usize;

    // Length of a control message that carries no payload.
    // SAFETY: CMSG_LEN is always safe.
    let len0 = unsafe { libc::CMSG_LEN(0) } as usize;

    #[expect(clippy::arithmetic_side_effects)]
    #[expect(clippy::cast_possible_truncation)]
    while offset < buf.len() {
        // Never read a header that is not fully contained in the buffer.
        let rest = &buf[offset..];
        if rest.len() < std::mem::size_of::<cmsghdr>() {
            return Err(Errno::EINVAL); // Truncated header.
        }
        // SAFETY: `rest` holds at least `size_of::<cmsghdr>()` bytes
        // (checked above), and the read tolerates any alignment.
        let hdr = unsafe { std::ptr::read_unaligned(rest.as_ptr() as *const cmsghdr) };

        #[expect(clippy::useless_conversion)]
        let data_len: usize = hdr.cmsg_len.try_into().or(Err(Errno::EINVAL))?;
        let data_len = data_len.checked_sub(len0).ok_or(Errno::EINVAL)?;

        let data_off = offset.checked_add(len0).ok_or(Errno::EINVAL)?;
        let data_end = data_off.checked_add(data_len).ok_or(Errno::EINVAL)?;
        if data_end > buf.len() {
            return Err(Errno::EINVAL); // Data goes beyond buffer.
        }
        let data = &buf[data_off..data_end];

        cmsgs.try_reserve(1).or(Err(Errno::ENOMEM))?;
        cmsgs.push((hdr, data));

        // Advance to the next (aligned) message.
        // SAFETY: CMSG_SPACE is always safe.
        offset += unsafe { libc::CMSG_SPACE(data_len as u32) } as usize;
    }

    Ok(cmsgs)
}
