From 6ef8a013b8cde94b6229b16e5e418696f981c885 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 22 Jan 2026 10:25:19 -0500 Subject: [PATCH] ephemeral: Fix UKI detection to look inside kernel version subdirectories UKI files are located at /usr/lib/modules//.efi, not directly in /usr/lib/modules/. Update the kernel/initramfs detection loop to search inside version subdirectories for .efi files. This enables ephemeral boot for "pure UKI" images that ship only a Unified Kernel Image without separate vmlinuz/initramfs.img files. Note: UKI boot extracts kernel/initramfs using objcopy, which breaks the signature chain. Secure Boot is not supported for ephemeral runs. See docs/todo/ephemeral-uefi.md for future UEFI boot work. Closes: #161 Assisted-by: OpenCode (Claude Sonnet 4) Signed-off-by: Colin Walters --- .../fixtures/Dockerfile.uki-only | 55 ++ .../src/tests/run_ephemeral.rs | 135 +++++ crates/kit/src/kernel.rs | 497 ++++++++++++++++++ crates/kit/src/lib.rs | 1 + crates/kit/src/main.rs | 1 + crates/kit/src/qemu.rs | 9 +- crates/kit/src/run_ephemeral.rs | 86 +-- docs/todo/ephemeral-uefi.md | 195 +++++++ 8 files changed, 936 insertions(+), 43 deletions(-) create mode 100644 crates/integration-tests/fixtures/Dockerfile.uki-only create mode 100644 crates/kit/src/kernel.rs create mode 100644 docs/todo/ephemeral-uefi.md diff --git a/crates/integration-tests/fixtures/Dockerfile.uki-only b/crates/integration-tests/fixtures/Dockerfile.uki-only new file mode 100644 index 00000000..7fe8e4f6 --- /dev/null +++ b/crates/integration-tests/fixtures/Dockerfile.uki-only @@ -0,0 +1,55 @@ +# Test fixture: Bootc image with UKI-only boot (no separate vmlinuz/initramfs) +# +# This creates a "pure UKI" image for testing bcvk ephemeral boot compatibility +# with images that only ship a Unified Kernel Image, not separate kernel/initramfs files. +# +# The UKI is placed in /boot/EFI/Linux/ which is the standard bootc location +# (per Boot Loader Specification). This matches how real bootc sealed images work. +# +# Usage: +# podman build -f Dockerfile.uki-only -t bcvk-test-uki-only . +# +# Note: This requires ukify and systemd-boot packages in the base image. + +ARG BASE_IMAGE=ghcr.io/bootc-dev/dev-bootc:fedora-43-uki + +FROM ${BASE_IMAGE} AS builder + +# Install ukify if not present (should be in fedora-43-uki) +RUN command -v ukify || dnf install -y systemd-ukify + +# Build UKI and place it in the standard /boot/EFI/Linux/ location +RUN < Result<()> { } integration_test!(test_run_ephemeral_instancetype_invalid); +/// Test that ephemeral VMs can boot from UKI-only images (no separate vmlinuz/initramfs) +/// +/// This tests compatibility with bootc images that only ship a Unified Kernel Image, +/// verifying that bcvk can extract kernel/initramfs from the UKI using objcopy. +fn test_run_ephemeral_uki_only() -> Result<()> { + let base_image = get_test_image(); + let uki_image = "bcvk-test-uki-only:latest"; + + // Build the UKI-only test image from the fixture Dockerfile + let fixture_path = + std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("fixtures/Dockerfile.uki-only"); + + debug!( + "Building UKI-only test image from {} using base {}", + fixture_path.display(), + base_image + ); + + let build_output = Command::new("podman") + .args([ + "build", + "-f", + fixture_path.to_str().unwrap(), + "-t", + uki_image, + "--build-arg", + &format!("BASE_IMAGE={}", base_image), + fixture_path.parent().unwrap().to_str().unwrap(), + ]) + .output() + .expect("Failed to run podman build"); + + assert!( + build_output.status.success(), + "Failed to build UKI-only test image: {}", + String::from_utf8_lossy(&build_output.stderr) + ); + + // Verify the image has a UKI in /boot/EFI/Linux/ and no vmlinuz + let verify_output = Command::new("podman") + .args([ + "run", + "--rm", + uki_image, + "sh", + "-c", + "ls /usr/lib/modules/*/vmlinuz 2>/dev/null && echo HAS_VMLINUZ || echo NO_VMLINUZ; ls /boot/EFI/Linux/*.efi 2>/dev/null && echo HAS_UKI || echo NO_UKI", + ]) + .output() + .expect("Failed to verify image contents"); + + let verify_stdout = String::from_utf8_lossy(&verify_output.stdout); + debug!("Image verification: {}", verify_stdout); + assert!( + verify_stdout.contains("NO_VMLINUZ"), + "UKI-only image should not have vmlinuz: {}", + verify_stdout + ); + assert!( + verify_stdout.contains("HAS_UKI"), + "UKI-only image should have a UKI in /boot/EFI/Linux/: {}", + verify_stdout + ); + + // Run ephemeral VM from UKI-only image + let output = run_bcvk(&[ + "ephemeral", + "run", + "--rm", + "--label", + INTEGRATION_TEST_LABEL, + "--execute", + "echo UKI_BOOT_SUCCESS", + uki_image, + ])?; + + output.assert_success("ephemeral run with UKI-only image"); + assert!( + output.stdout.contains("UKI_BOOT_SUCCESS"), + "UKI boot should output success message: {}", + output.stdout + ); + + // Cleanup the test image + let _ = Command::new("podman") + .args(["rmi", "-f", uki_image]) + .output(); + + Ok(()) +} +integration_test!(test_run_ephemeral_uki_only); + +/// Test ephemeral boot with the CentOS 10 UKI image +/// +/// This tests a real-world UKI image that may have both UKI and traditional +/// kernel files, verifying that bcvk correctly prefers the UKI. +fn test_run_ephemeral_centos_uki() -> Result<()> { + const CENTOS_UKI_IMAGE: &str = "ghcr.io/bootc-dev/dev-bootc:centos-10-uki"; + + debug!("Testing ephemeral boot with {}", CENTOS_UKI_IMAGE); + + // Pull the image first (it's not in the standard test image set) + let pull_output = Command::new("podman") + .args(["pull", "-q", CENTOS_UKI_IMAGE]) + .output() + .expect("Failed to run podman pull"); + + assert!( + pull_output.status.success(), + "Failed to pull CentOS UKI image: {}", + String::from_utf8_lossy(&pull_output.stderr) + ); + + let output = run_bcvk(&[ + "ephemeral", + "run", + "--rm", + "--label", + INTEGRATION_TEST_LABEL, + "--execute", + "echo CENTOS_UKI_BOOT_SUCCESS && cat /etc/os-release | grep -E '^(ID|VERSION_ID)='", + CENTOS_UKI_IMAGE, + ])?; + + output.assert_success("ephemeral run with CentOS 10 UKI image"); + assert!( + output.stdout.contains("CENTOS_UKI_BOOT_SUCCESS"), + "CentOS UKI boot should output success message: {}", + output.stdout + ); + + Ok(()) +} +integration_test!(test_run_ephemeral_centos_uki); + /// Test that ephemeral VMs have the expected mount layout: /// - / is read-only virtiofs /// - /etc is overlayfs with tmpfs upper (writable) diff --git a/crates/kit/src/kernel.rs b/crates/kit/src/kernel.rs new file mode 100644 index 00000000..5804c8b9 --- /dev/null +++ b/crates/kit/src/kernel.rs @@ -0,0 +1,497 @@ +//! Kernel detection for container images. +//! +//! This module provides functionality to detect kernel and initramfs in container +//! images, supporting both traditional kernels (with separate vmlinuz/initrd) and +//! Unified Kernel Images (UKI). + +use std::path::Path; + +use camino::{Utf8Path, Utf8PathBuf}; +use cap_std_ext::cap_std::fs::Dir; +use cap_std_ext::dirext::CapStdExtDirExt; +use color_eyre::eyre::{bail, Context, Result}; + +/// The EFI Linux directory where UKIs are stored (relative to /boot) +const EFI_LINUX: &str = "EFI/Linux"; + +/// The modules directory (relative to /usr/lib) +const MODULES_DIR: &str = "modules"; + +/// UKI file extension +const UKI_EXTENSION: &str = "efi"; + +/// Traditional kernel filename +const VMLINUZ: &str = "vmlinuz"; + +/// Traditional initramfs filename +const INITRAMFS: &str = "initramfs.img"; + +/// Information about a kernel found in a container image. +#[derive(Debug, Clone)] +pub struct KernelInfo { + /// Path to the kernel (vmlinuz or UKI .efi file) + pub kernel_path: Utf8PathBuf, + /// Path to the initramfs (only for traditional kernels, None for UKI) + pub initramfs_path: Option, + /// Whether this is a Unified Kernel Image + pub is_uki: bool, +} + +/// Find kernel/initramfs in a container image root directory. +/// +/// UKIs take precedence over traditional kernels. This handles older images +/// that may have both a UKI and vmlinuz+initramfs. +/// +/// Search order: +/// 1. `/boot/EFI/Linux/*.efi` - UKI in ESP +/// 2. `/usr/lib/modules//*.efi` - UKI alongside modules +/// 3. `/usr/lib/modules//vmlinuz` + `initramfs.img` - traditional +/// +/// Returns an error if multiple UKIs are found, or if no UKI exists and +/// multiple traditional kernels are found. +/// Returns `None` if no kernel is found. +pub fn find_kernel(root: &Dir) -> Result> { + // First, collect all UKIs + let mut ukis: Vec = Vec::new(); + ukis.extend(find_ukis_in_esp(root)?); + ukis.extend(find_ukis_in_modules(root)?); + + // If we have UKIs, require exactly one + if !ukis.is_empty() { + return match ukis.len() { + 1 => Ok(ukis.into_iter().next()), + n => { + let paths: Vec<_> = ukis.iter().map(|k| k.kernel_path.as_str()).collect(); + bail!( + "Found {n} UKIs, expected exactly one:\n {}", + paths.join("\n ") + ); + } + }; + } + + // No UKIs found, look for traditional kernels + let traditional = find_traditional_kernels_in_modules(root)?; + + match traditional.len() { + 0 => Ok(None), + 1 => Ok(traditional.into_iter().next()), + n => { + let paths: Vec<_> = traditional.iter().map(|k| k.kernel_path.as_str()).collect(); + bail!( + "Found {n} traditional kernels, expected exactly one:\n {}", + paths.join("\n ") + ); + } + } +} + +/// Check if a filename has the UKI extension (.efi) +fn is_uki_file(name: &std::ffi::OsStr) -> bool { + Path::new(name) + .extension() + .is_some_and(|ext| ext == UKI_EXTENSION) +} + +/// Find all UKIs in /boot/EFI/Linux/*.efi +fn find_ukis_in_esp(root: &Dir) -> Result> { + let Some(boot) = root.open_dir_optional("boot")? else { + return Ok(Vec::new()); + }; + let Some(efi_linux) = boot.open_dir_optional(EFI_LINUX)? else { + return Ok(Vec::new()); + }; + + let mut ukis = Vec::new(); + for entry in efi_linux.entries()? { + let entry = entry?; + let name = entry.file_name(); + if is_uki_file(&name) { + if let Some(name_str) = name.to_str() { + ukis.push(KernelInfo { + kernel_path: Utf8PathBuf::from(format!("boot/{EFI_LINUX}/{name_str}")), + initramfs_path: None, + is_uki: true, + }); + } + } + } + + Ok(ukis) +} + +/// Open the modules directory, returning None if it doesn't exist +fn open_modules_dir(root: &Dir) -> Result> { + let Some(usr_lib) = root.open_dir_optional("usr/lib")? else { + return Ok(None); + }; + Ok(usr_lib.open_dir_optional(MODULES_DIR)?) +} + +/// Find all UKIs in /usr/lib/modules//*.efi +fn find_ukis_in_modules(root: &Dir) -> Result> { + let Some(modules) = open_modules_dir(root)? else { + return Ok(Vec::new()); + }; + + let mut ukis = Vec::new(); + + for entry in modules.entries()? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + let Some(version) = entry.file_name().to_str().map(|s| s.to_owned()) else { + continue; + }; + + let version_dir = modules + .open_dir(&version) + .with_context(|| format!("opening modules/{version}"))?; + + for uki_name in find_ukis_in_version_dir(&version_dir)? { + ukis.push(KernelInfo { + kernel_path: Utf8PathBuf::from(format!( + "usr/lib/{MODULES_DIR}/{version}/{uki_name}" + )), + initramfs_path: None, + is_uki: true, + }); + } + } + + Ok(ukis) +} + +/// Find all traditional kernels in /usr/lib/modules// +fn find_traditional_kernels_in_modules(root: &Dir) -> Result> { + let Some(modules) = open_modules_dir(root)? else { + return Ok(Vec::new()); + }; + + let mut kernels = Vec::new(); + + for entry in modules.entries()? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + let Some(version) = entry.file_name().to_str().map(|s| s.to_owned()) else { + continue; + }; + + let version_dir = modules + .open_dir(&version) + .with_context(|| format!("opening modules/{version}"))?; + + if has_traditional_kernel(&version_dir) { + kernels.push(KernelInfo { + kernel_path: Utf8PathBuf::from(format!( + "usr/lib/{MODULES_DIR}/{version}/{VMLINUZ}" + )), + initramfs_path: Some(Utf8PathBuf::from(format!( + "usr/lib/{MODULES_DIR}/{version}/{INITRAMFS}" + ))), + is_uki: false, + }); + } + } + + Ok(kernels) +} + +/// Find all UKI (.efi files) in a kernel version directory +fn find_ukis_in_version_dir(version_dir: &Dir) -> Result> { + let mut ukis = Vec::new(); + for entry in version_dir.entries()? { + let entry = entry?; + let name = entry.file_name(); + if is_uki_file(&name) && entry.file_type()?.is_file() { + if let Some(name_str) = name.to_str() { + ukis.push(name_str.to_owned()); + } + } + } + Ok(ukis) +} + +/// Check if a version directory has a traditional kernel (vmlinuz + initramfs.img) +fn has_traditional_kernel(version_dir: &Dir) -> bool { + version_dir.exists(VMLINUZ) && version_dir.exists(INITRAMFS) +} + +/// Prepend a root path prefix to a KernelInfo's paths +pub fn with_root_prefix(info: KernelInfo, root: &Utf8Path) -> KernelInfo { + KernelInfo { + kernel_path: root.join(&info.kernel_path), + initramfs_path: info.initramfs_path.map(|p| root.join(&p)), + is_uki: info.is_uki, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use cap_std_ext::cap_std; + use cap_std_ext::cap_tempfile; + + #[test] + fn test_find_kernel_none() -> Result<()> { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + assert!(find_kernel(&tempdir)?.is_none()); + Ok(()) + } + + #[test] + fn test_find_kernel_traditional() -> Result<()> { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + tempdir.create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64")?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/vmlinuz", + b"fake kernel", + )?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/initramfs.img", + b"fake initramfs", + )?; + + let info = find_kernel(&tempdir)?.expect("should find kernel"); + assert!(!info.is_uki); + assert!(info.kernel_path.as_str().contains("vmlinuz")); + assert!(info.initramfs_path.is_some()); + assert!(info + .initramfs_path + .as_ref() + .unwrap() + .as_str() + .contains("initramfs.img")); + Ok(()) + } + + #[test] + fn test_find_kernel_uki_in_esp() -> Result<()> { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + tempdir.create_dir_all("boot/EFI/Linux")?; + tempdir.atomic_write("boot/EFI/Linux/fedora-6.12.0.efi", b"fake uki")?; + + let info = find_kernel(&tempdir)?.expect("should find kernel"); + assert!(info.is_uki); + assert!(info.kernel_path.as_str().contains("fedora-6.12.0.efi")); + assert!(info.initramfs_path.is_none()); + Ok(()) + } + + #[test] + fn test_find_kernel_uki_in_modules() -> Result<()> { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + tempdir.create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64")?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/6.12.0-100.fc41.x86_64.efi", + b"fake uki", + )?; + + let info = find_kernel(&tempdir)?.expect("should find kernel"); + assert!(info.is_uki); + assert!(info + .kernel_path + .as_str() + .contains("6.12.0-100.fc41.x86_64.efi")); + assert!(info.initramfs_path.is_none()); + Ok(()) + } + + #[test] + fn test_find_kernel_uki_preferred_over_traditional() -> Result<()> { + // Old images may have both UKI and vmlinuz - UKI should take precedence + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + + // Traditional kernel in modules + tempdir.create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64")?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/vmlinuz", + b"fake kernel", + )?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/initramfs.img", + b"fake initramfs", + )?; + + // UKI in ESP + tempdir.create_dir_all("boot/EFI/Linux")?; + tempdir.atomic_write("boot/EFI/Linux/fedora-6.12.0.efi", b"fake uki")?; + + // Should find the UKI, ignoring traditional kernel + let info = find_kernel(&tempdir)?.expect("should find kernel"); + assert!(info.is_uki); + assert!(info.kernel_path.as_str().contains("fedora-6.12.0.efi")); + Ok(()) + } + + #[test] + fn test_find_kernel_uki_preferred_in_same_dir() -> Result<()> { + // UKI and traditional in same version dir - UKI takes precedence + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority())?; + tempdir.create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64")?; + + // Both UKI and traditional in same version dir + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/vmlinuz", + b"fake kernel", + )?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/initramfs.img", + b"fake initramfs", + )?; + tempdir.atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/6.12.0-100.fc41.x86_64.efi", + b"fake uki", + )?; + + // Should find the UKI, ignoring traditional kernel + let info = find_kernel(&tempdir)?.expect("should find kernel"); + assert!(info.is_uki); + assert!(info + .kernel_path + .as_str() + .contains("6.12.0-100.fc41.x86_64.efi")); + Ok(()) + } + + #[test] + fn test_find_kernel_multiple_ukis_in_esp_errors() { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority()).unwrap(); + tempdir.create_dir_all("boot/EFI/Linux").unwrap(); + tempdir + .atomic_write("boot/EFI/Linux/zzz.efi", b"fake uki") + .unwrap(); + tempdir + .atomic_write("boot/EFI/Linux/aaa.efi", b"fake uki") + .unwrap(); + tempdir + .atomic_write("boot/EFI/Linux/mmm.efi", b"fake uki") + .unwrap(); + + let result = find_kernel(&tempdir); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Found 3 UKIs")); + } + + #[test] + fn test_find_kernel_multiple_versions_errors() { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority()).unwrap(); + + // Two different kernel versions + tempdir + .create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64") + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/vmlinuz", + b"fake kernel", + ) + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/initramfs.img", + b"fake initramfs", + ) + .unwrap(); + + tempdir + .create_dir_all("usr/lib/modules/6.11.0-50.fc41.x86_64") + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.11.0-50.fc41.x86_64/vmlinuz", + b"fake kernel", + ) + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.11.0-50.fc41.x86_64/initramfs.img", + b"fake initramfs", + ) + .unwrap(); + + let result = find_kernel(&tempdir); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Found 2 traditional kernels")); + } + + #[test] + fn test_find_kernel_multiple_ukis_in_modules_errors() { + let tempdir = cap_tempfile::tempdir(cap_std::ambient_authority()).unwrap(); + + // Two UKIs in different version directories + tempdir + .create_dir_all("usr/lib/modules/6.12.0-100.fc41.x86_64") + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.12.0-100.fc41.x86_64/6.12.0-100.fc41.x86_64.efi", + b"fake uki", + ) + .unwrap(); + + tempdir + .create_dir_all("usr/lib/modules/6.11.0-50.fc41.x86_64") + .unwrap(); + tempdir + .atomic_write( + "usr/lib/modules/6.11.0-50.fc41.x86_64/6.11.0-50.fc41.x86_64.efi", + b"fake uki", + ) + .unwrap(); + + let result = find_kernel(&tempdir); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Found 2 UKIs")); + } + + #[test] + fn test_with_root_prefix() { + let info = KernelInfo { + kernel_path: Utf8PathBuf::from("boot/EFI/Linux/test.efi"), + initramfs_path: None, + is_uki: true, + }; + + let prefixed = with_root_prefix(info, Utf8Path::new("/run/source-image")); + assert_eq!( + prefixed.kernel_path.as_str(), + "/run/source-image/boot/EFI/Linux/test.efi" + ); + } + + #[test] + fn test_with_root_prefix_traditional() { + let info = KernelInfo { + kernel_path: Utf8PathBuf::from("usr/lib/modules/6.12.0/vmlinuz"), + initramfs_path: Some(Utf8PathBuf::from("usr/lib/modules/6.12.0/initramfs.img")), + is_uki: false, + }; + + let prefixed = with_root_prefix(info, Utf8Path::new("/run/source-image")); + assert_eq!( + prefixed.kernel_path.as_str(), + "/run/source-image/usr/lib/modules/6.12.0/vmlinuz" + ); + assert_eq!( + prefixed.initramfs_path.as_ref().unwrap().as_str(), + "/run/source-image/usr/lib/modules/6.12.0/initramfs.img" + ); + } + + #[test] + fn test_is_uki_file() { + use std::ffi::OsStr; + assert!(is_uki_file(OsStr::new("kernel.efi"))); + assert!(is_uki_file(OsStr::new("6.12.0-100.fc41.x86_64.efi"))); + assert!(!is_uki_file(OsStr::new("vmlinuz"))); + assert!(!is_uki_file(OsStr::new("initramfs.img"))); + assert!(!is_uki_file(OsStr::new("config"))); + } +} diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index 35860d5d..aa9486ba 100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -1,5 +1,6 @@ //! bcvk library - exposes internal modules for testing pub mod cpio; +pub mod kernel; pub mod qemu_img; pub mod xml_utils; diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index 8d5326f1..468acd71 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -17,6 +17,7 @@ mod ephemeral; mod images; mod install_options; mod instancetypes; +mod kernel; mod libvirt; mod libvirt_upload_disk; #[allow(dead_code)] diff --git a/crates/kit/src/qemu.rs b/crates/kit/src/qemu.rs index a12207b9..ecb8f89c 100644 --- a/crates/kit/src/qemu.rs +++ b/crates/kit/src/qemu.rs @@ -115,7 +115,14 @@ impl Default for ResourceLimits { #[derive(Debug)] pub enum BootMode { /// Direct kernel boot (fast, testing-focused) - /// Also used for UKI boot after extracting kernel/initramfs from UKI PE sections + /// Also used for UKI boot after extracting kernel/initramfs from UKI PE sections. + /// + /// Note: For UKI images, we extract kernel/initramfs using objcopy rather than + /// booting the UKI directly via OVMF. This allows us to append bcvk units to + /// the initramfs for /etc overlay and /var setup. The tradeoff is that this + /// breaks the UKI signature chain, so Secure Boot is not supported for + /// ephemeral runs. See https://github.com/bootc-dev/bcvk/issues/161 for + /// future work on UEFI boot support. DirectBoot { kernel_path: String, initramfs_path: String, diff --git a/crates/kit/src/run_ephemeral.rs b/crates/kit/src/run_ephemeral.rs index 3ab10cad..1b45f02a 100644 --- a/crates/kit/src/run_ephemeral.rs +++ b/crates/kit/src/run_ephemeral.rs @@ -879,53 +879,49 @@ pub(crate) async fn run_impl(opts: RunEphemeralOpts) -> Result<()> { // Create QEMU mount points fs::create_dir_all("/run/qemu")?; - // Find kernel and initramfs in /usr/lib/modules/ - let modules_dir = Utf8Path::new("/run/source-image/usr/lib/modules"); - let mut uki_file: Option = None; - let mut vmlinuz_path: Option = None; - let mut initramfs_path: Option = None; - - let entries = fs::read_dir(modules_dir) - .with_context(|| format!("Failed to read kernel modules directory at {}. This container image may not be a valid bootc image.", modules_dir))?; + // Find kernel and initramfs using the kernel detection module + let source_root = cap_std_ext::cap_std::fs::Dir::open_ambient_dir( + "/run/source-image", + cap_std_ext::cap_std::ambient_authority(), + ) + .context("opening /run/source-image")?; + + let kernel_info = crate::kernel::find_kernel(&source_root) + .context("searching for kernel")? + .ok_or_else(|| { + eyre!( + "No kernel found. Checked:\n\ + - /boot/EFI/Linux/*.efi (UKI)\n\ + - /usr/lib/modules//.efi (UKI)\n\ + - /usr/lib/modules//vmlinuz + initramfs.img" + ) + })?; - for entry in entries { - let entry = entry?; - let path = Utf8PathBuf::from_path_buf(entry.path()) - .map_err(|p| eyre!("Path is not valid UTF-8: {}", p.display()))?; - - // Check for UKI (.efi file) - if path.is_file() && path.extension() == Some("efi") { - debug!("Found UKI file: {:?}", path); - uki_file = Some(path); - break; - } + // Add the source-image prefix to get absolute paths + let kernel_info = + crate::kernel::with_root_prefix(kernel_info, Utf8Path::new("/run/source-image")); - // Check for traditional kernel in subdirectories - if path.is_dir() { - let vmlinuz = path.join("vmlinuz"); - let initramfs = path.join("initramfs.img"); - if vmlinuz.exists() && initramfs.exists() { - debug!("Found kernel at: {:?}", vmlinuz); - vmlinuz_path = Some(vmlinuz); - initramfs_path = Some(initramfs); - break; - } - } - } + debug!( + "Found kernel: {:?} (UKI: {})", + kernel_info.kernel_path, kernel_info.is_uki + ); let kernel_mount = "/run/qemu/kernel"; let initramfs_mount = "/run/qemu/initramfs"; // Extract from UKI if found, otherwise use traditional kernel - if let Some(uki_path) = uki_file { - debug!("Extracting kernel and initramfs from UKI: {:?}", uki_path); + if kernel_info.is_uki { + debug!( + "Extracting kernel and initramfs from UKI: {:?}", + kernel_info.kernel_path + ); // Extract .linux section (kernel) from UKI Command::new("objcopy") .args([ "--dump-section", &format!(".linux={}", kernel_mount), - uki_path.as_str(), + kernel_info.kernel_path.as_str(), ]) .run() .map_err(|e| eyre!("Failed to extract kernel from UKI: {e}"))?; @@ -936,27 +932,33 @@ pub(crate) async fn run_impl(opts: RunEphemeralOpts) -> Result<()> { .args([ "--dump-section", &format!(".initrd={}", initramfs_mount), - uki_path.as_str(), + kernel_info.kernel_path.as_str(), ]) .run() .map_err(|e| eyre!("Failed to extract initramfs from UKI: {e}"))?; debug!("Extracted initramfs from UKI to {}", initramfs_mount); } else { - let vmlinuz_path = vmlinuz_path - .ok_or_else(|| eyre!("No kernel found in /run/source-image/usr/lib/modules"))?; - let source_initramfs_path = initramfs_path - .ok_or_else(|| eyre!("No initramfs found in /run/source-image/usr/lib/modules"))?; + let source_initramfs_path = kernel_info + .initramfs_path + .as_ref() + .ok_or_else(|| eyre!("Traditional kernel found but no initramfs path"))?; - fs::File::create(&kernel_mount)?; + fs::File::create(kernel_mount)?; // Bind mount kernel (read-only is fine) Command::new("mount") - .args(["--bind", "-o", "ro", vmlinuz_path.as_str(), &kernel_mount]) + .args([ + "--bind", + "-o", + "ro", + kernel_info.kernel_path.as_str(), + kernel_mount, + ]) .run() .map_err(|e| eyre!("Failed to bind mount kernel: {e}"))?; // Copy initramfs so we can append to it - fs::copy(&source_initramfs_path, &initramfs_mount) + fs::copy(source_initramfs_path, initramfs_mount) .map_err(|e| eyre!("Failed to copy initramfs: {e}"))?; } diff --git a/docs/todo/ephemeral-uefi.md b/docs/todo/ephemeral-uefi.md new file mode 100644 index 00000000..a7a41079 --- /dev/null +++ b/docs/todo/ephemeral-uefi.md @@ -0,0 +1,195 @@ +# TODO: UEFI Boot for Ephemeral VMs + +Tracking issue: https://github.com/bootc-dev/bcvk/issues/161 + +## Current State + +Ephemeral VMs currently use direct kernel boot via QEMU's `-kernel` and +`-initrd` options. For UKI-only images, we extract the kernel and initramfs +from the UKI using `objcopy --dump-section`. + +### Where UKIs Live in bootc Images + +UKIs can be in either location: +- `/boot/EFI/Linux/*.efi` - ESP location (Boot Loader Specification) +- `/usr/lib/modules//.efi` - alongside kernel modules + +bcvk checks all locations: +1. `/boot/EFI/Linux/*.efi` - UKI in ESP +2. `/usr/lib/modules//.efi` - UKI alongside modules +3. `/usr/lib/modules//vmlinuz` + `initramfs.img` - traditional + +### Limitations + +This works but has limitations: +- Doesn't exercise the real systemd-boot/UKI boot path +- Breaks the UKI signature chain (no Secure Boot) + +## Phase 1: systemd-boot + UKI Boot + +**Goal**: Support booting ephemeral VMs through systemd-boot + UKI path, matching +more closely the boot process for "full installs". + +### Approach + +Use modern systemd features to inject bcvk's configuration without modifying +the UKI itself: + +1. **`io.systemd.stub.kernel-cmdline-extra`** (SMBIOS credential) + - systemd-stub reads this from SMBIOS Type 11 strings + - We already pass credentials via SMBIOS, so this is a natural fit + - Use this to pass bcvk's kernel command line arguments + +2. **System/Config Extensions** for injecting units + - `*.sysext.raw` or `*.confext.raw` placed on the ESP + - systemd-stub loads these and makes them available to the initrd + - Can contain bcvk's systemd units for /etc overlay, /var setup, etc. + +### Implementation Steps + +1. **Create ESP image dynamically**: + - Build a small FAT32 disk image using `mtools` (no root required) + - Copy the UKI from the container to `/EFI/Linux/.efi` + - Create bcvk confext with our systemd units + +2. **Boot via OVMF**: + - Pass OVMF firmware to QEMU (`-bios` or `-drive if=pflash`) + - Attach ESP as a disk + - systemd-boot auto-discovers and boots the UKI + - Pass the virtiofs mount as a karg, same as we do today + +3. **Pass credentials via SMBIOS**: + - Continue using existing SMBIOS credential mechanism + - Add `io.systemd.stub.kernel-cmdline-extra` for additional cmdline args + +### Requirements + +- systemd >= 254 for robust `kernel-cmdline-extra` support +- OVMF firmware available on the host +- `mtools` for ESP creation (or `mkfs.fat` + loop mount with privileges) + +## Phase 2: Secure Boot Support (Nice to Have) + +**Goal**: Support Secure Boot for ephemeral VMs, maintaining the full trust +chain from firmware through UKI. + +### Key Insight: Upstream the Mount Setup to bootc + +The cleanest path to Secure Boot support is to **not require bcvk-specific +initramfs modifications at all**. The baseline functionality that bcvk +currently injects (e.g., /etc overlay, /var tmpfs setup) should be handled +by bootc's upstream initramfs code, triggered by kernel command line +arguments or systemd credentials. + +This means: +- bootc's initramfs generator includes support for ephemeral/read-only root +- bcvk just passes the right cmdline args via `io.systemd.stub.kernel-cmdline-extra` +- The UKI remains completely unmodified, preserving its signature +- Secure Boot works out of the box + +### What Needs Upstreaming to bootc + +1. **Ephemeral /etc overlay**: Mount /etc as an overlay with tmpfs upper + - Triggered by e.g. `bootc.etc=overlay` or a credential + +2. **Ephemeral /var**: Mount /var as tmpfs instead of persistent storage + - Triggered by e.g. `bootc.var=tmpfs` + +3. **Read-only root awareness**: Handle virtiofs or other read-only root + filesystems gracefully + +Once these are in bootc's initramfs, bcvk ephemeral mode becomes: +1. Boot the UKI via OVMF (no modifications) +2. Pass credentials/cmdline via SMBIOS +3. Done - Secure Boot compatible + +### bcvk-Specific Features (Still Need Injection) + +Some bcvk features may still need addon EFI or confext injection: +- Journal streaming to host (`--log` functionality) +- Execute command services (`--execute`) +- SSH key injection (though credentials may suffice) + +For these, the Phase 1 confext approach works, and signing becomes a +user choice rather than a hard requirement. + +The challenge is that anything we inject this way via systemd-stub +needs signing. + +I think what might work here is for us to locally sign our generated +content, and then inject those signing keys into the firmware trust roots +too. + +## Technical Details + +### systemd-stub Addon Mechanism + +From systemd source (`src/boot/stub.c`), addon files named `*.addon.efi` +placed next to the UKI are loaded as PE binaries: + +```c +// Addon .initrd sections are appended to the base initrd +if (initrd_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_INITRD)) { + // ... loads .initrd section from addon +} +``` + +Addon EFI binaries can contain: +- `.initrd` section - appended to base initrd (measured into PCR 12) +- `.cmdline` section - appended to kernel command line + +### UKI Location in Container + +bootc images store UKIs at: +``` +/boot/EFI/Linux/.efi +``` + +For composefs sealed images, bootc uses a subdirectory: +``` +/boot/EFI/Linux/bootc/.efi +``` + +### ESP Layout for UEFI Boot + +For Phase 1, bcvk would create a virtual ESP with: +``` +/EFI/ + BOOT/ + BOOTX64.EFI # systemd-boot + Linux/ + .efi # UKI copied from container's /boot/EFI/Linux/ +``` + +For Phase 2 with addons: +``` +/EFI/ + Linux/ + .efi # The UKI + systemd/ + addon/ + bcvk.addon.efi # bcvk addon (signed for Secure Boot) +``` + +Or for confexts: +``` +/loader/ + addons/ + bcvk.confext.raw # Configuration extension with bcvk units +``` + +### SMBIOS Credentials + +systemd-stub reads these SMBIOS Type 11 strings: +- `io.systemd.credential:=` - arbitrary credentials +- `io.systemd.stub.kernel-cmdline-extra=` - extra kernel arguments + +We already use SMBIOS for credentials; extending this is straightforward. + +## References + +- [systemd-stub(7)](https://man7.org/linux/man-pages/man7/systemd-stub.7.html) - UEFI stub documentation +- [systemd-boot(7)](https://man7.org/linux/man-pages/man7/systemd-boot.7.html) - Boot manager +- [systemd-sysext(8)](https://man7.org/linux/man-pages/man8/systemd-sysext.8.html) - System extensions +- [ukify(1)](https://www.freedesktop.org/software/systemd/man/latest/ukify.html) - UKI build tool +- https://github.com/bootc-dev/bootc/issues/1940 - Related bootc issue