From c7cdc4f86ad05db28ee8a8f59f120336aa28eff6 Mon Sep 17 00:00:00 2001
From: Chaser Huang
Date: Sat, 3 Jan 2026 09:22:39 -0500
Subject: [PATCH 1/2] gc: Fix garbage collection implementation

(Commit message from Colin Walters, Assisted-by: Opus 4.5)

The previous GC implementation was completely broken - it never added
any object IDs to the live set and would mark every object for
deletion. The code assumed an old composefs structure where
non-first-level entries could directly link to object stores.

This commit provides a working GC implementation that:

- Properly walks named references in `*/refs/` directories to find GC
  roots
- Recursively traverses splitstream named references (stream_refs) to
  find all transitively reachable objects, handling the two-layer OCI
  structure where config splitstreams reference layer splitstreams
- Supports caller-specified additional GC roots via `--root-images`
  and `--root-streams` flags, useful for external integrations like
  bootc
- Actually performs deletions (previously was dry-run only)
- Cleans up broken symlinks in images/ and streams/ after GC
- Uses `--dry-run` flag (conventional) instead of `--force` (inverted)

Includes comprehensive test coverage for:

- Stream and image GC with various root configurations
- Shared objects between multiple streams/images
- Named reference traversal with different table vs repo names

Signed-off-by: Chaser Huang
Signed-off-by: Colin Walters
---
 crates/cfsctl/src/main.rs          |   20 +-
 crates/composefs/src/repository.rs | 1044 ++++++++++++++++++++++++++--
 2 files changed, 1002 insertions(+), 62 deletions(-)

diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs
index 881b64c3..f9ab225d 100644
--- a/crates/cfsctl/src/main.rs
+++ b/crates/cfsctl/src/main.rs
@@ -135,7 +135,17 @@ enum Command {
         name: String,
     },
     /// Perform garbage collection
-    GC,
+    GC {
+        // digest of root images for gc operations
+        #[clap(long, short = 'i')]
+        root_images: Vec<String>,
+        // digest of root streams for gc operations
+        #[clap(long, short = 's')]
+        root_streams: Vec<String>,
+        /// Preview what would be deleted without actually deleting
+        #[clap(long, short = 'n')]
+        dry_run: bool,
+    },
     /// Imports a composefs image (unsafe!)
     ImportImage { reference: String },
     /// Commands for dealing with OCI layers
@@ -402,8 +412,12 @@ where
                 println!("{}", object.to_id());
             }
         }
-        Command::GC => {
-            repo.gc()?;
+        Command::GC {
+            root_images,
+            root_streams,
+            dry_run,
+        } => {
+            repo.gc(&root_images, &root_streams, dry_run)?;
         }
         #[cfg(feature = "http")]
         Command::Fetch { url, name } => {
diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs
index 1006754c..d38e5842 100644
--- a/crates/composefs/src/repository.rs
+++ b/crates/composefs/src/repository.rs
@@ -5,24 +5,28 @@
 //! verification and garbage collection support.
 use std::{
-    collections::HashSet,
-    ffi::CStr,
+    collections::{HashMap, HashSet},
+    ffi::{CStr, CString, OsStr, OsString},
     fs::{canonicalize, File},
     io::{Read, Write},
-    os::fd::{AsFd, OwnedFd},
+    os::{
+        fd::{AsFd, OwnedFd},
+        unix::ffi::OsStrExt,
+    },
     path::{Path, PathBuf},
     sync::Arc,
     thread::available_parallelism,
 };

+use log::{debug, info, trace};
 use tokio::sync::Semaphore;

 use anyhow::{bail, ensure, Context, Result};
 use once_cell::sync::OnceCell;
 use rustix::{
     fs::{
-        flock, linkat, mkdirat, open, openat, readlinkat, statat, syncfs, AtFlags, Dir, FileType,
-        FlockOperation, Mode, OFlags, CWD,
+        flock, linkat, mkdirat, open, openat, readlinkat, statat, syncfs, unlinkat, AtFlags, Dir,
+        FileType, FlockOperation, Mode, OFlags, CWD,
     },
     io::{Errno, Result as ErrnoResult},
 };
@@ -93,6 +97,12 @@ impl Drop for Repository {
     }
 }

+// For Repository::gc_category
+enum GCCategoryWalkMode {
+    RefsOnly,
+    AllEntries,
+}
+
 impl Repository {
     /// Return the objects directory.
     pub fn objects_dir(&self) -> ErrnoResult<&OwnedFd> {
@@ -780,7 +790,7 @@ impl Repository {
         Ok(ObjectID::from_object_pathname(link_content.to_bytes())?)
     }

-    fn walk_symlinkdir(fd: OwnedFd, objects: &mut HashSet<ObjectID>) -> Result<()> {
+    fn walk_symlinkdir(fd: OwnedFd, entry_digests: &mut HashSet<OsString>) -> Result<()> {
         for item in Dir::read_from(&fd)? {
             let entry = item?;
             // NB: the underlying filesystem must support returning filetype via direntry
@@ -789,12 +799,25 @@
                 FileType::Directory => {
                     let filename = entry.file_name();
                     if filename != c"." && filename != c".." {
-                        let dirfd = openat(&fd, filename, OFlags::RDONLY, Mode::empty())?;
-                        Self::walk_symlinkdir(dirfd, objects)?;
+                        let dirfd = openat(
+                            &fd,
+                            filename,
+                            OFlags::RDONLY | OFlags::CLOEXEC,
+                            Mode::empty(),
+                        )?;
+                        Self::walk_symlinkdir(dirfd, entry_digests)?;
                     }
                 }
                 FileType::Symlink => {
-                    objects.insert(Self::read_symlink_hashvalue(&fd, entry.file_name())?);
+                    let link_content = readlinkat(&fd, entry.file_name(), [])?;
+                    let linked_path = Path::new(OsStr::from_bytes(link_content.as_bytes()));
+                    if let Some(entry_name) = linked_path.file_name() {
+                        entry_digests.insert(entry_name.to_os_string());
+                    } else {
+                        // Does not have a proper file base name (e.g. "..")
+                        // TODO: this case needs to be checked in fsck implementation
+                        continue;
+                    }
                 }
                 _ => {
                     bail!("Unexpected file type encountered");
@@ -816,53 +839,188 @@
         )
     }

-    fn gc_category(&self, category: &str) -> Result<HashSet<ObjectID>> {
-        let mut objects = HashSet::new();
-
+    // For a GC category (images / streams), return underlying entry digests and
+    // object IDs for each entry.
+    // Under RefsOnly mode, only entries explicitly referenced in the `*/refs/`
+    // directory structure are walked and returned.
+    // Under AllEntries mode, all entries are returned.
+    // Note that this function assumes all `*/refs/` links link to 1st level entries
+    // and all 1st level entries link to the object store.
+    // TODO: fsck the above noted assumption
+    fn gc_category(
+        &self,
+        category: &str,
+        mode: GCCategoryWalkMode,
+    ) -> Result<Vec<(ObjectID, String)>> {
         let Some(category_fd) = self
             .openat(category, OFlags::RDONLY | OFlags::DIRECTORY)
             .filter_errno(Errno::NOENT)
-            .context("Opening {category} dir in repository")?
+            .context(format!("Opening {category} dir in repository"))?
         else {
-            return Ok(objects);
+            return Ok(Vec::new());
         };

-        if let Some(refs) = openat(
-            &category_fd,
-            "refs",
-            OFlags::RDONLY | OFlags::DIRECTORY,
-            Mode::empty(),
-        )
-        .filter_errno(Errno::NOENT)
-        .context("Opening {category}/refs dir in repository")?
-        {
-            Self::walk_symlinkdir(refs, &mut objects)?;
+        let mut entry_digests = HashSet::new();
+        match mode {
+            GCCategoryWalkMode::RefsOnly => {
+                if let Some(refs) = openat(
+                    &category_fd,
+                    "refs",
+                    OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
+                    Mode::empty(),
+                )
+                .filter_errno(Errno::NOENT)
+                .context(format!("Opening {category}/refs dir in repository"))?
+                {
+                    Self::walk_symlinkdir(refs, &mut entry_digests)?;
+                }
+            }
+            GCCategoryWalkMode::AllEntries => {
+                // All first-level link entries should be directly object references
+                for item in Dir::read_from(&category_fd)? {
+                    let entry = item?;
+                    let filename = entry.file_name();
+                    if filename != c"refs" && filename != c"." && filename != c".." {
+                        if entry.file_type() != FileType::Symlink {
+                            bail!("category directory contains non-symlink");
+                        }
+                        entry_digests.insert(OsString::from(&OsStr::from_bytes(
+                            entry.file_name().to_bytes(),
+                        )));
+                    }
+                }
+            }
         }

-        for item in Dir::read_from(&category_fd)? {
+        let objects = entry_digests
+            .into_iter()
+            .map(|entry_fn| {
+                Ok((
+                    Self::read_symlink_hashvalue(
+                        &category_fd,
+                        CString::new(entry_fn.as_bytes())?.as_c_str(),
+                    )?,
+                    entry_fn
+                        .to_str()
+                        .context("str conversion fails")?
+                        .to_owned(),
+                ))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(objects)
+    }
+
+    // Remove all broken links from a directory; may operate recursively
+    fn cleanup_broken_links(fd: &OwnedFd, recursive: bool) -> Result<()> {
+        for item in Dir::read_from(fd)? {
             let entry = item?;
-            let filename = entry.file_name();
-            if filename != c"refs" && filename != c"." && filename != c".." {
-                if entry.file_type() != FileType::Symlink {
-                    bail!("category directory contains non-symlink");
+            match entry.file_type() {
+                FileType::Directory => {
+                    if !recursive {
+                        continue;
+                    }
+                    let filename = entry.file_name();
+                    if filename != c"." && filename != c".." {
+                        let dirfd = openat(
+                            fd,
+                            filename,
+                            OFlags::RDONLY | OFlags::CLOEXEC,
+                            Mode::empty(),
+                        )?;
+                        Self::cleanup_broken_links(&dirfd, recursive)?;
+                    }
                 }
-                // TODO: we need to sort this out. the symlink itself might be a sha256 content ID
-                // (as for splitstreams), not an object/ to be preserved.
-                continue;
-
-                /*
-                let mut value = Sha256HashValue::EMPTY;
-                hex::decode_to_slice(filename.to_bytes(), &mut value)?;
+                FileType::Symlink => {
+                    let filename = entry.file_name();
+                    let result = statat(fd, filename, AtFlags::empty())
+                        .filter_errno(Errno::NOENT)
+                        .context("Testing for broken links")?;
+                    if result.is_none() {
+                        unlinkat(fd, filename, AtFlags::empty())
+                            .context("Unlinking broken links")?;
+                    }
+                }

-                if !objects.contains(&value) {
-                    println!("rm {}/{:?}", category, filename);
+                _ => {
+                    bail!("Unexpected file type encountered");
                 }
-                */
             }
         }
+        Ok(())
+    }

-        Ok(objects)
+    // Clean up broken links in a gc category
+    fn cleanup_gc_category(&self, category: &'static str) -> Result<()> {
+        let Some(category_fd) = self
+            .openat(category, OFlags::RDONLY | OFlags::DIRECTORY)
+            .filter_errno(Errno::NOENT)
+            .context(format!("Opening {category} dir in repository"))?
+        else {
+            return Ok(());
+        };
+        // Always cleanup first-level first, then the refs
+        Self::cleanup_broken_links(&category_fd, false)
+            .context(format!("Cleaning up broken links in {category}/"))?;
+        let ref_fd = openat(
+            &category_fd,
+            "refs",
+            OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
+            Mode::empty(),
+        )
+        .filter_errno(Errno::NOENT)
+        .context(format!("Opening {category}/refs to clean up broken links"))?;
+        if let Some(ref dirfd) = ref_fd {
+            Self::cleanup_broken_links(dirfd, true).context(format!(
+                "Cleaning up broken links recursively in {category}/refs"
+            ))?;
+        }
+        Ok(())
+    }
+
+    // Traverse split streams to resolve all linked objects
+    fn walk_streams(
+        &self,
+        stream_name_map: &HashMap<ObjectID, String>,
+        stream_name: &str,
+        walked_streams: &mut HashSet<String>,
+        objects: &mut HashSet<ObjectID>,
+    ) -> Result<()> {
+        if walked_streams.contains(stream_name) {
+            return Ok(());
+        }
+        walked_streams.insert(stream_name.to_owned());
+
+        let mut split_stream = self.open_stream(stream_name, None, None)?;
+        // Plain object references, add to live objects set
+        split_stream.get_object_refs(|id| {
+            debug!("  with {id:?}");
+            objects.insert(id.clone());
+        })?;
+        // Collect all stream names from the named references table to be walked next
+        let streams_to_walk: Vec<_> = split_stream.iter_named_refs().collect();
+        // Note that a stream name from the named references table is not the stream name in the
+        // repository. In practice the repository name is often the table name prefixed with a
+        // stream type (e.g. oci-config-). Here we always match by object ID to be absolutely sure.
+        for (stream_name_in_table, stream_object_id) in streams_to_walk {
+            debug!(
+                "  named reference stream {stream_name_in_table} lives, with {stream_object_id:?}"
+            );
+            objects.insert(stream_object_id.clone());
+            if let Some(stream_name_in_repo) = stream_name_map.get(stream_object_id) {
+                self.walk_streams(
+                    stream_name_map,
+                    stream_name_in_repo,
+                    walked_streams,
+                    objects,
+                )?;
+            } else {
+                // The stream is in the table but not in the repo; the repo is potentially broken, so issue a warning
+                trace!("broken repo: named reference stream {stream_name_in_table} not found as stream in repo");
+            }
+        }
+        Ok(())
     }

     /// Given an image, return the set of all objects referenced by it.
@@ -896,29 +1054,75 @@
     /// # Locking
     ///
     /// An exclusive lock is held for the duration of this operation.
-    pub fn gc(&self) -> Result<()> {
+    ///
+    /// # Dry run mode
+    ///
+    /// When `dry_run` is true, no files are deleted; instead the operation logs
+    /// what would be removed. Use this to preview GC effects before committing.
+    pub fn gc<S: AsRef<str>>(
+        &self,
+        root_image_names: &[S],
+        root_stream_names: &[S],
+        dry_run: bool,
+    ) -> Result<()> {
         flock(&self.repository, FlockOperation::LockExclusive)?;

         let mut objects = HashSet::new();

-        for ref object in self.gc_category("images")?
-        {
-            println!("{object:?} lives as an image");
-            objects.insert(object.clone());
-            objects.extend(self.objects_for_image(&object.to_hex())?);
+        // All GC root image names, as specified by user
+        let root_image_name_set: HashSet<_> = root_image_names
+            .iter()
+            .map(|s| s.as_ref().to_string())
+            .collect();
+        // All images stored in repo
+        let all_images = self.gc_category("images", GCCategoryWalkMode::AllEntries)?;
+        // All GC root images, including those referenced in images/refs
+        let root_images: Vec<_> = all_images
+            .into_iter()
+            // filter only keeps user specified images
+            .filter(|(_id, name)| root_image_name_set.contains(name))
+            // then add images referenced in images/refs
+            .chain(self.gc_category("images", GCCategoryWalkMode::RefsOnly)?)
+            .collect();
+
+        for ref image in root_images {
+            debug!("{image:?} lives as an image");
+            objects.insert(image.0.clone());
+            self.objects_for_image(&image.1)?.iter().for_each(|id| {
+                debug!("  with {id:?}");
+                objects.insert(id.clone());
+            });
         }

-        /* TODO
-        for object in self.gc_category("streams")? {
-            println!("{object:?} lives as a stream");
-            objects.insert(object.clone());
-
-            let mut split_stream = self.open_stream(&object.to_hex(), None, None)?;
-            split_stream.get_object_refs(|id| {
-                println!("  with {id:?}");
-                objects.insert(id.clone());
-            })?;
+        // All GC root stream names, as specified by user
+        let root_stream_name_set: HashSet<_> = root_stream_names
+            .iter()
+            .map(|s| s.as_ref().to_string())
+            .collect();
+        // All streams stored in repo
+        let all_streams = self.gc_category("streams", GCCategoryWalkMode::AllEntries)?;
+        // Reverse map of stream object IDs to their names, used for resolving stream names in repo
+        let stream_name_map: HashMap<_, _> = all_streams.clone().into_iter().collect();
+        // All GC root streams, including those referenced in streams/refs
+        let root_streams: Vec<_> = all_streams
+            .into_iter()
+            // filter only keeps user specified streams
+            .filter(|(_id, name)| root_stream_name_set.contains(name))
+            // then add streams referenced in streams/refs
+            .chain(self.gc_category("streams", GCCategoryWalkMode::RefsOnly)?)
+            .collect();
+
+        let mut walked_streams = HashSet::new();
+        for stream in root_streams {
+            debug!("{stream:?} lives as a stream");
+            objects.insert(stream.0.clone());
+            self.walk_streams(
+                &stream_name_map,
+                &stream.1,
+                &mut walked_streams,
+                &mut objects,
+            )?;
         }
-        */

         for first_byte in 0x0..=0xff {
             let dirfd = match self.openat(
@@ -929,20 +1133,33 @@
                 Err(Errno::NOENT) => continue,
                 Err(e) => Err(e)?,
             };
-            for item in Dir::new(dirfd)? {
+            for item in Dir::read_from(&dirfd)? {
                 let entry = item?;
                 let filename = entry.file_name();
                 if filename != c"." && filename != c".." {
                     let id =
                         ObjectID::from_object_dir_and_basename(first_byte, filename.to_bytes())?;
                     if !objects.contains(&id) {
-                        println!("rm objects/{first_byte:02x}/{filename:?}");
+                        if dry_run {
+                            info!("would remove: objects/{first_byte:02x}/{filename:?}");
+                        } else {
+                            info!("rm objects/{first_byte:02x}/{filename:?}");
+                            unlinkat(&dirfd, filename, AtFlags::empty())?;
+                        }
                     } else {
-                        println!("# objects/{first_byte:02x}/{filename:?} lives");
+                        debug!("objects/{first_byte:02x}/{filename:?} lives");
                     }
                 }
             }
         }

+        // Clean up all broken links
+        if dry_run {
+            info!("Dry run complete; no files were deleted");
+        } else {
+            info!("Cleaning up broken links");
+            self.cleanup_gc_category("images")?;
+            self.cleanup_gc_category("streams")?
+        }
         Ok(flock(&self.repository, FlockOperation::LockShared)?) // XXX: finally { } ?
     }
@@ -951,3 +1168,712 @@ impl Repository {
 // fn fsck(&self) -> Result<()> {
 //     unimplemented!()
 // }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::vec;
+
+    use super::*;
+    use crate::fsverity::Sha512HashValue;
+    use crate::test::tempdir;
+    use rustix::fs::{statat, CWD};
+    use tempfile::TempDir;
+
+    /// Create a test repository in insecure mode (no fs-verity required).
+    fn create_test_repo(path: &Path) -> Result<Arc<Repository<Sha512HashValue>>> {
+        mkdirat(CWD, path, Mode::from_raw_mode(0o755))?;
+        let mut repo = Repository::open_path(CWD, path)?;
+        repo.set_insecure(true);
+        Ok(Arc::new(repo))
+    }
+
+    /// Generate deterministic test data of a given size.
+    fn generate_test_data(size: u64, seed: u8) -> Vec<u8> {
+        (0..size)
+            .map(|i| ((i as u8).wrapping_add(seed)).wrapping_mul(17))
+            .collect()
+    }
+
+    fn read_links_in_repo<P>(tmp: &TempDir, repo_sub_path: P) -> Result<Option<PathBuf>>
+    where
+        P: AsRef<Path>,
+    {
+        let full_path = tmp.path().join("repo").join(repo_sub_path);
+        match readlinkat(CWD, &full_path, Vec::new()) {
+            Ok(result) => Ok(Some(PathBuf::from(result.to_str()?))),
+            Err(rustix::io::Errno::NOENT) => Ok(None),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    // Does not follow symlinks
+    fn test_path_exists_in_repo<P>(tmp: &TempDir, repo_sub_path: P) -> Result<bool>
+    where
+        P: AsRef<Path>,
+    {
+        let full_path = tmp.path().join("repo").join(repo_sub_path);
+        match statat(CWD, &full_path, AtFlags::SYMLINK_NOFOLLOW) {
+            Ok(_) => Ok(true),
+            Err(rustix::io::Errno::NOENT) => Ok(false),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    fn test_object_exists(tmp: &TempDir, obj: &Sha512HashValue) -> Result<bool> {
+        let digest = obj.to_hex();
+        let (first_two, remainder) = digest.split_at(2);
+        test_path_exists_in_repo(tmp, &format!("objects/{first_two}/{remainder}"))
+    }
+
+    #[test]
+    fn test_gc_removes_one_stream() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj2)?;
+        let _stream_id = repo.write_stream(writer, "test-stream", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc::<&str>(&vec![], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(!test_object_exists(&tmp, &obj2_id)?);
+        assert!(!test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_stream() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj2)?;
+        let _stream_id = repo.write_stream(writer, "test-stream", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![], &vec!["test-stream"], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_stream_from_refs() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj2)?;
+        let _stream_id = repo.write_stream(writer, "test-stream", Some("ref-name"))?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc::<&str>(&vec![], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_stream_from_two_overlapped() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+        let obj3 = generate_test_data(64 * 1024, 0xAA);
+        let obj4 = generate_test_data(64 * 1024, 0xEE);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+        let obj3_id: Sha512HashValue = compute_verity(&obj3);
+        let obj4_id: Sha512HashValue = compute_verity(&obj4);
+
+        let mut writer1 = repo.create_stream(0);
+        writer1.write_external(&obj2)?;
+        writer1.write_external(&obj3)?;
+        let _stream1_id = repo.write_stream(writer1, "test-stream1", None)?;
+
+        let mut writer2 = repo.create_stream(0);
+        writer2.write_external(&obj2)?;
+        writer2.write_external(&obj4)?;
+        let _stream2_id = repo.write_stream(writer2, "test-stream2", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![], &vec!["test-stream1"], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(!test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(!test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_named_references() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+
+        let mut writer1 = repo.create_stream(0);
+        writer1.write_external(&obj2)?;
+        let stream1_id = repo.write_stream(writer1, "test-stream1", None)?;
+
+        let mut writer2 = repo.create_stream(0);
+        writer2.add_named_stream_ref("test-stream1", &stream1_id);
+        let _stream2_id = repo.write_stream(writer2, "test-stream2", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![], &vec!["test-stream2"], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_named_references_with_different_table_name() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+
+        let mut writer1 = repo.create_stream(0);
+        writer1.write_external(&obj2)?;
+        let stream1_id = repo.write_stream(writer1, "test-stream1", None)?;
+
+        let mut writer2 = repo.create_stream(0);
+        writer2.add_named_stream_ref("different-table-name-for-test-stream1", &stream1_id);
+        let _stream2_id = repo.write_stream(writer2, "test-stream2", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![], &vec!["test-stream2"], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_named_reference_from_two_overlapped() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1 = generate_test_data(32 * 1024, 0xAE);
+        let obj2 = generate_test_data(64 * 1024, 0xEA);
+        let obj3 = generate_test_data(64 * 1024, 0xAA);
+        let obj4 = generate_test_data(64 * 1024, 0xEE);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id: Sha512HashValue = compute_verity(&obj2);
+        let obj3_id: Sha512HashValue = compute_verity(&obj3);
+        let obj4_id: Sha512HashValue = compute_verity(&obj4);
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj2)?;
+        let stream1_id = repo.write_stream(writer, "test-stream1", None)?;
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj3)?;
+        let stream2_id = repo.write_stream(writer, "test-stream2", None)?;
+
+        let mut writer = repo.create_stream(0);
+        writer.write_external(&obj4)?;
+        let stream3_id = repo.write_stream(writer, "test-stream3", None)?;
+
+        let mut writer = repo.create_stream(0);
+        writer.add_named_stream_ref("test-stream1", &stream1_id);
+        writer.add_named_stream_ref("test-stream2", &stream2_id);
+        let _ref_stream1_id = repo.write_stream(writer, "ref-stream1", None)?;
+
+        let mut writer = repo.create_stream(0);
+        writer.add_named_stream_ref("test-stream1", &stream1_id);
+        writer.add_named_stream_ref("test-stream3", &stream3_id);
+        let _ref_stream2_id = repo.write_stream(writer, "ref-stream2", None)?;
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream3")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream3")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/ref-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/ref-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/ref-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/ref-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![], &vec!["ref-stream1".to_string()], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(!test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/test-stream2")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(!test_path_exists_in_repo(&tmp, "streams/test-stream3")?);
+        assert!(test_path_exists_in_repo(&tmp, "streams/ref-stream1")?);
+        let link_target =
+            read_links_in_repo(&tmp, "streams/ref-stream1")?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("streams").join(&link_target)
+        )?);
+        assert!(!test_path_exists_in_repo(&tmp, "streams/ref-stream2")?);
+
+        Ok(())
+    }
+
+    use crate::tree::{FileSystem, Inode, Leaf, LeafContent, RegularFile, Stat};
+
+    /// Create a default root stat for test filesystems
+    fn test_root_stat() -> Stat {
+        Stat {
+            st_mode: 0o755,
+            st_uid: 0,
+            st_gid: 0,
+            st_mtim_sec: 0,
+            xattrs: Default::default(),
+        }
+    }
+
+    /// Make a test in-memory filesystem that only contains one externally referenced object
+    fn make_test_fs(obj: &Sha512HashValue, size: u64) -> FileSystem<Sha512HashValue> {
+        let mut fs: FileSystem<Sha512HashValue> = FileSystem::new(test_root_stat());
+        let inode = Inode::Leaf(std::rc::Rc::new(Leaf {
+            stat: Stat {
+                st_mode: 0o644,
+                st_uid: 0,
+                st_gid: 0,
+                st_mtim_sec: 0,
+                xattrs: Default::default(),
+            },
+            content: LeafContent::Regular(RegularFile::External(obj.clone(), size)),
+        }));
+        fs.root.insert(OsStr::new("data"), inode);
+        fs
+    }
+
+    #[test]
+    fn test_gc_removes_one_image() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1_size: u64 = 32 * 1024;
+        let obj1 = generate_test_data(obj1_size, 0xAE);
+        let obj2_size: u64 = 64 * 1024;
+        let obj2 = generate_test_data(obj2_size, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id = repo.ensure_object(&obj2)?;
+
+        let fs = make_test_fs(&obj2_id, obj2_size);
+        let image1 = fs.commit_image(&repo, None)?;
+        let image1_path = format!("images/{}", image1.to_hex());
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc::<&str>(&vec![], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(!test_object_exists(&tmp, &obj2_id)?);
+        assert!(!test_path_exists_in_repo(&tmp, &image1_path)?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_image() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1_size: u64 = 32 * 1024;
+        let obj1 = generate_test_data(obj1_size, 0xAE);
+        let obj2_size: u64 = 64 * 1024;
+        let obj2 = generate_test_data(obj2_size, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id = repo.ensure_object(&obj2)?;
+
+        let fs = make_test_fs(&obj2_id, obj2_size);
+        let image1 = fs.commit_image(&repo, None)?;
+        let image1_path = format!("images/{}", image1.to_hex());
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![image1.to_hex()], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    #[test]
+    fn test_gc_keeps_one_image_from_refs() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1_size: u64 = 32 * 1024;
+        let obj1 = generate_test_data(obj1_size, 0xAE);
+        let obj2_size: u64 = 64 * 1024;
+        let obj2 = generate_test_data(obj2_size, 0xEA);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id = repo.ensure_object(&obj2)?;
+
+        let fs = make_test_fs(&obj2_id, obj2_size);
+        let image1 = fs.commit_image(&repo, Some("ref-name"))?;
+        let image1_path = format!("images/{}", image1.to_hex());
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc::<&str>(&vec![], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+        Ok(())
+    }
+
+    fn make_test_fs_with_two_files(
+        obj1: &Sha512HashValue,
+        size1: u64,
+        obj2: &Sha512HashValue,
+        size2: u64,
+    ) -> FileSystem<Sha512HashValue> {
+        let mut fs = make_test_fs(obj1, size1);
+        let inode = Inode::Leaf(std::rc::Rc::new(Leaf {
+            stat: Stat {
+                st_mode: 0o644,
+                st_uid: 0,
+                st_gid: 0,
+                st_mtim_sec: 0,
+                xattrs: Default::default(),
+            },
+            content: LeafContent::Regular(RegularFile::External(obj2.clone(), size2)),
+        }));
+        fs.root.insert(OsStr::new("extra_data"), inode);
+        fs
+    }
+
+    #[test]
+    fn test_gc_keeps_one_image_from_two_overlapped() -> Result<()> {
+        let tmp = tempdir();
+        let repo = create_test_repo(&tmp.path().join("repo"))?;
+
+        let obj1_size: u64 = 32 * 1024;
+        let obj1 = generate_test_data(obj1_size, 0xAE);
+        let obj2_size: u64 = 64 * 1024;
+        let obj2 = generate_test_data(obj2_size, 0xEA);
+        let obj3_size: u64 = 64 * 1024;
+        let obj3 = generate_test_data(obj3_size, 0xAA);
+        let obj4_size: u64 = 64 * 1024;
+        let obj4 = generate_test_data(obj4_size, 0xEE);
+
+        let obj1_id = repo.ensure_object(&obj1)?;
+        let obj2_id = repo.ensure_object(&obj2)?;
+        let obj3_id = repo.ensure_object(&obj3)?;
+        let obj4_id = repo.ensure_object(&obj4)?;
+
+        let fs = make_test_fs_with_two_files(&obj2_id, obj2_size, &obj3_id, obj3_size);
+        let image1 = fs.commit_image(&repo, None)?;
+        let image1_path = format!("images/{}", image1.to_hex());
+
+        let fs = make_test_fs_with_two_files(&obj2_id, obj2_size, &obj4_id, obj4_size);
+        let image2 = fs.commit_image(&repo, None)?;
+        let image2_path = format!("images/{}", image2.to_hex());
+
+        repo.sync()?;
+
+        assert!(test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+        assert!(test_path_exists_in_repo(&tmp, &image2_path)?);
+        let link_target = read_links_in_repo(&tmp, &image2_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+
+        // Now perform gc
+        repo.gc(&vec![image1.to_hex()], &vec![], false)?;
+
+        assert!(!test_object_exists(&tmp, &obj1_id)?);
+        assert!(test_object_exists(&tmp, &obj2_id)?);
+        assert!(test_object_exists(&tmp, &obj3_id)?);
+        assert!(!test_object_exists(&tmp, &obj4_id)?);
+        assert!(test_path_exists_in_repo(&tmp, &image1_path)?);
+        let link_target = read_links_in_repo(&tmp, &image1_path)?.expect("link is not broken");
+        assert!(test_path_exists_in_repo(
+            &tmp,
+            PathBuf::from("images").join(&link_target)
+        )?);
+        assert!(!test_path_exists_in_repo(&tmp, &image2_path)?);
+        Ok(())
+    }
+}

From 22d27db4beb2edcf0896146d6a32dc5ad8941a0c Mon Sep 17 00:00:00 2001
From: Colin Walters
Date: Tue, 27 Jan 2026 13:25:38 -0500
Subject: [PATCH 2/2] gc: Return GcResult with statistics, add gc_dry_run()

Refactor the GC API:

- gc(additional_roots) performs GC, returns GcResult with statistics
- gc_dry_run(additional_roots) previews without deleting

Key difference: gc_dry_run() only acquires a shared lock since it
doesn't modify anything, allowing concurrent reads during preview.

Additional roots are looked up in both images and streams by name,
useful for external integrations (like bootc) that track roots outside
the repository's refs.

GcResult includes:
- objects_removed: count of unreferenced objects deleted
- objects_bytes: total bytes freed
- images_pruned/streams_pruned: broken symlinks cleaned up

Tests verify GcResult values match expected counts.

Assisted-by: Claude (Opus)
Signed-off-by: Colin Walters
---
 crates/cfsctl/src/main.rs          |  35 ++--
 crates/composefs/src/repository.rs | 286 +++++++++++++++++++----------
 2 files changed, 217 insertions(+), 104 deletions(-)

diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs
index f9ab225d..6c81c0ea 100644
--- a/crates/cfsctl/src/main.rs
+++ b/crates/cfsctl/src/main.rs
@@ -136,12 +136,9 @@ enum Command {
     },
     /// Perform garbage collection
     GC {
-        // digest of root images for gc operations
-        #[clap(long, short = 'i')]
-        root_images: Vec<String>,
-        // digest of root streams for gc operations
-        #[clap(long, short = 's')]
-        root_streams: Vec<String>,
+        /// Additional roots to keep (image or stream names)
+        #[clap(long, short = 'r')]
+        root: Vec<String>,
         /// Preview what would be deleted without actually deleting
         #[clap(long, short = 'n')]
         dry_run: bool,
@@ -412,12 +409,26 @@ where
                 println!("{}", object.to_id());
             }
         }
-        Command::GC {
-            root_images,
-            root_streams,
-            dry_run,
-        } => {
-            repo.gc(&root_images, &root_streams, dry_run)?;
+        Command::GC { root, dry_run } => {
+            let roots: Vec<&str> = root.iter().map(|s| s.as_str()).collect();
+            let result = if dry_run {
+                repo.gc_dry_run(&roots)?
+            } else {
+                repo.gc(&roots)?
+ }; + if dry_run { + println!("Dry run (no files deleted):"); + } + println!( + "Objects: {} removed ({} bytes)", + result.objects_removed, result.objects_bytes + ); + if result.images_pruned > 0 || result.streams_pruned > 0 { + println!( + "Pruned symlinks: {} images, {} streams", + result.images_pruned, result.streams_pruned + ); + } } #[cfg(feature = "http")] Command::Fetch { url, name } => { diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index d38e5842..00e2a319 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -18,7 +18,7 @@ use std::{ thread::available_parallelism, }; -use log::{debug, info, trace}; +use log::{debug, trace}; use tokio::sync::Semaphore; use anyhow::{bail, ensure, Context, Result}; @@ -97,12 +97,27 @@ impl Drop for Repository { } } -// For Repository::gc_category +/// For Repository::gc_category enum GCCategoryWalkMode { RefsOnly, AllEntries, } +/// Statistics from a garbage collection operation. +/// +/// Returned by [`Repository::gc`] to report what was (or would be) removed. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct GcResult { + /// Number of unreferenced objects removed (or that would be removed) + pub objects_removed: u64, + /// Total bytes of object data removed (or that would be removed) + pub objects_bytes: u64, + /// Number of broken symlinks removed in images/ + pub images_pruned: u64, + /// Number of broken symlinks removed in streams/ + pub streams_pruned: u64, +} + impl Repository { /// Return the objects directory. pub fn objects_dir(&self) -> ErrnoResult<&OwnedFd> { @@ -912,7 +927,10 @@ impl Repository { } // Remove all broken links from a directory, may operate recursively - fn cleanup_broken_links(fd: &OwnedFd, recursive: bool) -> Result<()> { + /// Remove broken symlinks from a directory. + /// If `dry_run` is true, counts but does not remove. Returns the count. + fn cleanup_broken_links(fd: &OwnedFd, recursive: bool, dry_run: bool) -> Result { + let mut count = 0; for item in Dir::read_from(fd)? { let entry = item?; match entry.file_type() { @@ -928,7 +946,7 @@ impl Repository { OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty(), )?; - Self::cleanup_broken_links(&dirfd, recursive)?; + count += Self::cleanup_broken_links(&dirfd, recursive, dry_run)?; } } @@ -938,8 +956,11 @@ impl Repository { .filter_errno(Errno::NOENT) .context("Testing for broken links")?; if result.is_none() { - unlinkat(fd, filename, AtFlags::empty()) - .context("Unlinking broken links")?; + count += 1; + if !dry_run { + unlinkat(fd, filename, AtFlags::empty()) + .context("Unlinking broken links")?; + } } } @@ -948,20 +969,20 @@ impl Repository { } } } - Ok(()) + Ok(count) } - // Clean up broken links in a gc category - fn cleanup_gc_category(&self, category: &'static str) -> Result<()> { + /// Clean up broken links in a gc category. Returns count of links removed. + fn cleanup_gc_category(&self, category: &'static str, dry_run: bool) -> Result { let Some(category_fd) = self .openat(category, OFlags::RDONLY | OFlags::DIRECTORY) .filter_errno(Errno::NOENT) .context(format!("Opening {category} dir in repository"))? 
         else {
-            return Ok(());
+            return Ok(0);
         };
         // Always cleanup first-level first, then the refs
-        Self::cleanup_broken_links(&category_fd, false)
+        let mut count = Self::cleanup_broken_links(&category_fd, false, dry_run)
             .context(format!("Cleaning up broken links in {category}/"))?;
         let ref_fd = openat(
             &category_fd,
@@ -972,11 +993,11 @@
         .filter_errno(Errno::NOENT)
         .context(format!("Opening {category}/refs to clean up broken links"))?;
         if let Some(ref dirfd) = ref_fd {
-            Self::cleanup_broken_links(dirfd, true).context(format!(
+            count += Self::cleanup_broken_links(dirfd, true, dry_run).context(format!(
                 "Cleaning up broken links recursively in {category}/refs"
             ))?;
         }
-        Ok(())
+        Ok(count)
     }

     // Traverse split streams to resolve all linked objects
@@ -1049,81 +1070,90 @@
         tokio::task::spawn_blocking(move || self_.sync()).await?
     }

-    /// Perform a garbage collection operation.
+    /// Perform garbage collection, removing unreferenced objects.
+    ///
+    /// Objects reachable from `images/refs/` or `streams/refs/` are preserved,
+    /// plus any `additional_roots` (looked up in both images and streams).
+    /// Returns statistics about what was removed.
     ///
     /// # Locking
     ///
     /// An exclusive lock is held for the duration of this operation.
+    pub fn gc(&self, additional_roots: &[&str]) -> Result<GcResult> {
+        flock(&self.repository, FlockOperation::LockExclusive)?;
+        self.gc_impl(additional_roots, false)
+    }
+
+    /// Preview what garbage collection would remove, without deleting.
     ///
-    /// # Dry run mode
+    /// Returns the same statistics that [`gc`](Self::gc) would return,
+    /// but no files are actually deleted.
     ///
-    /// When `dry_run` is true, no files are deleted; instead the operation logs
-    /// what would be removed. Use this to preview GC effects before committing.
-    pub fn gc<S: AsRef<str>>(
-        &self,
-        root_image_names: &[S],
-        root_stream_names: &[S],
-        dry_run: bool,
-    ) -> Result<()> {
-        flock(&self.repository, FlockOperation::LockExclusive)?;
+    /// # Locking
+    ///
+    /// A shared lock is held for the duration of this operation (readers
+    /// are not blocked).
+    pub fn gc_dry_run(&self, additional_roots: &[&str]) -> Result<GcResult> {
+        // Shared lock is sufficient since we don't modify anything
+        flock(&self.repository, FlockOperation::LockShared)?;
+        self.gc_impl(additional_roots, true)
+    }

-        let mut objects = HashSet::new();
+    /// Internal GC implementation (lock must already be held).
+    fn gc_impl(&self, additional_roots: &[&str], dry_run: bool) -> Result<GcResult> {
+        let mut result = GcResult::default();
+        let mut live_objects = HashSet::new();

-        // All GC root image names, as specified by user
-        let root_image_name_set: HashSet<_> = root_image_names
-            .iter()
-            .map(|s| s.as_ref().to_string())
-            .collect();
-        // All images stored in repo
+        // Build set of additional roots (checked in both images and streams)
+        let extra_roots: HashSet<_> = additional_roots.iter().map(|s| s.to_string()).collect();
+
+        // Collect images: those in images/refs plus caller-specified roots
         let all_images = self.gc_category("images", GCCategoryWalkMode::AllEntries)?;
-        // All GC root images, including those referenced in images/refs
-        let root_images: Vec<_> = all_images
+        let root_images: Vec<_> = self
+            .gc_category("images", GCCategoryWalkMode::RefsOnly)?
             .into_iter()
-            // filter only keeps user specified images
-            .filter(|(_id, name)| root_image_name_set.contains(name))
-            // then add images referenced in images/refs
-            .chain(self.gc_category("images", GCCategoryWalkMode::RefsOnly)?)
+            .chain(
+                all_images
+                    .into_iter()
+                    .filter(|(_, name)| extra_roots.contains(name)),
+            )
             .collect();

         for ref image in root_images {
             debug!("{image:?} lives as an image");
-            objects.insert(image.0.clone());
+            live_objects.insert(image.0.clone());
             self.objects_for_image(&image.1)?.iter().for_each(|id| {
                 debug!("  with {id:?}");
-                objects.insert(id.clone());
+                live_objects.insert(id.clone());
             });
         }

-        // All GC root stream names, as specified by user
-        let root_stream_name_set: HashSet<_> = root_stream_names
-            .iter()
-            .map(|s| s.as_ref().to_string())
-            .collect();
-        // All streams stored in repo
+        // Collect all streams for the name map, then filter to roots
         let all_streams = self.gc_category("streams", GCCategoryWalkMode::AllEntries)?;
-        // Reverse map of stream object IDs to their names, used for resolving stream names in repo
-        let stream_name_map: HashMap<_, _> = all_streams.clone().into_iter().collect();
-        // All GC root streams, including those referenced in streams/refs
-        let root_streams: Vec<_> = all_streams
+        let stream_name_map: HashMap<_, _> = all_streams.iter().cloned().collect();
+        let root_streams: Vec<_> = self
+            .gc_category("streams", GCCategoryWalkMode::RefsOnly)?
             .into_iter()
-            // filter only keeps user specified streams
-            .filter(|(_id, name)| root_stream_name_set.contains(name))
-            // then add streams referenced in streams/refs
-            .chain(self.gc_category("streams", GCCategoryWalkMode::RefsOnly)?)
+            .chain(
+                all_streams
+                    .into_iter()
+                    .filter(|(_, name)| extra_roots.contains(name)),
+            )
             .collect();

         let mut walked_streams = HashSet::new();
         for stream in root_streams {
             debug!("{stream:?} lives as a stream");
-            objects.insert(stream.0.clone());
+            live_objects.insert(stream.0.clone());
             self.walk_streams(
                 &stream_name_map,
                 &stream.1,
                 &mut walked_streams,
-                &mut objects,
+                &mut live_objects,
             )?;
         }

+        // Walk all objects and remove unreferenced ones
         for first_byte in 0x0..=0xff {
             let dirfd = match self.openat(
                 &format!("objects/{first_byte:02x}"),
@@ -1139,29 +1169,33 @@
             for item in Dir::read_from(&dirfd)? {
                 let entry = item?;
                 let filename = entry.file_name();
                 if filename != c"." && filename != c".." {
                     let id =
                         ObjectID::from_object_dir_and_basename(first_byte, filename.to_bytes())?;
-                    if !objects.contains(&id) {
-                        if dry_run {
-                            info!("would remove: objects/{first_byte:02x}/{filename:?}");
-                        } else {
-                            info!("rm objects/{first_byte:02x}/{filename:?}");
+                    if !live_objects.contains(&id) {
+                        // Get file size before removing
+                        if let Ok(stat) = statat(&dirfd, filename, AtFlags::empty()) {
+                            result.objects_bytes += stat.st_size as u64;
+                        }
+                        result.objects_removed += 1;
+
+                        if !dry_run {
+                            debug!("removing: objects/{first_byte:02x}/{filename:?}");
                             unlinkat(&dirfd, filename, AtFlags::empty())?;
                         }
                     } else {
-                        debug!("objects/{first_byte:02x}/{filename:?} lives");
+                        trace!("objects/{first_byte:02x}/{filename:?} lives");
                     }
                 }
             }
         }

-        // Clean up all broken links
-        if dry_run {
-            info!("Dry run complete; no files were deleted");
-        } else {
-            info!("Cleaning up broken links");
-            self.cleanup_gc_category("images")?;
-            self.cleanup_gc_category("streams")?
-        }
-        Ok(flock(&self.repository, FlockOperation::LockShared)?) // XXX: finally { } ?
+        // Clean up broken symlinks
+        result.images_pruned = self.cleanup_gc_category("images", dry_run)?;
+        result.streams_pruned = self.cleanup_gc_category("streams", dry_run)?;
+
+        // Downgrade to shared lock if we had exclusive (for actual GC)
+        if !dry_run {
+            flock(&self.repository, FlockOperation::LockShared)?;
+        }
+        Ok(result)
     }

     // fn fsck(&self) -> Result<()> {
@@ -1252,12 +1286,18 @@ mod tests {
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc::<&str>(&vec![], &vec![], false)?;
+        // Now perform gc - should remove 2 objects (obj1 + obj2) and 1 stream symlink
+        let result = repo.gc(&[])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(!test_object_exists(&tmp, &obj2_id)?);
         assert!(!test_path_exists_in_repo(&tmp, "streams/test-stream")?);
+
+        // Verify GcResult: 3 objects removed (obj1, obj2, splitstream), stream symlink pruned
+        assert_eq!(result.objects_removed, 3);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 1);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1288,8 +1328,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![], &vec!["test-stream"], false)?;
+        // Now perform gc - should remove only obj1, keep obj2 and stream
+        let result = repo.gc(&["test-stream"])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1300,6 +1340,12 @@
             &tmp,
             PathBuf::from("streams").join(&link_target)
         )?);
+
+        // Verify GcResult: only 1 object removed, no symlinks pruned
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 0);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1330,8 +1376,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc::<&str>(&vec![], &vec![], false)?;
+        // Now perform gc - stream is kept via ref, only obj1 removed
+        let result = repo.gc(&[])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1342,6 +1388,12 @@
             &tmp,
             PathBuf::from("streams").join(&link_target)
         )?);
+
+        // Verify GcResult: 1 object removed, no symlinks pruned (stream has ref)
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 0);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1391,8 +1443,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![], &vec!["test-stream1"], false)?;
+        // Now perform gc - keep stream1, remove obj1, obj4, and stream2
+        let result = repo.gc(&["test-stream1"])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1406,6 +1458,12 @@
             PathBuf::from("streams").join(&link_target)
         )?);
         assert!(!test_path_exists_in_repo(&tmp, "streams/test-stream2")?);
+
+        // Verify GcResult: 3 objects removed (obj1, obj4, stream2's splitstream), 1 stream pruned
+        assert_eq!(result.objects_removed, 3);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 1);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1447,8 +1505,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![], &vec!["test-stream2"], false)?;
+        // Now perform gc - stream2 refs stream1, both kept, only obj1 removed
+        let result = repo.gc(&["test-stream2"])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1466,6 +1524,12 @@ mod tests {
             &tmp,
             PathBuf::from("streams").join(&link_target)
         )?);
+
+        // Verify GcResult: 1 object removed, no symlinks pruned
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 0);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1507,8 +1571,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![], &vec!["test-stream2"], false)?;
+        // Now perform gc - different table name, but same object ID links them
+        let result = repo.gc(&["test-stream2"])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1526,6 +1590,12 @@
             &tmp,
             PathBuf::from("streams").join(&link_target)
         )?);
+
+        // Verify GcResult: 1 object removed, no symlinks pruned
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 0);
+        assert_eq!(result.images_pruned, 0);
         Ok(())
     }

@@ -1608,8 +1678,8 @@
             PathBuf::from("streams").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![], &vec!["ref-stream1".to_string()], false)?;
+        // Now perform gc - ref-stream1 refs stream1+stream2, so keep those and their objects
+        let result = repo.gc(&["ref-stream1"])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1639,6 +1709,12 @@
         )?);
         assert!(!test_path_exists_in_repo(&tmp, "streams/ref-stream2")?);

+        // Verify GcResult: objects removed include obj1, obj4, plus splitstreams for stream3 and ref-stream2
+        assert_eq!(result.objects_removed, 4);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.streams_pruned, 2);
+        assert_eq!(result.images_pruned, 0);
+
         Ok(())
     }

@@ -1700,12 +1776,18 @@
             PathBuf::from("images").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc::<&str>(&vec![], &vec![], false)?;
+        // Now perform gc - no refs, so image and both objects removed
+        let result = repo.gc(&[])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(!test_object_exists(&tmp, &obj2_id)?);
         assert!(!test_path_exists_in_repo(&tmp, &image1_path)?);
+
+        // Verify GcResult: 3 objects removed (obj1, obj2, image erofs), 1 image pruned
+        assert_eq!(result.objects_removed, 3);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.images_pruned, 1);
+        assert_eq!(result.streams_pruned, 0);
         Ok(())
     }

@@ -1737,8 +1819,9 @@
             PathBuf::from("images").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![image1.to_hex()], &vec![], false)?;
+        // Now perform gc - keep image via additional_roots
+        let image1_hex = image1.to_hex();
+        let result = repo.gc(&[image1_hex.as_str()])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1748,6 +1831,12 @@
             &tmp,
             PathBuf::from("images").join(&link_target)
         )?);
+
+        // Verify GcResult: 1 object removed (obj1), no symlinks pruned
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.images_pruned, 0);
+        assert_eq!(result.streams_pruned, 0);
         Ok(())
     }

@@ -1779,8 +1868,8 @@
             PathBuf::from("images").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc::<&str>(&vec![], &vec![], false)?;
+        // Now perform gc - image kept via ref, only obj1 removed
+        let result = repo.gc(&[])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1790,6 +1879,12 @@
             &tmp,
             PathBuf::from("images").join(&link_target)
         )?);
+
+        // Verify GcResult: 1 object removed, no symlinks pruned (image has ref)
+        assert_eq!(result.objects_removed, 1);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.images_pruned, 0);
+        assert_eq!(result.streams_pruned, 0);
         Ok(())
     }

@@ -1860,8 +1955,9 @@
             PathBuf::from("images").join(&link_target)
         )?);

-        // Now perform gc
-        repo.gc(&vec![image1.to_hex()], &vec![], false)?;
+        // Now perform gc - keep image1, remove image2 and its unique objects
+        let image1_hex = image1.to_hex();
+        let result = repo.gc(&[image1_hex.as_str()])?;

         assert!(!test_object_exists(&tmp, &obj1_id)?);
         assert!(test_object_exists(&tmp, &obj2_id)?);
@@ -1874,6 +1970,12 @@
             PathBuf::from("images").join(&link_target)
         )?);
         assert!(!test_path_exists_in_repo(&tmp, &image2_path)?);
+
+        // Verify GcResult: 3 objects removed (obj1, obj4, image2 erofs), 1 image pruned
+        assert_eq!(result.objects_removed, 3);
+        assert!(result.objects_bytes > 0);
+        assert_eq!(result.images_pruned, 1);
+        assert_eq!(result.streams_pruned, 0);
         Ok(())
     }
 }
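
---

Usage note (not part of the patches): after both patches apply, callers drive GC
through the `gc()`/`gc_dry_run()` pair returning `GcResult`, or via the CLI as
`cfsctl gc --dry-run --root NAME`. Below is a minimal sketch of the library flow;
the `composefs::{repository, fsverity}` paths, `open_path()`, and `set_insecure()`
are taken from the code above, while the repository path and the pinned stream
name are hypothetical placeholders.

    use anyhow::Result;
    use composefs::{fsverity::Sha512HashValue, repository::Repository};
    use rustix::fs::CWD;
    use std::path::Path;

    fn preview_then_collect() -> Result<()> {
        // Hypothetical repository location; open_path() and set_insecure()
        // are used exactly as in the tests above.
        let mut repo: Repository<Sha512HashValue> =
            Repository::open_path(CWD, Path::new("/var/lib/composefs/repo"))?;
        repo.set_insecure(true);

        // A root tracked outside the repository's refs (e.g. by bootc);
        // the name here is illustrative.
        let roots = ["oci-config-example"];

        // Preview first: takes only a shared lock and deletes nothing.
        let preview = repo.gc_dry_run(&roots)?;
        println!(
            "would remove {} objects ({} bytes)",
            preview.objects_removed, preview.objects_bytes
        );

        // The real collection takes an exclusive lock for its duration.
        let result = repo.gc(&roots)?;
        println!(
            "pruned {} image and {} stream symlinks",
            result.images_pruned, result.streams_pruned
        );
        Ok(())
    }

This mirrors what the cfsctl wiring in patch 2 does; nothing here goes beyond the
API the two patches introduce.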