From ca3b3511a69f223c46daf959d4e797010cce11a6 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Tue, 21 Jan 2025 01:18:20 +0000 Subject: [PATCH 01/14] chore: Implement cat-file subcommand --- src/commands/cat_file.rs | 122 ++++++++++++++++++++++++++++++++++++ src/commands/hash_object.rs | 27 +++++++- src/commands/mod.rs | 16 ++++- 3 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 src/commands/cat_file.rs diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs new file mode 100644 index 0000000..2f994cf --- /dev/null +++ b/src/commands/cat_file.rs @@ -0,0 +1,122 @@ +use crate::commands::hash_object::ObjectType; +use crate::commands::{get_object_path, CommandArgs}; + +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Write}; + +use anyhow::Context; +use clap::Args; +use flate2::read::ZlibDecoder; + +impl CommandArgs for CatFileArgs { + fn run(self) -> anyhow::Result<()> { + let object_path = get_object_path(&self.object)?; + let object = File::open(object_path)?; + if self.object_type || self.size { + return read_metadata(&self, &object); + } + if self.exit_zero || self.pretty_print { + return read_content(&self, &object); + } + unreachable!("one of -t, -s, -e, or -p must be specified"); + } +} + +fn read_content(args: &CatFileArgs, file: &File) -> anyhow::Result<()> { + let zlib = ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + let mut header = Vec::new(); + zlib.read_until(0, &mut header)?; + + let header = std::str::from_utf8(&header).context("object header is not valid utf-8")?; + let (object_type, size) = header + .split_once(' ') + .context("object header is not valid")?; + let object_type = ObjectType::try_from(object_type.as_bytes())?; + let size = size + .trim_end_matches('\0') + .parse::() + .context("object size is not a valid integer")?; + + let mut content = Vec::new(); + zlib.read_to_end(&mut content)?; + + if size != content.len() { + anyhow::bail!("object size does not match header"); + } + + if args.exit_zero { + return Ok(()); + } + + if args.pretty_print { + match object_type { + ObjectType::Blob => { + std::io::stdout() + .write_all(&content) + .context("write object to stdout")?; + } + _ => unimplemented!("pretty-printing for object type {:?}", object_type), + } + } + + Ok(()) +} + +fn read_metadata(args: &CatFileArgs, file: &File) -> anyhow::Result<()> { + let zlib = ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + let mut object_type = Vec::new(); + + // The object type is the first word in the object header + zlib.read_until(b' ', &mut object_type)?; + object_type.pop(); // Remove the trailing space + + if !args.allow_unknown_type { + // Bail out if the object type fails to parse + ObjectType::try_from(object_type.as_slice())?; + } + + // If the object type is requested, print it and return + if args.object_type { + std::io::stdout() + .write_all(&object_type) + .context("write object type to stdout")?; + return Ok(()); + } + + // If the object size is requested, print it and return + if args.size { + let mut size = Vec::new(); + // Read until the null byte to get the object size + zlib.read_until(0, &mut size)?; + std::io::stdout() + .write_all(&size) + .context("write object size to stdout")?; + return Ok(()); + } + + unreachable!("either -t or -s must be specified"); +} + +#[derive(Args, Debug)] +pub(crate) struct CatFileArgs { + /// show object type + #[arg(short = 't', groups = ["meta", "flags"])] + object_type: bool, + /// show object size + #[arg(short, groups = ["meta", "flags"])] + size: bool, + /// check if exists + #[arg(short, group = "flags")] + exit_zero: bool, + /// pretty-print content + #[arg(short, group = "flags")] + pretty_print: bool, + /// allow -s and -t to work with broken/corrupt objects + #[arg(long, requires = "meta")] + allow_unknown_type: bool, + /// the object to display + #[arg(name = "object")] + object: String, +} diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index 4fb5d70..b7cb556 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -79,6 +79,26 @@ impl fmt::Display for ObjectType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ObjectType::Blob => write!(f, "blob"), + ObjectType::Tree => write!(f, "tree"), + ObjectType::Commit => write!(f, "commit"), + ObjectType::Tag => write!(f, "tag"), + } + } +} + +impl TryFrom<&[u8]> for ObjectType { + type Error = anyhow::Error; + + fn try_from(value: &[u8]) -> anyhow::Result { + match value { + b"blob" => Ok(ObjectType::Blob), + b"tree" => Ok(ObjectType::Tree), + b"commit" => Ok(ObjectType::Commit), + b"tag" => Ok(ObjectType::Tag), + _ => { + let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; + anyhow::bail!("unknown object type: {}", value) + } } } } @@ -86,7 +106,7 @@ impl fmt::Display for ObjectType { #[derive(Parser, Debug)] pub(crate) struct HashObjectArgs { /// object type - #[arg(short = 't', value_enum, default_value_t, value_name = "type")] + #[arg(short = 't', value_enum, default_value_t, name = "type")] object_type: ObjectType, /// write the object into the object database #[arg(short)] @@ -97,9 +117,12 @@ pub(crate) struct HashObjectArgs { } #[derive(Debug, Default, Clone, ValueEnum)] -enum ObjectType { +pub(super) enum ObjectType { #[default] Blob, + Tree, + Commit, + Tag, } #[cfg(test)] diff --git a/src/commands/mod.rs b/src/commands/mod.rs index fa397e8..b84d765 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use anyhow::Context; use clap::Subcommand; +mod cat_file; mod hash_object; mod init; @@ -11,6 +12,7 @@ impl Command { match self { Command::HashObject(args) => args.run(), Command::Init(args) => args.run(), + Command::CatFile(args) => args.run(), } } } @@ -19,6 +21,7 @@ impl Command { pub(crate) enum Command { HashObject(hash_object::HashObjectArgs), Init(init::InitArgs), + CatFile(cat_file::CatFileArgs), } pub(crate) trait CommandArgs { @@ -32,7 +35,6 @@ fn get_current_dir() -> anyhow::Result { fn git_dir() -> anyhow::Result { let git_dir_path = std::env::var("GIT_DIR").unwrap_or_else(|_| ".git".to_string()); let mut current_dir = get_current_dir()?; - println!("current_dir: {:?}", current_dir); while current_dir.exists() { let git_dir = current_dir.join(&git_dir_path); @@ -56,3 +58,15 @@ fn git_object_dir() -> anyhow::Result { git_dir().map(|git_dir| git_dir.join(git_object_dir_path)) } + +fn get_object_path(object: &str) -> anyhow::Result { + let object_dir = git_object_dir()?; + let object_dir = object_dir.join(&object[..2]); + let object_path = object_dir.join(&object[2..]); + + if !object_path.exists() { + anyhow::bail!("fatal: object '{}' not found", object); + } + + Ok(object_path) +} From 8d92ed8d3485d241a3ce7d910b3339ce1a267bc2 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Wed, 29 Jan 2025 22:20:46 +0000 Subject: [PATCH 02/14] chore: Implement cat-file subcommand --- src/commands/cat_file.rs | 558 ++++++++++++++++++++++++++++++++---- src/commands/hash_object.rs | 100 ++----- src/commands/init.rs | 114 +++----- src/commands/mod.rs | 46 --- src/main.rs | 1 + 5 files changed, 570 insertions(+), 249 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 2f994cf..3be65ef 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -1,5 +1,5 @@ -use crate::commands::hash_object::ObjectType; -use crate::commands::{get_object_path, CommandArgs}; +use crate::commands::CommandArgs; +use crate::utils::{get_object_path, parse_header}; use std::fs::File; use std::io::{BufRead, BufReader, Read, Write}; @@ -10,88 +10,85 @@ use flate2::read::ZlibDecoder; impl CommandArgs for CatFileArgs { fn run(self) -> anyhow::Result<()> { - let object_path = get_object_path(&self.object)?; - let object = File::open(object_path)?; - if self.object_type || self.size { - return read_metadata(&self, &object); + if self.show_type || self.size { + return read_header(&self, &mut std::io::stdout()); } - if self.exit_zero || self.pretty_print { - return read_content(&self, &object); + + if self.exit_zero { + return read_object(&self, &mut std::io::stdout()); } - unreachable!("one of -t, -s, -e, or -p must be specified"); + + unreachable!("either -t, -s, or -e must be specified"); } } -fn read_content(args: &CatFileArgs, file: &File) -> anyhow::Result<()> { +fn read_object(args: &CatFileArgs, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(&args.object_hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header and content let zlib = ZlibDecoder::new(file); let mut zlib = BufReader::new(zlib); + + // Read the object header let mut header = Vec::new(); zlib.read_until(0, &mut header)?; + let header = parse_header(&header)?; - let header = std::str::from_utf8(&header).context("object header is not valid utf-8")?; - let (object_type, size) = header - .split_once(' ') - .context("object header is not valid")?; - let object_type = ObjectType::try_from(object_type.as_bytes())?; - let size = size - .trim_end_matches('\0') - .parse::() - .context("object size is not a valid integer")?; - + // Read the object content let mut content = Vec::new(); zlib.read_to_end(&mut content)?; - if size != content.len() { + // Ensure the object size matches the header + if header.parse_size()? != content.len() { anyhow::bail!("object size does not match header"); } + // Exit early if the object exists and passes validation if args.exit_zero { return Ok(()); } - if args.pretty_print { - match object_type { - ObjectType::Blob => { - std::io::stdout() - .write_all(&content) - .context("write object to stdout")?; - } - _ => unimplemented!("pretty-printing for object type {:?}", object_type), - } - } - - Ok(()) + // Output the object content to stdout + writer.write_all(&content).context("write object to stdout") } -fn read_metadata(args: &CatFileArgs, file: &File) -> anyhow::Result<()> { +fn read_header(args: &CatFileArgs, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(&args.object_hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header let zlib = ZlibDecoder::new(file); let mut zlib = BufReader::new(zlib); - let mut object_type = Vec::new(); - // The object type is the first word in the object header - zlib.read_until(b' ', &mut object_type)?; - object_type.pop(); // Remove the trailing space + // Read the object header + let mut buf = Vec::new(); + zlib.read_until(0, &mut buf)?; + let header = parse_header(&buf)?; if !args.allow_unknown_type { // Bail out if the object type fails to parse - ObjectType::try_from(object_type.as_slice())?; + header.parse_type()?; } // If the object type is requested, print it and return - if args.object_type { - std::io::stdout() - .write_all(&object_type) + if args.show_type { + writer + .write_all(header.object_type) .context("write object type to stdout")?; return Ok(()); } // If the object size is requested, print it and return if args.size { - let mut size = Vec::new(); - // Read until the null byte to get the object size - zlib.read_until(0, &mut size)?; - std::io::stdout() - .write_all(&size) + writer + .write_all(header.size) .context("write object size to stdout")?; return Ok(()); } @@ -103,20 +100,475 @@ fn read_metadata(args: &CatFileArgs, file: &File) -> anyhow::Result<()> { pub(crate) struct CatFileArgs { /// show object type #[arg(short = 't', groups = ["meta", "flags"])] - object_type: bool, + show_type: bool, /// show object size #[arg(short, groups = ["meta", "flags"])] size: bool, /// check if exists - #[arg(short, group = "flags")] + #[arg(short, groups = ["content", "flags"])] exit_zero: bool, - /// pretty-print content - #[arg(short, group = "flags")] - pretty_print: bool, /// allow -s and -t to work with broken/corrupt objects #[arg(long, requires = "meta")] allow_unknown_type: bool, /// the object to display #[arg(name = "object")] - object: String, + object_hash: String, +} + +#[cfg(test)] +mod tests { + use crate::commands::cat_file::{read_header, read_object, CatFileArgs}; + use crate::utils::env; + use crate::utils::test::{TempEnv, TempPwd}; + + use flate2::write::ZlibEncoder; + use flate2::Compression; + use std::fs; + use std::io::Write; + + const OBJECT_CONTENT: &str = "Hello, World!"; + const OBJECT_HASH: &str = "b45ef6fec89518d314f546fd6c3025367b721684"; + const OBJECT_HASH_UNKNOWN_TYPE: &str = "de7a5d7e25b0b0700efda74301e3afddf222f2da"; // type: unknown + const OBJECT_HASH_INVALID_SIZE: &str = "5eacd92a2d45548f23ddee14fc6401a141f2dc9f"; // size: 0 + const OBJECT_TYPE: &str = "blob"; + + fn compress_object() -> Vec { + let object = format!( + "{} {}\0{}", + OBJECT_TYPE, + OBJECT_CONTENT.len(), + OBJECT_CONTENT + ); + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); + zlib.write_all(object.as_bytes()).unwrap(); + zlib.finish().unwrap() + } + + fn compress_object_unknown_type() -> Vec { + let object = format!("unknown {}\0{}", OBJECT_CONTENT.len(), OBJECT_CONTENT); + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); + zlib.write_all(object.as_bytes()).unwrap(); + zlib.finish().unwrap() + } + + fn compress_object_invalid_size() -> Vec { + let object = format!("{} 0\0{}", OBJECT_TYPE, OBJECT_CONTENT); + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); + zlib.write_all(object.as_bytes()).unwrap(); + zlib.finish().unwrap() + } + + #[test] + fn displays_object_content() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = read_object(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, OBJECT_CONTENT.as_bytes()); + } + + #[test] + fn exits_successfully() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: true, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = read_object(&args, &mut output); + + assert!(result.is_ok()); + assert!(output.is_empty()); + } + + #[test] + fn displays_object_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object()).unwrap(); + + let args = CatFileArgs { + show_type: true, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, OBJECT_TYPE.as_bytes()); + } + + #[test] + fn displays_object_size() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: true, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); + } + + #[test] + fn displays_object_type_with_unknown_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_UNKNOWN_TYPE[..2], + &OBJECT_HASH_UNKNOWN_TYPE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_unknown_type()).unwrap(); + + let args = CatFileArgs { + show_type: true, + size: false, + exit_zero: false, + allow_unknown_type: true, + object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, b"unknown"); + } + + #[test] + fn displays_object_size_with_unknown_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_UNKNOWN_TYPE[..2], + &OBJECT_HASH_UNKNOWN_TYPE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_unknown_type()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: true, + exit_zero: false, + allow_unknown_type: true, + object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); + } + + #[test] + fn fails_to_display_object_type_with_unknown_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_UNKNOWN_TYPE[..2], + &OBJECT_HASH_UNKNOWN_TYPE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_unknown_type()).unwrap(); + + let args = CatFileArgs { + show_type: true, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_object_size_with_unknown_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_UNKNOWN_TYPE[..2], + &OBJECT_HASH_UNKNOWN_TYPE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_unknown_type()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: true, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_object_content_with_invalid_size() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_INVALID_SIZE[..2], + &OBJECT_HASH_INVALID_SIZE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_invalid_size()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_object(&args, &mut output); + + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_object_content_with_unknown_type() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_INVALID_SIZE[..2], + &OBJECT_HASH_INVALID_SIZE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_invalid_size()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_object(&args, &mut output); + + assert!(result.is_err()); + } + + #[test] + fn displays_object_type_with_invalid_size() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_INVALID_SIZE[..2], + &OBJECT_HASH_INVALID_SIZE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_invalid_size()).unwrap(); + + let args = CatFileArgs { + show_type: true, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, OBJECT_TYPE.as_bytes()); + } + + #[test] + fn displays_object_size_with_invalid_size() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = format!( + ".git/objects/{}/{}", + &OBJECT_HASH_INVALID_SIZE[..2], + &OBJECT_HASH_INVALID_SIZE[2..] + ); + let object_path = temp_pwd.path().join(object_path); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_object_invalid_size()).unwrap(); + + let args = CatFileArgs { + show_type: false, + size: true, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + }; + + let mut output = Vec::new(); + let result = read_header(&args, &mut output); + + assert!(result.is_ok()); + assert_eq!(output, b"0"); + } + + #[test] + fn read_object_non_existent_hash() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + let _temp_pwd = TempPwd::new(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + let result = read_object(&args, &mut Vec::new()); + + assert!(result.is_err()); + } + + #[test] + fn read_header_non_existent_hash() { + // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + let _temp_pwd = TempPwd::new(); + + let args = CatFileArgs { + show_type: false, + size: false, + exit_zero: false, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + let result = read_header(&args, &mut Vec::new()); + + assert!(result.is_err()); + } } diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index b7cb556..54c3753 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -1,11 +1,11 @@ -use crate::commands::{git_object_dir, CommandArgs}; +use crate::commands::CommandArgs; +use crate::utils::{format_header, git_object_dir, ObjectType}; -use std::fmt; use std::io::Write; use std::path::PathBuf; use anyhow::Context; -use clap::{Parser, ValueEnum}; +use clap::Parser; use flate2::write::ZlibEncoder; use flate2::Compression; use sha1::{Digest, Sha1}; @@ -29,7 +29,7 @@ impl CommandArgs for HashObjectArgs { // Create blob from header and file content. let content = std::fs::read(&path).context(format!("read {}", path.display()))?; - let header = format!("{} {}\0", object_type, content.len()); + let header = format_header(object_type, content.len()); let mut blob = header.into_bytes(); blob.extend(content); @@ -62,7 +62,7 @@ impl CommandArgs for HashObjectArgs { /// * `anyhow::Result<()>` - The result of the write operation. fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { // Create the object directory if it doesn't exist. - let object_dir = git_object_dir()?.join(&hash[..2]); + let object_dir = git_object_dir(false)?.join(&hash[..2]); std::fs::create_dir_all(&object_dir).context("create subdir in .git/objects")?; // Compress the blob with zlib. @@ -75,34 +75,6 @@ fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { std::fs::write(object_path, compressed).context("write compressed blob") } -impl fmt::Display for ObjectType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ObjectType::Blob => write!(f, "blob"), - ObjectType::Tree => write!(f, "tree"), - ObjectType::Commit => write!(f, "commit"), - ObjectType::Tag => write!(f, "tag"), - } - } -} - -impl TryFrom<&[u8]> for ObjectType { - type Error = anyhow::Error; - - fn try_from(value: &[u8]) -> anyhow::Result { - match value { - b"blob" => Ok(ObjectType::Blob), - b"tree" => Ok(ObjectType::Tree), - b"commit" => Ok(ObjectType::Commit), - b"tag" => Ok(ObjectType::Tag), - _ => { - let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; - anyhow::bail!("unknown object type: {}", value) - } - } - } -} - #[derive(Parser, Debug)] pub(crate) struct HashObjectArgs { /// object type @@ -116,47 +88,23 @@ pub(crate) struct HashObjectArgs { path: PathBuf, } -#[derive(Debug, Default, Clone, ValueEnum)] -pub(super) enum ObjectType { - #[default] - Blob, - Tree, - Commit, - Tag, -} - #[cfg(test)] mod tests { - use super::*; - use std::fs; + use super::{write_blob, HashObjectArgs}; + use crate::commands::CommandArgs; + use crate::utils::test::TempPwd; + use crate::utils::ObjectType; - /// A temporary directory for testing. - /// Changes the current directory to the temporary directory and restores it on drop. - struct TempDir { - old_dir: PathBuf, - dir: tempfile::TempDir, - } - - impl TempDir { - fn new() -> Self { - let old_dir = std::env::current_dir().unwrap(); - let dir = tempfile::tempdir().unwrap(); - std::env::set_current_dir(&dir).unwrap(); - Self { old_dir, dir } - } - } + use std::fs; + use std::path::PathBuf; - impl Drop for TempDir { - fn drop(&mut self) { - std::env::set_current_dir(&self.old_dir).unwrap(); - } - } + use sha1::{Digest, Sha1}; #[test] fn run_hashes_blob_and_prints_hash() { // Create a temporary file with some content. - let temp_dir = TempDir::new(); - let file_path = temp_dir.dir.path().join("testfile.txt"); + let temp_pwd = TempPwd::new(); + let file_path = temp_pwd.path().join("testfile.txt"); fs::write(&file_path, b"test content").unwrap(); let args = HashObjectArgs { @@ -172,12 +120,12 @@ mod tests { #[test] fn run_writes_blob_to_git_objects() { // Create a temporary file with some content. - let temp_dir = TempDir::new(); - let file_path = temp_dir.dir.path().join("testfile.txt"); + let temp_pwd = TempPwd::new(); + let file_path = temp_pwd.path().join("testfile.txt"); fs::write(&file_path, b"test content").unwrap(); // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); + fs::create_dir(temp_pwd.path().join(".git")).unwrap(); let args = HashObjectArgs { write: true, @@ -196,7 +144,7 @@ mod tests { }; // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); + let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); let object_path = object_dir.join(&hash[2..]); assert!(object_path.exists()); } @@ -216,11 +164,11 @@ mod tests { #[test] fn write_blob_creates_object_directory() { // Create a temporary directory for testing. - let temp_dir = TempDir::new(); + let temp_pwd = TempPwd::new(); let blob = b"blob 12\0test content"; // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); + fs::create_dir(temp_pwd.path().join(".git")).unwrap(); // Expected hash of the blob. let hash = { @@ -233,18 +181,18 @@ mod tests { assert!(result.is_ok()); // Check that the object directory was created. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); + let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); assert!(object_dir.exists()); } #[test] fn write_blob_writes_compressed_blob() { // Create a temporary directory for testing. - let temp_dir = TempDir::new(); + let temp_pwd = TempPwd::new(); let blob = b"blob 12\0test content"; // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); + fs::create_dir(temp_pwd.path().join(".git")).unwrap(); // Expected hash of the blob. let hash = { @@ -257,7 +205,7 @@ mod tests { assert!(result.is_ok()); // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); + let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); let object_path = object_dir.join(&hash[2..]); assert!(object_path.exists()); } diff --git a/src/commands/init.rs b/src/commands/init.rs index 6dce603..4373ff0 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1,4 +1,5 @@ use crate::commands::CommandArgs; +use crate::utils::env; use clap::Parser; use std::path::PathBuf; @@ -11,17 +12,17 @@ impl CommandArgs for InitArgs { directory } else { let directory = std::env::current_dir()?; - let git_dir = std::env::var("GIT_DIR").unwrap_or_else(|_| ".".to_string()); + let git_dir = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".".to_string()); directory.join(git_dir) } } else { let directory = self.directory.unwrap_or_else(|| ".".into()); - let git_dir = std::env::var("GIT_DIR").unwrap_or_else(|_| ".git".to_string()); + let git_dir = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); directory.join(git_dir) }; // The directory where git objects are stored. - let git_object_dir = std::env::var("GIT_OBJECT_DIRECTORY") + let git_object_dir = std::env::var(env::GIT_OBJECT_DIRECTORY) .map(|object_dir| git_dir.join(object_dir)) .unwrap_or_else(|_| git_dir.join("objects")); @@ -61,67 +62,27 @@ pub(crate) struct InitArgs { #[cfg(test)] mod tests { - use super::*; + use super::InitArgs; + use crate::commands::CommandArgs; + use crate::utils::env; + use crate::utils::test::{TempEnv, TempPwd}; + use std::fs; - use tempfile::tempdir; + use std::path::PathBuf; const INITIAL_BRANCH: &str = "main"; const CUSTOM_GIT_DIR: &str = "custom_git_dir"; const CUSTOM_OBJECT_DIR: &str = "custom_object_dir"; - struct TempEnv { - old_git_dir: Option, - old_git_object_dir: Option, - } - - impl TempEnv { - fn new(git_dir: Option<&str>, git_object_dir: Option<&str>) -> Self { - let old_git_dir = std::env::var("GIT_DIR").ok(); - let old_git_object_dir = std::env::var("GIT_OBJECT_DIRECTORY").ok(); - - if let Some(git_dir) = git_dir { - std::env::set_var("GIT_DIR", git_dir); - } else { - std::env::remove_var("GIT_DIR"); - } - - if let Some(git_object_dir) = git_object_dir { - std::env::set_var("GIT_OBJECT_DIRECTORY", git_object_dir); - } else { - std::env::remove_var("GIT_OBJECT_DIRECTORY"); - } - - TempEnv { - old_git_dir, - old_git_object_dir, - } - } - } - - impl Drop for TempEnv { - fn drop(&mut self) { - if let Some(git_dir) = &self.old_git_dir { - std::env::set_var("GIT_DIR", git_dir); - } else { - std::env::remove_var("GIT_DIR"); - } - - if let Some(git_object_dir) = &self.old_git_object_dir { - std::env::set_var("GIT_OBJECT_DIRECTORY", git_object_dir); - } else { - std::env::remove_var("GIT_OBJECT_DIRECTORY"); - } - } - } - #[test] fn init_repository() { - let _env = TempEnv::new(None, None); + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(".git"); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), @@ -140,11 +101,12 @@ mod tests { #[test] fn init_bare_repository() { - let _env = TempEnv::new(None, None); + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); + let temp_pwd = TempPwd::new(); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: true, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), @@ -152,23 +114,24 @@ mod tests { let result = args.run(); assert!(result.is_ok()); - assert!(temp_dir.path().join("objects").exists()); - assert!(temp_dir.path().join("refs").exists()); - assert!(temp_dir.path().join("HEAD").exists()); + assert!(temp_pwd.path().join("objects").exists()); + assert!(temp_pwd.path().join("refs").exists()); + assert!(temp_pwd.path().join("HEAD").exists()); - let head_content = fs::read_to_string(temp_dir.path().join("HEAD")).unwrap(); + let head_content = fs::read_to_string(temp_pwd.path().join("HEAD")).unwrap(); assert_eq!(head_content, "ref: refs/heads/main\n"); } #[test] fn init_repository_with_branch() { - let _env = TempEnv::new(None, None); + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(".git"); let custom_branch = "develop".to_string(); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: custom_branch.clone(), @@ -185,12 +148,13 @@ mod tests { #[test] fn init_repository_with_git_dir() { - let _env = TempEnv::new(Some(CUSTOM_GIT_DIR), None); + let _git_dir_env = TempEnv::new(env::GIT_DIR, Some(CUSTOM_GIT_DIR)); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(CUSTOM_GIT_DIR); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(CUSTOM_GIT_DIR); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), @@ -209,12 +173,13 @@ mod tests { #[test] fn init_repository_with_object_dir() { - let _env = TempEnv::new(None, Some(CUSTOM_OBJECT_DIR)); + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, Some(CUSTOM_OBJECT_DIR)); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(".git"); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), @@ -228,7 +193,8 @@ mod tests { #[test] fn fail_on_invalid_dir() { - let _env = TempEnv::new(None, None); + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let args = InitArgs { directory: Some(PathBuf::from("/invalid/directory")), diff --git a/src/commands/mod.rs b/src/commands/mod.rs index b84d765..87f7975 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,6 +1,3 @@ -use std::path::PathBuf; - -use anyhow::Context; use clap::Subcommand; mod cat_file; @@ -27,46 +24,3 @@ pub(crate) enum Command { pub(crate) trait CommandArgs { fn run(self) -> anyhow::Result<()>; } - -fn get_current_dir() -> anyhow::Result { - std::env::current_dir().context("get path of current directory") -} - -fn git_dir() -> anyhow::Result { - let git_dir_path = std::env::var("GIT_DIR").unwrap_or_else(|_| ".git".to_string()); - let mut current_dir = get_current_dir()?; - - while current_dir.exists() { - let git_dir = current_dir.join(&git_dir_path); - - if git_dir.exists() { - return Ok(git_dir); - } - - current_dir = current_dir - .parent() - .context("get path of parent directory")? - .to_path_buf(); - } - - anyhow::bail!("not a git repository (or any of the parent directories): .git") -} - -fn git_object_dir() -> anyhow::Result { - let git_object_dir_path = - std::env::var("GIT_OBJECT_DIRECTORY").unwrap_or_else(|_| "objects".to_string()); - - git_dir().map(|git_dir| git_dir.join(git_object_dir_path)) -} - -fn get_object_path(object: &str) -> anyhow::Result { - let object_dir = git_object_dir()?; - let object_dir = object_dir.join(&object[..2]); - let object_path = object_dir.join(&object[2..]); - - if !object_path.exists() { - anyhow::bail!("fatal: object '{}' not found", object); - } - - Ok(object_path) -} diff --git a/src/main.rs b/src/main.rs index 0b63083..a279165 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod commands; +mod utils; use clap::Parser; use commands::Command; From 3d462b07cb54b8b719d671359d777c099664dea1 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Wed, 29 Jan 2025 22:21:08 +0000 Subject: [PATCH 03/14] fix: commit utils.rs --- src/utils.rs | 307 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 src/utils.rs diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..96918d4 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,307 @@ +use std::fmt; +use std::path::PathBuf; + +use anyhow::Context; +use clap::ValueEnum; + +/// Format the header of a `.git/objects` file +pub(crate) fn format_header(object_type: O, size: S) -> String +where + O: fmt::Display, + S: fmt::Display, +{ + format!("{} {}\0", object_type, size) +} + +/// Read the header of a `.git/objects` file +pub(crate) fn parse_header(header: &[u8]) -> anyhow::Result { + // Split the header into type and size + let mut header = header.splitn(2, |&b| b == b' '); + + let object_type = header.next().context("invalid object header")?; + let size = header.next().context("invalid object header")?; + let size = &size[..size.len().saturating_sub(1)]; // Remove the trailing null byte + + Ok(ObjectHeader { object_type, size }) +} + +/// Get the path of the current directory. +pub(crate) fn get_current_dir() -> anyhow::Result { + std::env::current_dir().context("get path of current directory") +} + +/// Get the path to the git directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `$GIT_DIR` +/// 2. `.git` +/// +/// # Returns +/// +/// The path to the git directory +pub(crate) fn git_dir() -> anyhow::Result { + let git_dir_path = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); + let mut current_dir = get_current_dir()?; + + // Search for the git directory in the current directory and its parents + while current_dir.exists() { + let git_dir = current_dir.join(&git_dir_path); + + // Return the git directory if it exists + if git_dir.exists() { + return Ok(git_dir); + } + + let Some(parent_dir) = current_dir.parent() else { + break; + }; + + current_dir = parent_dir.to_path_buf(); + } + + anyhow::bail!( + "not a git repository (or any of the parent directories): {}", + git_dir_path + ) +} + +/// Get the path to the git object directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `/$GIT_OBJECT_DIRECTORY` +/// 2. `/objects` +/// +/// # Arguments +/// +/// * `check_exists` - Whether to check if the object directory exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the git object directory +pub(crate) fn git_object_dir(check_exists: bool) -> anyhow::Result { + let git_dir = git_dir()?; + let git_object_dir = + std::env::var(env::GIT_OBJECT_DIRECTORY).unwrap_or_else(|_| "objects".to_string()); + let git_object_dir = git_dir.join(&git_object_dir); + + // Check if the object directory exists + if check_exists && !git_object_dir.exists() { + anyhow::bail!( + "{}/{} directory does not exist", + git_dir.display(), + git_object_dir.display() + ); + } + + Ok(git_object_dir) +} + +/// Get the path to a git object. +/// The path is constructed as follows: +/// +/// `//` +/// +/// # Example +/// +/// If the default git and object directories are used, +/// the path for object `e7a11a969c037e00a796aafeff6258501ec15e9a` would be: +/// +/// `.git/objects/e7/a11a969c037e00a796aafeff6258501ec15e9a` +/// +/// # Arguments +/// +/// * `hash` - The object hash +/// * `check_exists` - Whether to check if the object exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the object file +pub(crate) fn get_object_path(hash: &str, check_exists: bool) -> anyhow::Result { + let object_dir = git_object_dir(check_exists)?; + let object_dir = object_dir.join(&hash[..2]); + let object_path = object_dir.join(&hash[2..]); + + // Check if the object exists + if check_exists && !object_path.exists() { + anyhow::bail!("{} is not a valid object", hash); + } + + Ok(object_path) +} + +/// The type of object in the Git object database +#[derive(Default, Debug, ValueEnum, Clone)] +pub(crate) enum ObjectType { + #[default] + Blob, + Tree, + Commit, + Tag, +} + +/// The header of a Git object +pub(crate) struct ObjectHeader<'a> { + /// The type of object + pub(crate) object_type: &'a [u8], + /// The size of the object in bytes + pub(crate) size: &'a [u8], +} + +impl ObjectHeader<'_> { + /// Parse the size of the object + pub(crate) fn parse_size(&self) -> anyhow::Result { + let size = std::str::from_utf8(self.size) + .context("object size is not valid utf-8")? + .parse::() + .context("object size is not a number")?; + + Ok(size) + } + + /// Parse the type of the object + pub(crate) fn parse_type(&self) -> anyhow::Result { + ObjectType::try_from(self.object_type) + } +} + +impl fmt::Display for ObjectType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ObjectType::Blob => write!(f, "blob"), + ObjectType::Tree => write!(f, "tree"), + ObjectType::Commit => write!(f, "commit"), + ObjectType::Tag => write!(f, "tag"), + } + } +} + +impl TryFrom<&[u8]> for ObjectType { + type Error = anyhow::Error; + + fn try_from(value: &[u8]) -> anyhow::Result { + match value { + b"blob" => Ok(ObjectType::Blob), + b"tree" => Ok(ObjectType::Tree), + b"commit" => Ok(ObjectType::Commit), + b"tag" => Ok(ObjectType::Tag), + _ => { + let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; + anyhow::bail!("unknown object type: {}", value) + } + } + } +} + +/// Utility structs and functions for testing +#[cfg(test)] +pub(crate) mod test { + use std::path::{Path, PathBuf}; + + /// A temporary environment for testing. + /// Changes the environment variable and restores it on drop. + /// Tests must be run serially to avoid conflicts (`cargo test -- --test-threads=1`) + /// + /// # Example + /// + /// ``` + /// # use crate::utils::test::TempEnv; + /// let temp_env = TempEnv::new("KEY", Some("VALUE")); + /// assert_eq!(std::env::var("KEY"), Ok("VALUE".to_string())); + /// + /// // The environment variable is restored when the `TempEnv` instance is dropped + /// drop(temp_env); + /// + /// // Setting the value to `None` unsets the environment variable + /// let temp_env = TempEnv::new("KEY", None); + /// assert!(std::env::var("KEY").is_err()); + /// + /// drop(temp_env); + /// ``` + pub(crate) struct TempEnv { + /// The environment variable's key + key: String, + /// The old value of the environment variable + old_value: Option, + } + + impl TempEnv { + /// Create a new temporary environment variable. + /// + /// * If `value` is `Some`, the environment variable is set to that value. + /// * If `value` is `None`, the environment variable is unset. + pub(crate) fn new(key: S, value: Option<&str>) -> Self + where + S: Into, + { + let key = key.into(); + let old_value = std::env::var(&key).ok(); + + if let Some(value) = value { + std::env::set_var(&key, value); + } else { + std::env::remove_var(&key); + } + + TempEnv { key, old_value } + } + } + + impl Drop for TempEnv { + fn drop(&mut self) { + if let Some(value) = &self.old_value { + std::env::set_var(&self.key, value); + } else { + std::env::remove_var(&self.key); + } + } + } + + /// A temporary directory for testing. + /// Changes the current directory to the temporary directory and restores it on drop. + /// + /// # Example + /// + /// ``` + /// # use crate::utils::test::TempPwd; + /// let temp_pwd = TempPwd::new(); + /// assert_eq!(std::env::current_dir().unwrap(), temp_pwd.temp_pwd.path()); + /// + /// // The current directory is restored when the `TempPwd` instance is dropped + /// drop(temp_pwd); + /// ``` + pub(crate) struct TempPwd { + old_pwd: PathBuf, + temp_pwd: tempfile::TempDir, + } + + impl TempPwd { + pub(crate) fn new() -> Self { + let old_pwd = std::env::current_dir().unwrap(); + let temp_pwd = tempfile::tempdir().unwrap(); + + // Change the current directory to the temporary directory + std::env::set_current_dir(&temp_pwd).unwrap(); + + Self { old_pwd, temp_pwd } + } + + pub(crate) fn path(&self) -> &Path { + self.temp_pwd.path() + } + } + + impl Drop for TempPwd { + fn drop(&mut self) { + // Restore the current directory + std::env::set_current_dir(&self.old_pwd).unwrap(); + } + } +} + +/// Environment variables used by the Git CLI +pub(crate) mod env { + pub(crate) const GIT_DIR: &str = "GIT_DIR"; + pub(crate) const GIT_OBJECT_DIRECTORY: &str = "GIT_OBJECT_DIRECTORY"; +} From 81e24f1afa959dd9b688413a81ceb8c2a68bcd3b Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:53:22 +0000 Subject: [PATCH 04/14] refactor(fmt): Add rustfmt config --- .github/workflows/ci.yml | 3 ++- .rustfmt.toml | 18 ++++++++++++++++++ src/commands/cat_file.rs | 17 +++++++++-------- src/commands/hash_object.rs | 16 ++++++++-------- src/commands/init.rs | 13 +++++++------ src/utils.rs | 6 +++--- 6 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 .rustfmt.toml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d97d814..b78fae1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_VERSION_STABLE: stable + RUST_VERSION_NIGHTLY: nightly jobs: test: @@ -51,7 +52,7 @@ jobs: - name: Install Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: ${{ env.RUST_VERSION_STABLE }} + toolchain: ${{ env.RUST_VERSION_NIGHTLY }} profile: minimal components: rustfmt, clippy override: true diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..4ebe4c1 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,18 @@ +# Stable +match_block_trailing_comma = true +newline_style = "Unix" +use_field_init_shorthand = true +use_try_shorthand = true + +# Nightly +imports_granularity = "Module" +combine_control_expr = false +condense_wildcard_suffixes = true +format_code_in_doc_comments = true +format_macro_matchers = true +hex_literal_case = "Lower" +normalize_comments = true +normalize_doc_attributes = true +overflow_delimited_expr = true +reorder_impl_items = true +group_imports = "StdExternalCrate" diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 3be65ef..525d512 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -1,6 +1,3 @@ -use crate::commands::CommandArgs; -use crate::utils::{get_object_path, parse_header}; - use std::fs::File; use std::io::{BufRead, BufReader, Read, Write}; @@ -8,6 +5,9 @@ use anyhow::Context; use clap::Args; use flate2::read::ZlibDecoder; +use crate::commands::CommandArgs; +use crate::utils::{get_object_path, parse_header}; + impl CommandArgs for CatFileArgs { fn run(self) -> anyhow::Result<()> { if self.show_type || self.size { @@ -117,14 +117,15 @@ pub(crate) struct CatFileArgs { #[cfg(test)] mod tests { - use crate::commands::cat_file::{read_header, read_object, CatFileArgs}; - use crate::utils::env; - use crate::utils::test::{TempEnv, TempPwd}; + use std::fs; + use std::io::Write; use flate2::write::ZlibEncoder; use flate2::Compression; - use std::fs; - use std::io::Write; + + use crate::commands::cat_file::{read_header, read_object, CatFileArgs}; + use crate::utils::env; + use crate::utils::test::{TempEnv, TempPwd}; const OBJECT_CONTENT: &str = "Hello, World!"; const OBJECT_HASH: &str = "b45ef6fec89518d314f546fd6c3025367b721684"; diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index 54c3753..b4f387d 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -1,6 +1,3 @@ -use crate::commands::CommandArgs; -use crate::utils::{format_header, git_object_dir, ObjectType}; - use std::io::Write; use std::path::PathBuf; @@ -10,6 +7,9 @@ use flate2::write::ZlibEncoder; use flate2::Compression; use sha1::{Digest, Sha1}; +use crate::commands::CommandArgs; +use crate::utils::{format_header, git_object_dir, ObjectType}; + impl CommandArgs for HashObjectArgs { /// Hashes the object and writes it to the `.git/objects` directory if requested. /// @@ -90,16 +90,16 @@ pub(crate) struct HashObjectArgs { #[cfg(test)] mod tests { - use super::{write_blob, HashObjectArgs}; - use crate::commands::CommandArgs; - use crate::utils::test::TempPwd; - use crate::utils::ObjectType; - use std::fs; use std::path::PathBuf; use sha1::{Digest, Sha1}; + use super::{write_blob, HashObjectArgs}; + use crate::commands::CommandArgs; + use crate::utils::test::TempPwd; + use crate::utils::ObjectType; + #[test] fn run_hashes_blob_and_prints_hash() { // Create a temporary file with some content. diff --git a/src/commands/init.rs b/src/commands/init.rs index 4373ff0..7504157 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1,8 +1,9 @@ -use crate::commands::CommandArgs; -use crate::utils::env; +use std::path::PathBuf; use clap::Parser; -use std::path::PathBuf; + +use crate::commands::CommandArgs; +use crate::utils::env; impl CommandArgs for InitArgs { fn run(self) -> anyhow::Result<()> { @@ -62,14 +63,14 @@ pub(crate) struct InitArgs { #[cfg(test)] mod tests { + use std::fs; + use std::path::PathBuf; + use super::InitArgs; use crate::commands::CommandArgs; use crate::utils::env; use crate::utils::test::{TempEnv, TempPwd}; - use std::fs; - use std::path::PathBuf; - const INITIAL_BRANCH: &str = "main"; const CUSTOM_GIT_DIR: &str = "custom_git_dir"; const CUSTOM_OBJECT_DIR: &str = "custom_object_dir"; diff --git a/src/utils.rs b/src/utils.rs index 96918d4..c6183be 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -74,7 +74,7 @@ pub(crate) fn git_dir() -> anyhow::Result { /// # Arguments /// /// * `check_exists` - Whether to check if the object directory exists, -/// exiting with an error if it does not +/// exiting with an error if it does not /// /// # Returns /// @@ -113,7 +113,7 @@ pub(crate) fn git_object_dir(check_exists: bool) -> anyhow::Result { /// /// * `hash` - The object hash /// * `check_exists` - Whether to check if the object exists, -/// exiting with an error if it does not +/// exiting with an error if it does not /// /// # Returns /// @@ -189,7 +189,7 @@ impl TryFrom<&[u8]> for ObjectType { _ => { let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; anyhow::bail!("unknown object type: {}", value) - } + }, } } } From 2f305f4c3bd76274df675d70e407a8034bf5c330 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:10:03 +0000 Subject: [PATCH 05/14] chore(cat-file): Add pretty-print --- src/commands/cat_file.rs | 91 ++++++++++++++++++---------------------- src/utils.rs | 2 +- 2 files changed, 41 insertions(+), 52 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 525d512..0de175e 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -6,19 +6,22 @@ use clap::Args; use flate2::read::ZlibDecoder; use crate::commands::CommandArgs; -use crate::utils::{get_object_path, parse_header}; +use crate::utils::{get_object_path, parse_header, ObjectType}; impl CommandArgs for CatFileArgs { fn run(self) -> anyhow::Result<()> { + let mut stdout = std::io::stdout(); + + // We only need to read the header for the object type and size if self.show_type || self.size { - return read_header(&self, &mut std::io::stdout()); + return read_header(&self, &mut stdout); } - if self.exit_zero { - return read_object(&self, &mut std::io::stdout()); + if self.exit_zero || self.pretty_print { + return read_object(&self, &mut stdout); } - unreachable!("either -t, -s, or -e must be specified"); + unreachable!("either -t, -s, -e, or -p must be specified"); } } @@ -38,6 +41,12 @@ where zlib.read_until(0, &mut header)?; let header = parse_header(&header)?; + // Bail out if the object type is not supported + match header.parse_type()? { + ObjectType::Blob => {}, + unknown_type => anyhow::bail!("unsupported object type: {:?}", unknown_type), + } + // Read the object content let mut content = Vec::new(); zlib.read_to_end(&mut content)?; @@ -96,7 +105,7 @@ where unreachable!("either -t or -s must be specified"); } -#[derive(Args, Debug)] +#[derive(Args, Debug, Default)] pub(crate) struct CatFileArgs { /// show object type #[arg(short = 't', groups = ["meta", "flags"])] @@ -107,6 +116,9 @@ pub(crate) struct CatFileArgs { /// check if exists #[arg(short, groups = ["content", "flags"])] exit_zero: bool, + /// pretty-print content + #[arg(short, groups = ["content", "flags"])] + pretty_print: bool, /// allow -s and -t to work with broken/corrupt objects #[arg(long, requires = "meta")] allow_unknown_type: bool, @@ -133,6 +145,7 @@ mod tests { const OBJECT_HASH_INVALID_SIZE: &str = "5eacd92a2d45548f23ddee14fc6401a141f2dc9f"; // size: 0 const OBJECT_TYPE: &str = "blob"; + /// Get the compressed representation of [`OBJECT_CONTENT`] and its header fn compress_object() -> Vec { let object = format!( "{} {}\0{}", @@ -145,6 +158,7 @@ mod tests { zlib.finish().unwrap() } + /// Get the compressed representation of [`OBJECT_CONTENT`] with an unknown type in the header fn compress_object_unknown_type() -> Vec { let object = format!("unknown {}\0{}", OBJECT_CONTENT.len(), OBJECT_CONTENT); let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); @@ -152,6 +166,7 @@ mod tests { zlib.finish().unwrap() } + /// Get the compressed representation of [`OBJECT_CONTENT`] with an invalid size in the header fn compress_object_invalid_size() -> Vec { let object = format!("{} 0\0{}", OBJECT_TYPE, OBJECT_CONTENT); let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); @@ -174,11 +189,9 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - show_type: false, - size: false, - exit_zero: false, - allow_unknown_type: false, + pretty_print: true, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -203,11 +216,9 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - show_type: false, - size: false, exit_zero: true, - allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -233,10 +244,8 @@ mod tests { let args = CatFileArgs { show_type: true, - size: false, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -261,11 +270,9 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - show_type: false, size: true, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -295,10 +302,9 @@ mod tests { let args = CatFileArgs { show_type: true, - size: false, - exit_zero: false, allow_unknown_type: true, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -327,11 +333,10 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - show_type: false, size: true, - exit_zero: false, allow_unknown_type: true, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -361,10 +366,8 @@ mod tests { let args = CatFileArgs { show_type: true, - size: false, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -392,11 +395,9 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - show_type: false, size: true, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -424,11 +425,9 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - show_type: false, - size: false, - exit_zero: false, - allow_unknown_type: false, + pretty_print: true, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -456,11 +455,9 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - show_type: false, - size: false, - exit_zero: false, - allow_unknown_type: false, + pretty_print: true, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -489,10 +486,8 @@ mod tests { let args = CatFileArgs { show_type: true, - size: false, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -521,11 +516,9 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - show_type: false, size: true, - exit_zero: false, - allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + ..Default::default() }; let mut output = Vec::new(); @@ -543,11 +536,9 @@ mod tests { let _temp_pwd = TempPwd::new(); let args = CatFileArgs { - show_type: false, - size: false, - exit_zero: false, - allow_unknown_type: false, + pretty_print: true, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let result = read_object(&args, &mut Vec::new()); @@ -562,11 +553,9 @@ mod tests { let _temp_pwd = TempPwd::new(); let args = CatFileArgs { - show_type: false, - size: false, - exit_zero: false, - allow_unknown_type: false, + size: true, object_hash: OBJECT_HASH.to_string(), + ..Default::default() }; let result = read_header(&args, &mut Vec::new()); diff --git a/src/utils.rs b/src/utils.rs index c6183be..00df0e7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -13,7 +13,7 @@ where format!("{} {}\0", object_type, size) } -/// Read the header of a `.git/objects` file +/// Parse the header of a `.git/objects` file into the [`ObjectHeader`] struct. pub(crate) fn parse_header(header: &[u8]) -> anyhow::Result { // Split the header into type and size let mut header = header.splitn(2, |&b| b == b' '); From 220c76ae80ee4d827453285da602f366d97ae726 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:58:43 +0000 Subject: [PATCH 06/14] refactor(command-args): Add writer and create `CatFileFlags` struct --- src/commands/cat_file.rs | 205 +++++++++++++++++++++++++----------- src/commands/hash_object.rs | 139 ++++++++++++------------ src/commands/init.rs | 33 +++--- src/commands/mod.rs | 14 ++- 4 files changed, 240 insertions(+), 151 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 0de175e..78ba79b 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -9,16 +9,17 @@ use crate::commands::CommandArgs; use crate::utils::{get_object_path, parse_header, ObjectType}; impl CommandArgs for CatFileArgs { - fn run(self) -> anyhow::Result<()> { - let mut stdout = std::io::stdout(); - + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { // We only need to read the header for the object type and size - if self.show_type || self.size { - return read_header(&self, &mut stdout); + if self.flags.show_type || self.flags.size { + return read_header(&self, writer); } - if self.exit_zero || self.pretty_print { - return read_object(&self, &mut stdout); + if self.flags.exit_zero || self.flags.pretty_print { + return read_object(&self, writer); } unreachable!("either -t, -s, -e, or -p must be specified"); @@ -57,7 +58,7 @@ where } // Exit early if the object exists and passes validation - if args.exit_zero { + if args.flags.exit_zero { return Ok(()); } @@ -87,7 +88,7 @@ where } // If the object type is requested, print it and return - if args.show_type { + if args.flags.show_type { writer .write_all(header.object_type) .context("write object type to stdout")?; @@ -95,7 +96,7 @@ where } // If the object size is requested, print it and return - if args.size { + if args.flags.size { writer .write_all(header.size) .context("write object size to stdout")?; @@ -105,26 +106,33 @@ where unreachable!("either -t or -s must be specified"); } -#[derive(Args, Debug, Default)] +#[derive(Args, Debug)] pub(crate) struct CatFileArgs { + #[command(flatten)] + flags: CatFileFlags, + /// allow -s and -t to work with broken/corrupt objects + #[arg(long, requires = "meta")] + allow_unknown_type: bool, + /// the object to display + #[arg(name = "object")] + object_hash: String, +} + +#[derive(Args, Debug)] +#[group(id = "flags", required = true)] +struct CatFileFlags { /// show object type - #[arg(short = 't', groups = ["meta", "flags"])] + #[arg(short = 't', group = "meta")] show_type: bool, /// show object size - #[arg(short, groups = ["meta", "flags"])] + #[arg(short, group = "meta")] size: bool, /// check if exists - #[arg(short, groups = ["content", "flags"])] + #[arg(short)] exit_zero: bool, /// pretty-print content - #[arg(short, groups = ["content", "flags"])] + #[arg(short)] pretty_print: bool, - /// allow -s and -t to work with broken/corrupt objects - #[arg(long, requires = "meta")] - allow_unknown_type: bool, - /// the object to display - #[arg(name = "object")] - object_hash: String, } #[cfg(test)] @@ -135,7 +143,8 @@ mod tests { use flate2::write::ZlibEncoder; use flate2::Compression; - use crate::commands::cat_file::{read_header, read_object, CatFileArgs}; + use crate::commands::cat_file::{CatFileArgs, CatFileFlags}; + use crate::commands::CommandArgs; use crate::utils::env; use crate::utils::test::{TempEnv, TempPwd}; @@ -189,13 +198,18 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - pretty_print: true, + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_object(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, OBJECT_CONTENT.as_bytes()); @@ -216,13 +230,18 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - exit_zero: true, + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: true, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_object(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert!(output.is_empty()); @@ -243,13 +262,18 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - show_type: true, + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, OBJECT_TYPE.as_bytes()); @@ -270,13 +294,18 @@ mod tests { fs::write(&object_path, compress_object()).unwrap(); let args = CatFileArgs { - size: true, + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); @@ -301,14 +330,18 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - show_type: true, + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, allow_unknown_type: true, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, b"unknown"); @@ -333,14 +366,18 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - size: true, + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, allow_unknown_type: true, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); @@ -365,13 +402,18 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - show_type: true, + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_err()); } @@ -395,13 +437,18 @@ mod tests { fs::write(&object_path, compress_object_unknown_type()).unwrap(); let args = CatFileArgs { - size: true, + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_err()); } @@ -425,13 +472,18 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - pretty_print: true, + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_object(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_err()); } @@ -455,13 +507,18 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - pretty_print: true, + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_object(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_err()); } @@ -485,13 +542,18 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - show_type: true, + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, OBJECT_TYPE.as_bytes()); @@ -516,13 +578,18 @@ mod tests { fs::write(&object_path, compress_object_invalid_size()).unwrap(); let args = CatFileArgs { - size: true, + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), - ..Default::default() }; let mut output = Vec::new(); - let result = read_header(&args, &mut output); + let result = args.run(&mut output); assert!(result.is_ok()); assert_eq!(output, b"0"); @@ -536,12 +603,17 @@ mod tests { let _temp_pwd = TempPwd::new(); let args = CatFileArgs { - pretty_print: true, + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; - let result = read_object(&args, &mut Vec::new()); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } @@ -553,12 +625,17 @@ mod tests { let _temp_pwd = TempPwd::new(); let args = CatFileArgs { - size: true, + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, object_hash: OBJECT_HASH.to_string(), - ..Default::default() }; - let result = read_header(&args, &mut Vec::new()); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } } diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index b4f387d..35827c8 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -20,7 +20,10 @@ impl CommandArgs for HashObjectArgs { /// # Returns /// /// * `anyhow::Result<()>` - The result of the command execution. - fn run(self) -> anyhow::Result<()> { + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { let HashObjectArgs { write, path, @@ -34,23 +37,25 @@ impl CommandArgs for HashObjectArgs { blob.extend(content); // Hash blob with SHA-1. + // This is used to identify the blob in the object database. let hash = { let mut hasher = Sha1::new(); hasher.update(&blob); format!("{:x}", hasher.finalize()) }; - // Write blob to `.git/objects` directory if requested. + // Write blob to the object database if requested. if write { write_blob(&blob, &hash)?; } - println!("{}", hash); + // Display the hash of the blob. + writer.write_all(hash.as_bytes())?; Ok(()) } } -/// Writes the blob to the `.git/objects` directory. +/// Writes the blob to the object database. /// /// # Arguments /// @@ -61,8 +66,11 @@ impl CommandArgs for HashObjectArgs { /// /// * `anyhow::Result<()>` - The result of the write operation. fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { + // Split the hash into directory and file name. + let (dir_name, file_name) = hash.split_at(2); + // Create the object directory if it doesn't exist. - let object_dir = git_object_dir(false)?.join(&hash[..2]); + let object_dir = git_object_dir(false)?.join(dir_name); std::fs::create_dir_all(&object_dir).context("create subdir in .git/objects")?; // Compress the blob with zlib. @@ -71,7 +79,7 @@ fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { let compressed = zlib.finish().context("finish zlib")?; // Write the compressed blob to the object file. - let object_path = object_dir.join(&hash[2..]); + let object_path = object_dir.join(file_name); std::fs::write(object_path, compressed).context("write compressed blob") } @@ -93,19 +101,25 @@ mod tests { use std::fs; use std::path::PathBuf; - use sha1::{Digest, Sha1}; - use super::{write_blob, HashObjectArgs}; use crate::commands::CommandArgs; - use crate::utils::test::TempPwd; - use crate::utils::ObjectType; + use crate::utils::test::{TempEnv, TempPwd}; + use crate::utils::{env, ObjectType}; + + const OBJECT_CONTENT: &str = "Hello, World!"; + const FILE_NAME: &str = "testfile.txt"; + const OBJECT_HASH: &str = "b45ef6fec89518d314f546fd6c3025367b721684"; #[test] - fn run_hashes_blob_and_prints_hash() { + fn hashes_blob_and_displays_hash() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + // Create a temporary file with some content. let temp_pwd = TempPwd::new(); - let file_path = temp_pwd.path().join("testfile.txt"); - fs::write(&file_path, b"test content").unwrap(); + let file_path = temp_pwd.path().join(FILE_NAME); + fs::write(&file_path, OBJECT_CONTENT).unwrap(); let args = HashObjectArgs { write: false, @@ -113,19 +127,26 @@ mod tests { object_type: ObjectType::Blob, }; - let result = args.run(); + let mut output = Vec::new(); + let result = args.run(&mut output); + assert!(result.is_ok()); + assert_eq!(output, OBJECT_HASH.as_bytes()); } #[test] - fn run_writes_blob_to_git_objects() { + fn writes_blob_to_object_database() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + // Create a temporary file with some content. let temp_pwd = TempPwd::new(); - let file_path = temp_pwd.path().join("testfile.txt"); - fs::write(&file_path, b"test content").unwrap(); + let file_path = temp_pwd.path().join(FILE_NAME); + fs::write(&file_path, OBJECT_CONTENT).unwrap(); // Create the .git directory. - fs::create_dir(temp_pwd.path().join(".git")).unwrap(); + fs::create_dir_all(temp_pwd.path().join(".git/objects")).unwrap(); let args = HashObjectArgs { write: true, @@ -133,80 +154,60 @@ mod tests { object_type: ObjectType::Blob, }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(b"blob 12\0test content"); - format!("{:x}", hasher.finalize()) - }; - - // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); - let object_path = object_dir.join(&hash[2..]); + // Check that the object file was written to the object database. + let (dir_name, file_name) = OBJECT_HASH.split_at(2); + let object_path = temp_pwd + .path() + .join(".git/objects") + .join(dir_name) + .join(file_name); assert!(object_path.exists()); } #[test] - fn run_fails_on_nonexistent_file() { + fn fails_on_nonexistent_file() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + // Create a temporary directory for testing. + let _temp_pwd = TempPwd::new(); + let args = HashObjectArgs { write: false, path: PathBuf::from("nonexistent.txt"), object_type: ObjectType::Blob, }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] - fn write_blob_creates_object_directory() { - // Create a temporary directory for testing. - let temp_pwd = TempPwd::new(); - let blob = b"blob 12\0test content"; - - // Create the .git directory. - fs::create_dir(temp_pwd.path().join(".git")).unwrap(); + fn write_blob_creates_object_database() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(blob); - format!("{:x}", hasher.finalize()) - }; - - let result = write_blob(blob, &hash); - assert!(result.is_ok()); - - // Check that the object directory was created. - let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); - assert!(object_dir.exists()); - } - - #[test] - fn write_blob_writes_compressed_blob() { // Create a temporary directory for testing. let temp_pwd = TempPwd::new(); - let blob = b"blob 12\0test content"; - + let blob = format!("blob {}\0{}", OBJECT_CONTENT.len(), OBJECT_CONTENT); // Create the .git directory. fs::create_dir(temp_pwd.path().join(".git")).unwrap(); - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(blob); - format!("{:x}", hasher.finalize()) - }; - - let result = write_blob(blob, &hash); + let result = write_blob(blob.as_bytes(), OBJECT_HASH); assert!(result.is_ok()); - // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_pwd.path().join(".git/objects").join(&hash[..2]); - let object_path = object_dir.join(&hash[2..]); - assert!(object_path.exists()); + // Check that the object directory and file were created. + let (dir_name, file_name) = OBJECT_HASH.split_at(2); + let object_dir = temp_pwd + .path() + .join(".git/objects") + .join(dir_name) + .join(file_name); + assert!(object_dir.exists()); } } diff --git a/src/commands/init.rs b/src/commands/init.rs index 7504157..f42a23b 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1,3 +1,4 @@ +use std::io::Write; use std::path::PathBuf; use clap::Parser; @@ -6,7 +7,10 @@ use crate::commands::CommandArgs; use crate::utils::env; impl CommandArgs for InitArgs { - fn run(self) -> anyhow::Result<()> { + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { // Initializes a new git repository in the specified directory. let git_dir = if self.bare { if let Some(directory) = self.directory { @@ -36,10 +40,11 @@ impl CommandArgs for InitArgs { std::fs::write(git_dir.join("HEAD"), head)?; if !self.quiet { - println!( + let output = format!( "Initialized empty Git repository in {}", git_dir.canonicalize()?.to_str().unwrap() ); + writer.write_all(output.as_bytes())?; } Ok(()) } @@ -76,7 +81,7 @@ mod tests { const CUSTOM_OBJECT_DIR: &str = "custom_object_dir"; #[test] - fn init_repository() { + fn inits_repo() { let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); @@ -89,7 +94,7 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("objects").exists()); @@ -101,7 +106,7 @@ mod tests { } #[test] - fn init_bare_repository() { + fn inits_bare_repo() { let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); @@ -113,7 +118,7 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(temp_pwd.path().join("objects").exists()); assert!(temp_pwd.path().join("refs").exists()); @@ -124,7 +129,7 @@ mod tests { } #[test] - fn init_repository_with_branch() { + fn inits_repo_with_branch() { let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); @@ -138,7 +143,7 @@ mod tests { initial_branch: custom_branch.clone(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("HEAD").exists()); @@ -148,7 +153,7 @@ mod tests { } #[test] - fn init_repository_with_git_dir() { + fn inits_repo_with_custom_git_dir() { let _git_dir_env = TempEnv::new(env::GIT_DIR, Some(CUSTOM_GIT_DIR)); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); @@ -161,7 +166,7 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("objects").exists()); @@ -173,7 +178,7 @@ mod tests { } #[test] - fn init_repository_with_object_dir() { + fn inits_repo_with_custom_git_object_dir() { let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, Some(CUSTOM_OBJECT_DIR)); @@ -186,14 +191,14 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join(CUSTOM_OBJECT_DIR).exists()); } #[test] - fn fail_on_invalid_dir() { + fn fail_on_invalid_init_path() { let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); @@ -204,7 +209,7 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 87f7975..4de92f5 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,3 +1,5 @@ +use std::io::Write; + use clap::Subcommand; mod cat_file; @@ -6,10 +8,12 @@ mod init; impl Command { pub fn run(self) -> anyhow::Result<()> { + let mut stdout = std::io::stdout(); + match self { - Command::HashObject(args) => args.run(), - Command::Init(args) => args.run(), - Command::CatFile(args) => args.run(), + Command::HashObject(args) => args.run(&mut stdout), + Command::Init(args) => args.run(&mut stdout), + Command::CatFile(args) => args.run(&mut stdout), } } } @@ -22,5 +26,7 @@ pub(crate) enum Command { } pub(crate) trait CommandArgs { - fn run(self) -> anyhow::Result<()>; + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write; } From 8142f02c6b40f23abecb35539bc02738afbcefef Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:01:50 +0000 Subject: [PATCH 07/14] docs(cat-file): Document command --- README.md | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e013e31..e8ee2a6 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,25 @@ This is a simple attempt at re-creating some of the functionality of the `git` c ## Features - `hash-object` - Compute the hash of an object and optionally write it to the object database. - - `-w` flag to write the object to the object database. - - `-t` flag to specify the type of the object (supported: `blob`). - - `` argument to specify the file to hash. + - `-w` flag to write the object to the object database. + - `-t` flag to specify the type of the object (supported: `blob`). + - `` argument to specify the file to hash. - `init` - Create an empty Git repository. - - `--bare` flag to create a bare repository. - - `-b` or `--initial-branch` flag to specify the initial branch. - - `-q` or `--quiet` flag to suppress the output. - - `` argument to specify the directory to initialize. + - `--bare` flag to create a bare repository. + - `-b` or `--initial-branch` flag to specify the initial branch. + - `-q` or `--quiet` flag to suppress the output. + - `` argument to specify the directory to initialize. +- `cat-file` - Provide content or type and size information for repository objects. + - `-t` flag to show the type of the object. + - `-s` flag to show the size of the object. + - `-p` flag to show the content of the object (pretty-print) + - `--allow-unknown-type` flag to allow unknown object types (to be used with `-t` or `-s`). + - `` argument to specify the object to show. ## Testing -Due to the nature of the project, tests must be run sequentially. To run the tests, use the following command: +Due to the nature of the project, tests must be run sequentially. To run the tests, use the +following command: ```sh cargo test -- --test-threads=1 From fa6b4e9ad24f4c33c2cc4fed2cc07fafd34c366f Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Fri, 31 Jan 2025 19:45:13 +0000 Subject: [PATCH 08/14] chore(cat-file): Add tree pretty-print support --- object_structure.md | 39 +++++++++++ src/commands/cat_file.rs | 142 ++++++++++++++++++++++++++++----------- src/utils.rs | 21 ++++++ 3 files changed, 162 insertions(+), 40 deletions(-) create mode 100644 object_structure.md diff --git a/object_structure.md b/object_structure.md new file mode 100644 index 0000000..1f33873 --- /dev/null +++ b/object_structure.md @@ -0,0 +1,39 @@ +# Object Structure + +This document describes the structure of Git objects. + +## General Object Structure + +All objects are stored in the `.git/objects` directory (or `$GIT_DIR/$GIT_OBJECT_DIRECTORY`) and +have the following structure: + +``` + \0 +``` + +## Blob + +A blob object is a file. It contains the contents of the file. + +``` +blob \0 +``` + +## Tree + +A tree object is a directory. It contains a list of entries, each of which contains a mode, a +filename, and a hash of a tree or blob object. The entries are sorted by filename. + +``` +tree \0 +``` + +The content of a tree is a list of entries, each of which contains a mode, a filename, and a hash of +a tree or blob object. The entries do not have a separator between them. Note that the SHA-1 hash is +binary, not hex. + +The format of each entry is as follows: + +``` + \0<20-byte SHA-1 as binary> +``` \ No newline at end of file diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 78ba79b..d172fed 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -6,31 +6,31 @@ use clap::Args; use flate2::read::ZlibDecoder; use crate::commands::CommandArgs; -use crate::utils::{get_object_path, parse_header, ObjectType}; +use crate::utils::{binary_to_hex_bytes, get_object_path, parse_header, ObjectType}; impl CommandArgs for CatFileArgs { fn run(self, writer: &mut W) -> anyhow::Result<()> where W: Write, { - // We only need to read the header for the object type and size - if self.flags.show_type || self.flags.size { - return read_header(&self, writer); + if self.flags.show_type { + return read_object_type(&self.object_hash, self.allow_unknown_type, writer); + } + if self.flags.size { + return read_object_size(&self.object_hash, self.allow_unknown_type, writer); } - if self.flags.exit_zero || self.flags.pretty_print { - return read_object(&self, writer); + return read_object_pretty(&self.object_hash, self.flags.exit_zero, writer); } - unreachable!("either -t, -s, -e, or -p must be specified"); } } -fn read_object(args: &CatFileArgs, writer: &mut W) -> anyhow::Result<()> +fn read_object_pretty(hash: &str, exit: bool, writer: &mut W) -> anyhow::Result<()> where W: Write, { - let object_path = get_object_path(&args.object_hash, true)?; + let object_path = get_object_path(hash, true)?; let file = File::open(object_path)?; // Create a zlib decoder to read the object header and content @@ -42,35 +42,111 @@ where zlib.read_until(0, &mut header)?; let header = parse_header(&header)?; - // Bail out if the object type is not supported - match header.parse_type()? { - ObjectType::Blob => {}, - unknown_type => anyhow::bail!("unsupported object type: {:?}", unknown_type), - } - // Read the object content - let mut content = Vec::new(); - zlib.read_to_end(&mut content)?; + let mut buf = Vec::new(); + let object_size = match header.parse_type()? { + ObjectType::Blob => zlib.read_to_end(&mut buf)?, + ObjectType::Tree => read_tree(&mut zlib, &mut buf)?, + // Bail out if the object type is not supported + t => anyhow::bail!("unsupported object type: {:?}", t), + }; // Ensure the object size matches the header - if header.parse_size()? != content.len() { + if header.parse_size()? != object_size { anyhow::bail!("object size does not match header"); } // Exit early if the object exists and passes validation - if args.flags.exit_zero { + if exit { return Ok(()); } // Output the object content to stdout - writer.write_all(&content).context("write object to stdout") + writer.write_all(&buf).context("write object to stdout") } -fn read_header(args: &CatFileArgs, writer: &mut W) -> anyhow::Result<()> +fn read_tree(zlib: &mut BufReader>, buf: &mut Vec) -> anyhow::Result { + let mut entry = Vec::new(); + let mut object_size = 0; + + loop { + // Read the entry mode + let mut mode = Vec::new(); + zlib.read_until(b' ', &mut mode)?; + // Exit the loop if the mode is empty + // This indicates the end of the tree + if mode.is_empty() { + break; + } + entry.extend(mode); + + // Read the entry name (file name) + let mut name = Vec::new(); + zlib.read_until(0, &mut name)?; + + // Read the entry hash + // Allocate enough space for a 40-byte hex hash + let mut hash = Vec::with_capacity(40); + zlib.take(20).read_to_end(&mut hash)?; + + // Add the entry size to the total size + object_size += entry.len() + hash.len() + name.len(); + // Convert the binary hash to hex + binary_to_hex_bytes(&mut hash); + + // Find the object type of the entry + let hash_str = std::str::from_utf8(&hash).context("object hash is not valid utf-8")?; + let mut object_type = Vec::new(); + read_object_type(hash_str, false, &mut object_type)?; + + // Append the remaining entry fields + entry.extend(object_type); + entry.push(b' '); + entry.extend(hash); + entry.push(b'\t'); + entry.extend(name); + entry.push(b'\n'); + + // Append the entry to the buffer + // and clear the entry for the next iteration + buf.extend_from_slice(&entry); + entry.clear(); + } + + Ok(object_size) +} + +fn read_object_type(hash: &str, allow_unknown_type: bool, writer: &mut W) -> anyhow::Result<()> where W: Write, { - let object_path = get_object_path(&args.object_hash, true)?; + let object_path = get_object_path(hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header + let zlib = ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + + // Read the object header + let mut buf = Vec::new(); + zlib.read_until(b' ', &mut buf)?; + buf.pop(); // Remove the trailing space + + // Validate the object type + if !allow_unknown_type { + ObjectType::try_from(buf.as_slice())?; + } + + writer + .write_all(&buf) + .context("write object type to writer") +} + +fn read_object_size(hash: &str, allow_unknown_type: bool, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(hash, true)?; let file = File::open(object_path)?; // Create a zlib decoder to read the object header @@ -82,28 +158,14 @@ where zlib.read_until(0, &mut buf)?; let header = parse_header(&buf)?; - if !args.allow_unknown_type { + if !allow_unknown_type { // Bail out if the object type fails to parse header.parse_type()?; } - // If the object type is requested, print it and return - if args.flags.show_type { - writer - .write_all(header.object_type) - .context("write object type to stdout")?; - return Ok(()); - } - - // If the object size is requested, print it and return - if args.flags.size { - writer - .write_all(header.size) - .context("write object size to stdout")?; - return Ok(()); - } - - unreachable!("either -t or -s must be specified"); + writer + .write_all(header.size) + .context("write object size to writer") } #[derive(Args, Debug)] diff --git a/src/utils.rs b/src/utils.rs index 00df0e7..3a72f78 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,6 +4,18 @@ use std::path::PathBuf; use anyhow::Context; use clap::ValueEnum; +const HEX_CHARS: &[u8] = b"0123456789abcdef"; + +/// Convert a binary slice to a hex slice. +pub(crate) fn binary_to_hex_bytes(bytes: &mut Vec) { + let n = bytes.len(); + for _ in 0..n { + let byte = bytes.remove(0); + bytes.push(HEX_CHARS[(byte >> 4) as usize]); + bytes.push(HEX_CHARS[(byte & 0xf) as usize]); + } +} + /// Format the header of a `.git/objects` file pub(crate) fn format_header(object_type: O, size: S) -> String where @@ -199,6 +211,8 @@ impl TryFrom<&[u8]> for ObjectType { pub(crate) mod test { use std::path::{Path, PathBuf}; + use super::binary_to_hex_bytes; + /// A temporary environment for testing. /// Changes the environment variable and restores it on drop. /// Tests must be run serially to avoid conflicts (`cargo test -- --test-threads=1`) @@ -298,6 +312,13 @@ pub(crate) mod test { std::env::set_current_dir(&self.old_pwd).unwrap(); } } + + #[test] + fn valid_binary_to_hex_bytes() { + let mut binary = vec![0x00, 0x01, 0x02, 0x03]; + binary_to_hex_bytes(&mut binary); + assert_eq!(binary, b"00010203"); + } } /// Environment variables used by the Git CLI From 9712296994369e760552b3c958b50a2299965c8d Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Fri, 31 Jan 2025 20:27:39 +0000 Subject: [PATCH 09/14] docs(object-structure): Add commit object alongside references to sources --- object_structure.md | 65 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/object_structure.md b/object_structure.md index 1f33873..6ccd070 100644 --- a/object_structure.md +++ b/object_structure.md @@ -4,36 +4,73 @@ This document describes the structure of Git objects. ## General Object Structure +> [Reference](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) (Object Storage) + All objects are stored in the `.git/objects` directory (or `$GIT_DIR/$GIT_OBJECT_DIRECTORY`) and have the following structure: ``` - \0 +{type} {size}\0{content} ``` +- `{type}` is the type of the object (blob, tree, commit, tag). +- `{size}` is the size of the content in bytes. +- `{content}` is the actual content of the object. + ## Blob -A blob object is a file. It contains the contents of the file. +> [Reference](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) (Object Storage) -``` -blob \0 -``` +A blob object is a file. Its content is just the file data. ## Tree -A tree object is a directory. It contains a list of entries, each of which contains a mode, a -filename, and a hash of a tree or blob object. The entries are sorted by filename. +> [Reference](https://stackoverflow.com/a/37105125/19244184) + +A tree object represents a directory. It contains a list of entries (no separator), each of which +can be either a blob or a tree object. + +The format of each entry is as follows: ``` -tree \0 +{mode} {filename}\0{hash} ``` -The content of a tree is a list of entries, each of which contains a mode, a filename, and a hash of -a tree or blob object. The entries do not have a separator between them. Note that the SHA-1 hash is -binary, not hex. +- `{mode}` is the file mode (e.g., `100644` for a file, `040000` for a directory). +- `{filename}` is the name of the file or directory. +- `{hash}` is the SHA-1 hash of the object represented in binary form. -The format of each entry is as follows: +## Commit + +> [Reference](https://stackoverflow.com/a/37438460/19244184) + +A commit object represents a commit. It contains a reference to a tree object, a list of parent +commits, an author, a committer, and a commit message. + +The content of a commit object is as follows: ``` - \0<20-byte SHA-1 as binary> -``` \ No newline at end of file +tree {tree_hash} +{parents} +author {author_name} {{author_email}} {author_date_seconds} {author_date_offset} +committer {committer_name} {{committer_email}} {committer_date_seconds} {committer_date_offset} + +{commit_message} +``` + +- `{tree_hash}` is the SHA-1 hash of the tree object. +- `{parents}` is a list of parent commit objects (if any) of the form: + ``` + parent {parent_1_hash} + parent {parent_2_hash} + ... + ``` +- `{author_name}` is the name of the author. +- `{author_email}` is the email address of the author. +- `{author_date_seconds}` is the author date in seconds since the Unix epoch. +- `{author_date_offset}` is the author date offset from UTC. +- `{committer_name}` is the name of the committer. +- `{committer_email}` is the email address of the committer. +- `{committer_date_seconds}` is the committer date in seconds since the Unix epoch. +- `{committer_date_offset}` is the committer date offset from UTC. +- `{commit_message}` is the commit message. \ No newline at end of file From 4ee22b64bec4e1d1c63e90a52f9f6e27fe809a5b Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Fri, 31 Jan 2025 21:51:42 +0000 Subject: [PATCH 10/14] docs(object-structure): Document tag object structure --- object_structure.md | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/object_structure.md b/object_structure.md index 6ccd070..8a58ac9 100644 --- a/object_structure.md +++ b/object_structure.md @@ -9,7 +9,7 @@ This document describes the structure of Git objects. All objects are stored in the `.git/objects` directory (or `$GIT_DIR/$GIT_OBJECT_DIRECTORY`) and have the following structure: -``` +```plaintext {type} {size}\0{content} ``` @@ -32,7 +32,7 @@ can be either a blob or a tree object. The format of each entry is as follows: -``` +```plaintext {mode} {filename}\0{hash} ``` @@ -49,18 +49,18 @@ commits, an author, a committer, and a commit message. The content of a commit object is as follows: -``` +```plaintext tree {tree_hash} {parents} -author {author_name} {{author_email}} {author_date_seconds} {author_date_offset} -committer {committer_name} {{committer_email}} {committer_date_seconds} {committer_date_offset} +author {author_name} <{author_email}> {author_date_seconds} {author_date_offset} +committer {committer_name} <{committer_email}> {committer_date_seconds} {committer_date_offset} {commit_message} ``` - `{tree_hash}` is the SHA-1 hash of the tree object. - `{parents}` is a list of parent commit objects (if any) of the form: - ``` + ```plaintext parent {parent_1_hash} parent {parent_2_hash} ... @@ -73,4 +73,32 @@ committer {committer_name} {{committer_email}} {committer_date_seconds} {committ - `{committer_email}` is the email address of the committer. - `{committer_date_seconds}` is the committer date in seconds since the Unix epoch. - `{committer_date_offset}` is the committer date offset from UTC. -- `{commit_message}` is the commit message. \ No newline at end of file +- `{commit_message}` is the commit message. + +## Tag + +> [Reference](https://stackoverflow.com/a/52193441/19244184) + +A tag object represents a tag. It contains a reference to an object (usually a commit), a tagger, +and +a tag message. + +The content of a tag object is as follows: + +```plaintext +object {object_hash} +type {object_type} +tag {tag_name} +tagger {tagger_name} <{tagger_email}> {tagger_date_seconds} {tagger_date_offset} + +{tag_message} +``` + +- `{object_hash}` is the SHA-1 hash of the object being tagged. +- `{object_type}` is the type of the object being tagged (e.g., `commit`). +- `{tag_name}` is the name of the tag. +- `{tagger_name}` is the name of the tagger. +- `{tagger_email}` is the email address of the tagger. +- `{tagger_date_seconds}` is the tagger date in seconds since the Unix epoch. +- `{tagger_date_offset}` is the tagger date offset from UTC. +- `{tag_message}` is the tag message. \ No newline at end of file From 873dd41c738b5830ae0695b611042c5153619738 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Fri, 31 Jan 2025 21:52:17 +0000 Subject: [PATCH 11/14] chore(cat-file): Add support for commit and tag object pretty-print --- src/commands/cat_file.rs | 19 ++++++++++--------- src/utils.rs | 3 +-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index d172fed..30e39fa 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -45,10 +45,9 @@ where // Read the object content let mut buf = Vec::new(); let object_size = match header.parse_type()? { - ObjectType::Blob => zlib.read_to_end(&mut buf)?, - ObjectType::Tree => read_tree(&mut zlib, &mut buf)?, - // Bail out if the object type is not supported - t => anyhow::bail!("unsupported object type: {:?}", t), + ObjectType::Tree => read_tree_pretty(&mut zlib, &mut buf)?, + // Blobs, commits, and tags are pretty-printed as is + _ => zlib.read_to_end(&mut buf)?, }; // Ensure the object size matches the header @@ -65,13 +64,16 @@ where writer.write_all(&buf).context("write object to stdout") } -fn read_tree(zlib: &mut BufReader>, buf: &mut Vec) -> anyhow::Result { +fn read_tree_pretty( + zlib: &mut BufReader>, + buf: &mut Vec, +) -> anyhow::Result { let mut entry = Vec::new(); let mut object_size = 0; loop { // Read the entry mode - let mut mode = Vec::new(); + let mut mode = Vec::with_capacity(6); zlib.read_until(b' ', &mut mode)?; // Exit the loop if the mode is empty // This indicates the end of the tree @@ -108,9 +110,8 @@ fn read_tree(zlib: &mut BufReader>, buf: &mut Vec) -> anyh entry.push(b'\n'); // Append the entry to the buffer - // and clear the entry for the next iteration - buf.extend_from_slice(&entry); - entry.clear(); + // Doing so will also clear the entry buffer + buf.append(&mut entry); } Ok(object_size) diff --git a/src/utils.rs b/src/utils.rs index 3a72f78..01cc0ee 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -8,8 +8,7 @@ const HEX_CHARS: &[u8] = b"0123456789abcdef"; /// Convert a binary slice to a hex slice. pub(crate) fn binary_to_hex_bytes(bytes: &mut Vec) { - let n = bytes.len(); - for _ in 0..n { + for _ in 0..bytes.len() { let byte = bytes.remove(0); bytes.push(HEX_CHARS[(byte >> 4) as usize]); bytes.push(HEX_CHARS[(byte & 0xf) as usize]); From 87f8b8ac23467f6643009ffa57662474364ccb01 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Sat, 1 Feb 2025 00:59:58 +0000 Subject: [PATCH 12/14] test(cat-file): Add tests for tree objects --- src/commands/cat_file.rs | 378 ++++++++++++++++++++++++--------------- src/utils.rs | 56 ++++-- 2 files changed, 274 insertions(+), 160 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 30e39fa..6a89e23 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -6,7 +6,7 @@ use clap::Args; use flate2::read::ZlibDecoder; use crate::commands::CommandArgs; -use crate::utils::{binary_to_hex_bytes, get_object_path, parse_header, ObjectType}; +use crate::utils::{get_object_path, hex, parse_header, ObjectType}; impl CommandArgs for CatFileArgs { fn run(self, writer: &mut W) -> anyhow::Result<()> @@ -68,10 +68,12 @@ fn read_tree_pretty( zlib: &mut BufReader>, buf: &mut Vec, ) -> anyhow::Result { - let mut entry = Vec::new(); + let mut entries = Vec::new(); let mut object_size = 0; loop { + let mut entry = Vec::new(); + // Read the entry mode let mut mode = Vec::with_capacity(6); zlib.read_until(b' ', &mut mode)?; @@ -94,7 +96,7 @@ fn read_tree_pretty( // Add the entry size to the total size object_size += entry.len() + hash.len() + name.len(); // Convert the binary hash to hex - binary_to_hex_bytes(&mut hash); + hex::encode_in_place(&mut hash); // Find the object type of the entry let hash_str = std::str::from_utf8(&hash).context("object hash is not valid utf-8")?; @@ -106,14 +108,16 @@ fn read_tree_pretty( entry.push(b' '); entry.extend(hash); entry.push(b'\t'); + name.pop(); // Remove the trailing null byte entry.extend(name); - entry.push(b'\n'); - // Append the entry to the buffer - // Doing so will also clear the entry buffer - buf.append(&mut entry); + // Append the entry to the list of entries + entries.push(entry); } + // Append the entries to the buffer + // joined by a newline character + buf.extend(entries.join(&b'\n')); Ok(object_size) } @@ -208,57 +212,137 @@ mod tests { use crate::commands::cat_file::{CatFileArgs, CatFileFlags}; use crate::commands::CommandArgs; - use crate::utils::env; use crate::utils::test::{TempEnv, TempPwd}; - - const OBJECT_CONTENT: &str = "Hello, World!"; - const OBJECT_HASH: &str = "b45ef6fec89518d314f546fd6c3025367b721684"; - const OBJECT_HASH_UNKNOWN_TYPE: &str = "de7a5d7e25b0b0700efda74301e3afddf222f2da"; // type: unknown - const OBJECT_HASH_INVALID_SIZE: &str = "5eacd92a2d45548f23ddee14fc6401a141f2dc9f"; // size: 0 - const OBJECT_TYPE: &str = "blob"; - - /// Get the compressed representation of [`OBJECT_CONTENT`] and its header - fn compress_object() -> Vec { + use crate::utils::{env, hex}; + + const BLOB_CONTENT: &str = "Hello, World!"; + const OBJECT_HASH: &str = "2f22503f99671604495c84465f0113d002193369"; + const OBJECT_PATH: &str = ".git/objects/2f/22503f99671604495c84465f0113d002193369"; + + /// Get the compressed representation of [`BLOB_CONTENT`] and its header + /// + /// # Arguments + /// + /// * `valid_type` - Whether the object type should be valid (`blob`) + /// * `valid_size` - Whether the object size should be valid (size of the content) + /// + /// # Returns + /// + /// The compressed representation of the blob object and its header + fn compress_blob(valid_type: bool, valid_size: bool) -> Vec { let object = format!( "{} {}\0{}", - OBJECT_TYPE, - OBJECT_CONTENT.len(), - OBJECT_CONTENT + if valid_type { "blob" } else { "unknown" }, + if valid_size { BLOB_CONTENT.len() } else { 0 }, + BLOB_CONTENT ); let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); zlib.write_all(object.as_bytes()).unwrap(); zlib.finish().unwrap() } - /// Get the compressed representation of [`OBJECT_CONTENT`] with an unknown type in the header - fn compress_object_unknown_type() -> Vec { - let object = format!("unknown {}\0{}", OBJECT_CONTENT.len(), OBJECT_CONTENT); + /// Get the compressed representation of a tree object and its header + /// + /// # Arguments + /// + /// * `object_hash` - The hash of the object to reference + /// * `valid_type` - Whether the object type should be valid (`tree`) + /// * `valid_size` - Whether the object size should be valid (size of the content) + /// + /// # Returns + /// + /// The compressed representation of the tree object and its header + fn compress_tree(object_hash: &str, valid_type: bool, valid_size: bool) -> Vec { + let content = tree_content(object_hash, false); + let mut object = format!( + "{} {}\0", + if valid_type { "tree" } else { "unknown" }, + if valid_size { content.len() } else { 0 } + ) + .into_bytes(); + object.extend(content); + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); - zlib.write_all(object.as_bytes()).unwrap(); + zlib.write_all(&object).unwrap(); zlib.finish().unwrap() } - /// Get the compressed representation of [`OBJECT_CONTENT`] with an invalid size in the header - fn compress_object_invalid_size() -> Vec { - let object = format!("{} 0\0{}", OBJECT_TYPE, OBJECT_CONTENT); - let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); - zlib.write_all(object.as_bytes()).unwrap(); - zlib.finish().unwrap() + /// Get the content of a tree object + /// + /// # Arguments + /// + /// * `object_hash` - The hash of the object to reference + /// * `pretty` - Whether the content should be pretty-printed + /// + /// # Returns + /// + /// The content of the tree object + fn tree_content(object_hash: &str, pretty: bool) -> Vec { + if pretty { + format!("100644 blob {}\tfile.txt", object_hash).into_bytes() + } else { + let object_hash_binary = + hex::decode(object_hash.as_bytes()).expect("failed to convert hex to binary"); + let mut content = b"100644 file.txt\0".to_vec(); + content.extend(object_hash_binary); + content + } } #[test] - fn displays_object_content() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn displays_non_tree() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, BLOB_CONTENT.as_bytes()); + } + + #[test] + fn displays_tree() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; + + // Create the object path and write the hashed content + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, true, true)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -275,22 +359,21 @@ mod tests { let result = args.run(&mut output); assert!(result.is_ok()); - assert_eq!(output, OBJECT_CONTENT.as_bytes()); + assert_eq!(output, tree_content(blob_hash_hex, true)); } #[test] fn exits_successfully() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -312,17 +395,16 @@ mod tests { #[test] fn displays_object_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -339,22 +421,21 @@ mod tests { let result = args.run(&mut output); assert!(result.is_ok()); - assert_eq!(output, OBJECT_TYPE.as_bytes()); + assert_eq!(output, b"blob"); } #[test] fn displays_object_size() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!(".git/objects/{}/{}", &OBJECT_HASH[..2], &OBJECT_HASH[2..]); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -371,26 +452,21 @@ mod tests { let result = args.run(&mut output); assert!(result.is_ok()); - assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); + assert_eq!(output, BLOB_CONTENT.len().to_string().as_bytes()); } #[test] fn displays_object_type_with_unknown_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_UNKNOWN_TYPE[..2], - &OBJECT_HASH_UNKNOWN_TYPE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_unknown_type()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -400,7 +476,7 @@ mod tests { pretty_print: false, }, allow_unknown_type: true, - object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; let mut output = Vec::new(); @@ -412,21 +488,16 @@ mod tests { #[test] fn displays_object_size_with_unknown_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_UNKNOWN_TYPE[..2], - &OBJECT_HASH_UNKNOWN_TYPE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_unknown_type()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -436,33 +507,28 @@ mod tests { pretty_print: false, }, allow_unknown_type: true, - object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; let mut output = Vec::new(); let result = args.run(&mut output); assert!(result.is_ok()); - assert_eq!(output, OBJECT_CONTENT.len().to_string().as_bytes()); + assert_eq!(output, BLOB_CONTENT.len().to_string().as_bytes()); } #[test] fn fails_to_display_object_type_with_unknown_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_UNKNOWN_TYPE[..2], - &OBJECT_HASH_UNKNOWN_TYPE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_unknown_type()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -472,32 +538,25 @@ mod tests { pretty_print: false, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; - let mut output = Vec::new(); - let result = args.run(&mut output); - + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] fn fails_to_display_object_size_with_unknown_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_UNKNOWN_TYPE[..2], - &OBJECT_HASH_UNKNOWN_TYPE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_unknown_type()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -507,32 +566,25 @@ mod tests { pretty_print: false, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_UNKNOWN_TYPE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; - let mut output = Vec::new(); - let result = args.run(&mut output); - + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] - fn fails_to_display_object_content_with_invalid_size() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn fails_to_display_non_tree_with_invalid_size() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_INVALID_SIZE[..2], - &OBJECT_HASH_INVALID_SIZE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_invalid_size()).unwrap(); + fs::write(&object_path, compress_blob(true, false)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -542,32 +594,36 @@ mod tests { pretty_print: true, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; - let mut output = Vec::new(); - let result = args.run(&mut output); - + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] - fn fails_to_display_object_content_with_unknown_type() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn fails_to_display_tree_with_invalid_size() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_INVALID_SIZE[..2], - &OBJECT_HASH_INVALID_SIZE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; // Create the object path and write the hashed content - fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_invalid_size()).unwrap(); + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, true, false)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -577,68 +633,92 @@ mod tests { pretty_print: true, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; - let mut output = Vec::new(); - let result = args.run(&mut output); - + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] - fn displays_object_type_with_invalid_size() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn fails_to_display_non_tree_with_unknown_type() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_INVALID_SIZE[..2], - &OBJECT_HASH_INVALID_SIZE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_invalid_size()).unwrap(); + fs::write(&object_path, compress_blob(true, false)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { - show_type: true, + show_type: false, size: false, exit_zero: false, - pretty_print: false, + pretty_print: true, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; - let mut output = Vec::new(); - let result = args.run(&mut output); + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } - assert!(result.is_ok()); - assert_eq!(output, OBJECT_TYPE.as_bytes()); + #[test] + fn fails_to_display_tree_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; + + // Create the object path and write the hashed content + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, false, true)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); } #[test] fn displays_object_size_with_invalid_size() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let temp_pwd = TempPwd::new(); - let object_path = format!( - ".git/objects/{}/{}", - &OBJECT_HASH_INVALID_SIZE[..2], - &OBJECT_HASH_INVALID_SIZE[2..] - ); - let object_path = temp_pwd.path().join(object_path); + let object_path = temp_pwd.path().join(OBJECT_PATH); // Create the object path and write the hashed content fs::create_dir_all(object_path.parent().unwrap()).unwrap(); - fs::write(&object_path, compress_object_invalid_size()).unwrap(); + fs::write(&object_path, compress_blob(true, false)).unwrap(); let args = CatFileArgs { flags: CatFileFlags { @@ -648,7 +728,7 @@ mod tests { pretty_print: false, }, allow_unknown_type: false, - object_hash: OBJECT_HASH_INVALID_SIZE.to_string(), + object_hash: OBJECT_HASH.to_string(), }; let mut output = Vec::new(); @@ -659,8 +739,8 @@ mod tests { } #[test] - fn read_object_non_existent_hash() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn fails_to_display_object_with_invalid_hash() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let _temp_pwd = TempPwd::new(); @@ -681,8 +761,8 @@ mod tests { } #[test] - fn read_header_non_existent_hash() { - // Unset the GIT_DIR and GIT_OBJECT_DIRECTORY environment variables + fn fails_to_display_header_with_invalid_hash() { + // Unset environmental variables to avoid conflicts let _git_dir_env = TempEnv::new(env::GIT_DIR, None); let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let _temp_pwd = TempPwd::new(); diff --git a/src/utils.rs b/src/utils.rs index 01cc0ee..d10f8f5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,14 +4,40 @@ use std::path::PathBuf; use anyhow::Context; use clap::ValueEnum; -const HEX_CHARS: &[u8] = b"0123456789abcdef"; - -/// Convert a binary slice to a hex slice. -pub(crate) fn binary_to_hex_bytes(bytes: &mut Vec) { - for _ in 0..bytes.len() { - let byte = bytes.remove(0); - bytes.push(HEX_CHARS[(byte >> 4) as usize]); - bytes.push(HEX_CHARS[(byte & 0xf) as usize]); +pub(crate) mod hex { + use anyhow::Context; + + const HEX_CHARS: &[u8] = b"0123456789abcdef"; + + /// Convert a binary slice to a hex slice. + pub(crate) fn encode_in_place(bytes: &mut Vec) { + for _ in 0..bytes.len() { + let byte = bytes.remove(0); + bytes.push(HEX_CHARS[(byte >> 4) as usize]); + bytes.push(HEX_CHARS[(byte & 0xf) as usize]); + } + } + + /// Convert a hex slice to a binary slice. + #[allow(unused)] + pub(crate) fn decode(hex: &[u8]) -> anyhow::Result> { + let mut bytes = Vec::with_capacity(hex.len() / 2); + + if hex.len() & 1 != 0 { + anyhow::bail!("invalid hex string"); + } + + for chunk in hex.chunks(2) { + let high = (chunk[0] as char) + .to_digit(16) + .context("invalid hex character")?; + let low = (chunk[1] as char) + .to_digit(16) + .context("invalid hex character")?; + bytes.push(((high << 4) | low) as u8); + } + + Ok(bytes) } } @@ -210,7 +236,7 @@ impl TryFrom<&[u8]> for ObjectType { pub(crate) mod test { use std::path::{Path, PathBuf}; - use super::binary_to_hex_bytes; + use super::hex; /// A temporary environment for testing. /// Changes the environment variable and restores it on drop. @@ -313,11 +339,19 @@ pub(crate) mod test { } #[test] - fn valid_binary_to_hex_bytes() { + fn hex_encode_in_place() { let mut binary = vec![0x00, 0x01, 0x02, 0x03]; - binary_to_hex_bytes(&mut binary); + hex::encode_in_place(&mut binary); assert_eq!(binary, b"00010203"); } + + #[test] + fn hex_decode() { + let hex = b"00010203"; + let binary = hex::decode(hex); + assert!(binary.is_ok()); + assert_eq!(binary.unwrap(), vec![0x00, 0x01, 0x02, 0x03]); + } } /// Environment variables used by the Git CLI From e3162878d8701a4deecf13e4a75a126b88007fa9 Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Sat, 1 Feb 2025 01:14:51 +0000 Subject: [PATCH 13/14] refactor(utils): Clean util structure --- src/commands/cat_file.rs | 3 +- src/commands/hash_object.rs | 6 +- src/utils.rs | 361 ------------------------------------ src/utils/env.rs | 4 + src/utils/hex.rs | 54 ++++++ src/utils/mod.rs | 114 ++++++++++++ src/utils/objects.rs | 90 +++++++++ src/utils/test.rs | 105 +++++++++++ 8 files changed, 373 insertions(+), 364 deletions(-) delete mode 100644 src/utils.rs create mode 100644 src/utils/env.rs create mode 100644 src/utils/hex.rs create mode 100644 src/utils/mod.rs create mode 100644 src/utils/objects.rs create mode 100644 src/utils/test.rs diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index 6a89e23..e0d466a 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -6,7 +6,8 @@ use clap::Args; use flate2::read::ZlibDecoder; use crate::commands::CommandArgs; -use crate::utils::{get_object_path, hex, parse_header, ObjectType}; +use crate::utils::objects::{parse_header, ObjectType}; +use crate::utils::{get_object_path, hex}; impl CommandArgs for CatFileArgs { fn run(self, writer: &mut W) -> anyhow::Result<()> diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index 35827c8..db5f480 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -8,7 +8,8 @@ use flate2::Compression; use sha1::{Digest, Sha1}; use crate::commands::CommandArgs; -use crate::utils::{format_header, git_object_dir, ObjectType}; +use crate::utils::git_object_dir; +use crate::utils::objects::{format_header, ObjectType}; impl CommandArgs for HashObjectArgs { /// Hashes the object and writes it to the `.git/objects` directory if requested. @@ -103,8 +104,9 @@ mod tests { use super::{write_blob, HashObjectArgs}; use crate::commands::CommandArgs; + use crate::utils::env; + use crate::utils::objects::ObjectType; use crate::utils::test::{TempEnv, TempPwd}; - use crate::utils::{env, ObjectType}; const OBJECT_CONTENT: &str = "Hello, World!"; const FILE_NAME: &str = "testfile.txt"; diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index d10f8f5..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,361 +0,0 @@ -use std::fmt; -use std::path::PathBuf; - -use anyhow::Context; -use clap::ValueEnum; - -pub(crate) mod hex { - use anyhow::Context; - - const HEX_CHARS: &[u8] = b"0123456789abcdef"; - - /// Convert a binary slice to a hex slice. - pub(crate) fn encode_in_place(bytes: &mut Vec) { - for _ in 0..bytes.len() { - let byte = bytes.remove(0); - bytes.push(HEX_CHARS[(byte >> 4) as usize]); - bytes.push(HEX_CHARS[(byte & 0xf) as usize]); - } - } - - /// Convert a hex slice to a binary slice. - #[allow(unused)] - pub(crate) fn decode(hex: &[u8]) -> anyhow::Result> { - let mut bytes = Vec::with_capacity(hex.len() / 2); - - if hex.len() & 1 != 0 { - anyhow::bail!("invalid hex string"); - } - - for chunk in hex.chunks(2) { - let high = (chunk[0] as char) - .to_digit(16) - .context("invalid hex character")?; - let low = (chunk[1] as char) - .to_digit(16) - .context("invalid hex character")?; - bytes.push(((high << 4) | low) as u8); - } - - Ok(bytes) - } -} - -/// Format the header of a `.git/objects` file -pub(crate) fn format_header(object_type: O, size: S) -> String -where - O: fmt::Display, - S: fmt::Display, -{ - format!("{} {}\0", object_type, size) -} - -/// Parse the header of a `.git/objects` file into the [`ObjectHeader`] struct. -pub(crate) fn parse_header(header: &[u8]) -> anyhow::Result { - // Split the header into type and size - let mut header = header.splitn(2, |&b| b == b' '); - - let object_type = header.next().context("invalid object header")?; - let size = header.next().context("invalid object header")?; - let size = &size[..size.len().saturating_sub(1)]; // Remove the trailing null byte - - Ok(ObjectHeader { object_type, size }) -} - -/// Get the path of the current directory. -pub(crate) fn get_current_dir() -> anyhow::Result { - std::env::current_dir().context("get path of current directory") -} - -/// Get the path to the git directory. -/// This could be either of the following (in order of precedence): -/// -/// 1. `$GIT_DIR` -/// 2. `.git` -/// -/// # Returns -/// -/// The path to the git directory -pub(crate) fn git_dir() -> anyhow::Result { - let git_dir_path = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); - let mut current_dir = get_current_dir()?; - - // Search for the git directory in the current directory and its parents - while current_dir.exists() { - let git_dir = current_dir.join(&git_dir_path); - - // Return the git directory if it exists - if git_dir.exists() { - return Ok(git_dir); - } - - let Some(parent_dir) = current_dir.parent() else { - break; - }; - - current_dir = parent_dir.to_path_buf(); - } - - anyhow::bail!( - "not a git repository (or any of the parent directories): {}", - git_dir_path - ) -} - -/// Get the path to the git object directory. -/// This could be either of the following (in order of precedence): -/// -/// 1. `/$GIT_OBJECT_DIRECTORY` -/// 2. `/objects` -/// -/// # Arguments -/// -/// * `check_exists` - Whether to check if the object directory exists, -/// exiting with an error if it does not -/// -/// # Returns -/// -/// The path to the git object directory -pub(crate) fn git_object_dir(check_exists: bool) -> anyhow::Result { - let git_dir = git_dir()?; - let git_object_dir = - std::env::var(env::GIT_OBJECT_DIRECTORY).unwrap_or_else(|_| "objects".to_string()); - let git_object_dir = git_dir.join(&git_object_dir); - - // Check if the object directory exists - if check_exists && !git_object_dir.exists() { - anyhow::bail!( - "{}/{} directory does not exist", - git_dir.display(), - git_object_dir.display() - ); - } - - Ok(git_object_dir) -} - -/// Get the path to a git object. -/// The path is constructed as follows: -/// -/// `//` -/// -/// # Example -/// -/// If the default git and object directories are used, -/// the path for object `e7a11a969c037e00a796aafeff6258501ec15e9a` would be: -/// -/// `.git/objects/e7/a11a969c037e00a796aafeff6258501ec15e9a` -/// -/// # Arguments -/// -/// * `hash` - The object hash -/// * `check_exists` - Whether to check if the object exists, -/// exiting with an error if it does not -/// -/// # Returns -/// -/// The path to the object file -pub(crate) fn get_object_path(hash: &str, check_exists: bool) -> anyhow::Result { - let object_dir = git_object_dir(check_exists)?; - let object_dir = object_dir.join(&hash[..2]); - let object_path = object_dir.join(&hash[2..]); - - // Check if the object exists - if check_exists && !object_path.exists() { - anyhow::bail!("{} is not a valid object", hash); - } - - Ok(object_path) -} - -/// The type of object in the Git object database -#[derive(Default, Debug, ValueEnum, Clone)] -pub(crate) enum ObjectType { - #[default] - Blob, - Tree, - Commit, - Tag, -} - -/// The header of a Git object -pub(crate) struct ObjectHeader<'a> { - /// The type of object - pub(crate) object_type: &'a [u8], - /// The size of the object in bytes - pub(crate) size: &'a [u8], -} - -impl ObjectHeader<'_> { - /// Parse the size of the object - pub(crate) fn parse_size(&self) -> anyhow::Result { - let size = std::str::from_utf8(self.size) - .context("object size is not valid utf-8")? - .parse::() - .context("object size is not a number")?; - - Ok(size) - } - - /// Parse the type of the object - pub(crate) fn parse_type(&self) -> anyhow::Result { - ObjectType::try_from(self.object_type) - } -} - -impl fmt::Display for ObjectType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ObjectType::Blob => write!(f, "blob"), - ObjectType::Tree => write!(f, "tree"), - ObjectType::Commit => write!(f, "commit"), - ObjectType::Tag => write!(f, "tag"), - } - } -} - -impl TryFrom<&[u8]> for ObjectType { - type Error = anyhow::Error; - - fn try_from(value: &[u8]) -> anyhow::Result { - match value { - b"blob" => Ok(ObjectType::Blob), - b"tree" => Ok(ObjectType::Tree), - b"commit" => Ok(ObjectType::Commit), - b"tag" => Ok(ObjectType::Tag), - _ => { - let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; - anyhow::bail!("unknown object type: {}", value) - }, - } - } -} - -/// Utility structs and functions for testing -#[cfg(test)] -pub(crate) mod test { - use std::path::{Path, PathBuf}; - - use super::hex; - - /// A temporary environment for testing. - /// Changes the environment variable and restores it on drop. - /// Tests must be run serially to avoid conflicts (`cargo test -- --test-threads=1`) - /// - /// # Example - /// - /// ``` - /// # use crate::utils::test::TempEnv; - /// let temp_env = TempEnv::new("KEY", Some("VALUE")); - /// assert_eq!(std::env::var("KEY"), Ok("VALUE".to_string())); - /// - /// // The environment variable is restored when the `TempEnv` instance is dropped - /// drop(temp_env); - /// - /// // Setting the value to `None` unsets the environment variable - /// let temp_env = TempEnv::new("KEY", None); - /// assert!(std::env::var("KEY").is_err()); - /// - /// drop(temp_env); - /// ``` - pub(crate) struct TempEnv { - /// The environment variable's key - key: String, - /// The old value of the environment variable - old_value: Option, - } - - impl TempEnv { - /// Create a new temporary environment variable. - /// - /// * If `value` is `Some`, the environment variable is set to that value. - /// * If `value` is `None`, the environment variable is unset. - pub(crate) fn new(key: S, value: Option<&str>) -> Self - where - S: Into, - { - let key = key.into(); - let old_value = std::env::var(&key).ok(); - - if let Some(value) = value { - std::env::set_var(&key, value); - } else { - std::env::remove_var(&key); - } - - TempEnv { key, old_value } - } - } - - impl Drop for TempEnv { - fn drop(&mut self) { - if let Some(value) = &self.old_value { - std::env::set_var(&self.key, value); - } else { - std::env::remove_var(&self.key); - } - } - } - - /// A temporary directory for testing. - /// Changes the current directory to the temporary directory and restores it on drop. - /// - /// # Example - /// - /// ``` - /// # use crate::utils::test::TempPwd; - /// let temp_pwd = TempPwd::new(); - /// assert_eq!(std::env::current_dir().unwrap(), temp_pwd.temp_pwd.path()); - /// - /// // The current directory is restored when the `TempPwd` instance is dropped - /// drop(temp_pwd); - /// ``` - pub(crate) struct TempPwd { - old_pwd: PathBuf, - temp_pwd: tempfile::TempDir, - } - - impl TempPwd { - pub(crate) fn new() -> Self { - let old_pwd = std::env::current_dir().unwrap(); - let temp_pwd = tempfile::tempdir().unwrap(); - - // Change the current directory to the temporary directory - std::env::set_current_dir(&temp_pwd).unwrap(); - - Self { old_pwd, temp_pwd } - } - - pub(crate) fn path(&self) -> &Path { - self.temp_pwd.path() - } - } - - impl Drop for TempPwd { - fn drop(&mut self) { - // Restore the current directory - std::env::set_current_dir(&self.old_pwd).unwrap(); - } - } - - #[test] - fn hex_encode_in_place() { - let mut binary = vec![0x00, 0x01, 0x02, 0x03]; - hex::encode_in_place(&mut binary); - assert_eq!(binary, b"00010203"); - } - - #[test] - fn hex_decode() { - let hex = b"00010203"; - let binary = hex::decode(hex); - assert!(binary.is_ok()); - assert_eq!(binary.unwrap(), vec![0x00, 0x01, 0x02, 0x03]); - } -} - -/// Environment variables used by the Git CLI -pub(crate) mod env { - pub(crate) const GIT_DIR: &str = "GIT_DIR"; - pub(crate) const GIT_OBJECT_DIRECTORY: &str = "GIT_OBJECT_DIRECTORY"; -} diff --git a/src/utils/env.rs b/src/utils/env.rs new file mode 100644 index 0000000..a4a9347 --- /dev/null +++ b/src/utils/env.rs @@ -0,0 +1,4 @@ +//! Environment variables used by the Git CLI + +pub(crate) const GIT_DIR: &str = "GIT_DIR"; +pub(crate) const GIT_OBJECT_DIRECTORY: &str = "GIT_OBJECT_DIRECTORY"; diff --git a/src/utils/hex.rs b/src/utils/hex.rs new file mode 100644 index 0000000..702122c --- /dev/null +++ b/src/utils/hex.rs @@ -0,0 +1,54 @@ +use anyhow::Context; + +const HEX_CHARS: &[u8] = b"0123456789abcdef"; + +/// Convert a binary slice to a hex slice. +pub(crate) fn encode_in_place(bytes: &mut Vec) { + for _ in 0..bytes.len() { + let byte = bytes.remove(0); + bytes.push(HEX_CHARS[(byte >> 4) as usize]); + bytes.push(HEX_CHARS[(byte & 0xf) as usize]); + } +} + +/// Convert a hex slice to a binary slice. +#[allow(unused)] +pub(crate) fn decode(hex: &[u8]) -> anyhow::Result> { + let mut bytes = Vec::with_capacity(hex.len() / 2); + + if hex.len() & 1 != 0 { + anyhow::bail!("invalid hex string"); + } + + for chunk in hex.chunks(2) { + let high = (chunk[0] as char) + .to_digit(16) + .context("invalid hex character")?; + let low = (chunk[1] as char) + .to_digit(16) + .context("invalid hex character")?; + bytes.push(((high << 4) | low) as u8); + } + + Ok(bytes) +} + +#[cfg(test)] +mod tests { + use crate::utils::hex; + + #[test] + fn hex_encode_in_place() { + let mut binary = vec![0x00, 0x01, 0x02, 0x03]; + hex::encode_in_place(&mut binary); + assert_eq!(binary, b"00010203"); + } + + #[test] + fn hex_decode() { + let hex = b"00010203"; + let binary = hex::decode(hex); + assert!(binary.is_ok()); + assert_eq!(binary.unwrap(), vec![0x00, 0x01, 0x02, 0x03]); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..595cec7 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,114 @@ +use std::path::PathBuf; + +use anyhow::Context; + +pub(crate) mod env; +pub(crate) mod hex; +pub(crate) mod objects; +pub(crate) mod test; + +/// Get the path of the current directory. +pub(crate) fn get_current_dir() -> anyhow::Result { + std::env::current_dir().context("get path of current directory") +} + +/// Get the path to the git directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `$GIT_DIR` +/// 2. `.git` +/// +/// # Returns +/// +/// The path to the git directory +pub(crate) fn git_dir() -> anyhow::Result { + let git_dir_path = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); + let mut current_dir = get_current_dir()?; + + // Search for the git directory in the current directory and its parents + while current_dir.exists() { + let git_dir = current_dir.join(&git_dir_path); + + // Return the git directory if it exists + if git_dir.exists() { + return Ok(git_dir); + } + + let Some(parent_dir) = current_dir.parent() else { + break; + }; + + current_dir = parent_dir.to_path_buf(); + } + + anyhow::bail!( + "not a git repository (or any of the parent directories): {}", + git_dir_path + ) +} + +/// Get the path to the git object directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `/$GIT_OBJECT_DIRECTORY` +/// 2. `/objects` +/// +/// # Arguments +/// +/// * `check_exists` - Whether to check if the object directory exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the git object directory +pub(crate) fn git_object_dir(check_exists: bool) -> anyhow::Result { + let git_dir = git_dir()?; + let git_object_dir = + std::env::var(env::GIT_OBJECT_DIRECTORY).unwrap_or_else(|_| "objects".to_string()); + let git_object_dir = git_dir.join(&git_object_dir); + + // Check if the object directory exists + if check_exists && !git_object_dir.exists() { + anyhow::bail!( + "{}/{} directory does not exist", + git_dir.display(), + git_object_dir.display() + ); + } + + Ok(git_object_dir) +} + +/// Get the path to a git object. +/// The path is constructed as follows: +/// +/// `//` +/// +/// # Example +/// +/// If the default git and object directories are used, +/// the path for object `e7a11a969c037e00a796aafeff6258501ec15e9a` would be: +/// +/// `.git/objects/e7/a11a969c037e00a796aafeff6258501ec15e9a` +/// +/// # Arguments +/// +/// * `hash` - The object hash +/// * `check_exists` - Whether to check if the object exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the object file +pub(crate) fn get_object_path(hash: &str, check_exists: bool) -> anyhow::Result { + let object_dir = git_object_dir(check_exists)?; + let object_dir = object_dir.join(&hash[..2]); + let object_path = object_dir.join(&hash[2..]); + + // Check if the object exists + if check_exists && !object_path.exists() { + anyhow::bail!("{} is not a valid object", hash); + } + + Ok(object_path) +} diff --git a/src/utils/objects.rs b/src/utils/objects.rs new file mode 100644 index 0000000..2aa2123 --- /dev/null +++ b/src/utils/objects.rs @@ -0,0 +1,90 @@ +//! Utilities for working with Git objects + +use std::fmt; + +use anyhow::Context; +use clap::ValueEnum; + +/// Format the header of a `.git/objects` file +pub(crate) fn format_header(object_type: O, size: S) -> String +where + O: fmt::Display, + S: fmt::Display, +{ + format!("{} {}\0", object_type, size) +} + +/// Parse the header of a `.git/objects` file into the [`ObjectHeader`] struct. +pub(crate) fn parse_header(header: &[u8]) -> anyhow::Result { + // Split the header into type and size + let mut header = header.splitn(2, |&b| b == b' '); + + let object_type = header.next().context("invalid object header")?; + let size = header.next().context("invalid object header")?; + let size = &size[..size.len().saturating_sub(1)]; // Remove the trailing null byte + + Ok(ObjectHeader { object_type, size }) +} + +/// The type of object in the Git object database +#[derive(Default, Debug, ValueEnum, Clone)] +pub(crate) enum ObjectType { + #[default] + Blob, + Tree, + Commit, + Tag, +} + +/// The header of a Git object +pub(crate) struct ObjectHeader<'a> { + /// The type of object + pub(crate) object_type: &'a [u8], + /// The size of the object in bytes + pub(crate) size: &'a [u8], +} + +impl ObjectHeader<'_> { + /// Parse the size of the object + pub(crate) fn parse_size(&self) -> anyhow::Result { + let size = std::str::from_utf8(self.size) + .context("object size is not valid utf-8")? + .parse::() + .context("object size is not a number")?; + + Ok(size) + } + + /// Parse the type of the object + pub(crate) fn parse_type(&self) -> anyhow::Result { + ObjectType::try_from(self.object_type) + } +} + +impl fmt::Display for ObjectType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ObjectType::Blob => write!(f, "blob"), + ObjectType::Tree => write!(f, "tree"), + ObjectType::Commit => write!(f, "commit"), + ObjectType::Tag => write!(f, "tag"), + } + } +} + +impl TryFrom<&[u8]> for ObjectType { + type Error = anyhow::Error; + + fn try_from(value: &[u8]) -> anyhow::Result { + match value { + b"blob" => Ok(ObjectType::Blob), + b"tree" => Ok(ObjectType::Tree), + b"commit" => Ok(ObjectType::Commit), + b"tag" => Ok(ObjectType::Tag), + _ => { + let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; + anyhow::bail!("unknown object type: {}", value) + }, + } + } +} diff --git a/src/utils/test.rs b/src/utils/test.rs new file mode 100644 index 0000000..dd5ccf7 --- /dev/null +++ b/src/utils/test.rs @@ -0,0 +1,105 @@ +//! Utility structs and functions for testing + +#![cfg(test)] + +use std::path::{Path, PathBuf}; + +/// A temporary environment for testing. +/// Changes the environment variable and restores it on drop. +/// Tests must be run serially to avoid conflicts (`cargo test -- --test-threads=1`) +/// +/// # Example +/// +/// ``` +/// # use crate::utils::test::TempEnv; +/// let temp_env = TempEnv::new("KEY", Some("VALUE")); +/// assert_eq!(std::env::var("KEY"), Ok("VALUE".to_string())); +/// +/// // The environment variable is restored when the `TempEnv` instance is dropped +/// drop(temp_env); +/// +/// // Setting the value to `None` unsets the environment variable +/// let temp_env = TempEnv::new("KEY", None); +/// assert!(std::env::var("KEY").is_err()); +/// +/// drop(temp_env); +/// ``` +pub(crate) struct TempEnv { + /// The environment variable's key + key: String, + /// The old value of the environment variable + old_value: Option, +} + +impl TempEnv { + /// Create a new temporary environment variable. + /// + /// * If `value` is `Some`, the environment variable is set to that value. + /// * If `value` is `None`, the environment variable is unset. + pub(crate) fn new(key: S, value: Option<&str>) -> Self + where + S: Into, + { + let key = key.into(); + let old_value = std::env::var(&key).ok(); + + if let Some(value) = value { + std::env::set_var(&key, value); + } else { + std::env::remove_var(&key); + } + + TempEnv { key, old_value } + } +} + +impl Drop for TempEnv { + fn drop(&mut self) { + if let Some(value) = &self.old_value { + std::env::set_var(&self.key, value); + } else { + std::env::remove_var(&self.key); + } + } +} + +/// A temporary directory for testing. +/// Changes the current directory to the temporary directory and restores it on drop. +/// +/// # Example +/// +/// ``` +/// # use crate::utils::test::TempPwd; +/// let temp_pwd = TempPwd::new(); +/// assert_eq!(std::env::current_dir().unwrap(), temp_pwd.temp_pwd.path()); +/// +/// // The current directory is restored when the `TempPwd` instance is dropped +/// drop(temp_pwd); +/// ``` +pub(crate) struct TempPwd { + old_pwd: PathBuf, + temp_pwd: tempfile::TempDir, +} + +impl TempPwd { + pub(crate) fn new() -> Self { + let old_pwd = std::env::current_dir().unwrap(); + let temp_pwd = tempfile::tempdir().unwrap(); + + // Change the current directory to the temporary directory + std::env::set_current_dir(&temp_pwd).unwrap(); + + Self { old_pwd, temp_pwd } + } + + pub(crate) fn path(&self) -> &Path { + self.temp_pwd.path() + } +} + +impl Drop for TempPwd { + fn drop(&mut self) { + // Restore the current directory + std::env::set_current_dir(&self.old_pwd).unwrap(); + } +} From f2d9e5b7a6851efeaaa865f68a84d69b0e92951e Mon Sep 17 00:00:00 2001 From: nick <59822256+Archasion@users.noreply.github.com> Date: Sat, 1 Feb 2025 17:05:09 +0000 Subject: [PATCH 14/14] refactor(cat-file-flags): Rename arg group --- src/commands/cat_file.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs index e0d466a..76553f7 100644 --- a/src/commands/cat_file.rs +++ b/src/commands/cat_file.rs @@ -179,7 +179,7 @@ pub(crate) struct CatFileArgs { #[command(flatten)] flags: CatFileFlags, /// allow -s and -t to work with broken/corrupt objects - #[arg(long, requires = "meta")] + #[arg(long, requires = "header")] allow_unknown_type: bool, /// the object to display #[arg(name = "object")] @@ -190,10 +190,10 @@ pub(crate) struct CatFileArgs { #[group(id = "flags", required = true)] struct CatFileFlags { /// show object type - #[arg(short = 't', group = "meta")] + #[arg(short = 't', group = "header")] show_type: bool, /// show object size - #[arg(short, group = "meta")] + #[arg(short, group = "header")] size: bool, /// check if exists #[arg(short)]