diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d97d814..b78fae1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_VERSION_STABLE: stable + RUST_VERSION_NIGHTLY: nightly jobs: test: @@ -51,7 +52,7 @@ jobs: - name: Install Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: ${{ env.RUST_VERSION_STABLE }} + toolchain: ${{ env.RUST_VERSION_NIGHTLY }} profile: minimal components: rustfmt, clippy override: true diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..4ebe4c1 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,18 @@ +# Stable +match_block_trailing_comma = true +newline_style = "Unix" +use_field_init_shorthand = true +use_try_shorthand = true + +# Nightly +imports_granularity = "Module" +combine_control_expr = false +condense_wildcard_suffixes = true +format_code_in_doc_comments = true +format_macro_matchers = true +hex_literal_case = "Lower" +normalize_comments = true +normalize_doc_attributes = true +overflow_delimited_expr = true +reorder_impl_items = true +group_imports = "StdExternalCrate" diff --git a/README.md b/README.md index e013e31..e8ee2a6 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,25 @@ This is a simple attempt at re-creating some of the functionality of the `git` c ## Features - `hash-object` - Compute the hash of an object and optionally write it to the object database. - - `-w` flag to write the object to the object database. - - `-t` flag to specify the type of the object (supported: `blob`). - - `` argument to specify the file to hash. + - `-w` flag to write the object to the object database. + - `-t` flag to specify the type of the object (supported: `blob`). + - `` argument to specify the file to hash. - `init` - Create an empty Git repository. - - `--bare` flag to create a bare repository. - - `-b` or `--initial-branch` flag to specify the initial branch. 
- - `-q` or `--quiet` flag to suppress the output. - - `` argument to specify the directory to initialize. + - `--bare` flag to create a bare repository. + - `-b` or `--initial-branch` flag to specify the initial branch. + - `-q` or `--quiet` flag to suppress the output. + - `` argument to specify the directory to initialize. +- `cat-file` - Provide content or type and size information for repository objects. + - `-t` flag to show the type of the object. + - `-s` flag to show the size of the object. + - `-p` flag to show the content of the object (pretty-print). + - `--allow-unknown-type` flag to allow unknown object types (to be used with `-t` or `-s`). + - `<object>` argument to specify the object to show. ## Testing -Due to the nature of the project, tests must be run sequentially. To run the tests, use the following command: +Due to the nature of the project, tests must be run sequentially. To run the tests, use the +following command: ```sh cargo test -- --test-threads=1 diff --git a/object_structure.md b/object_structure.md new file mode 100644 index 0000000..8a58ac9 --- /dev/null +++ b/object_structure.md @@ -0,0 +1,104 @@ +# Object Structure + +This document describes the structure of Git objects. + +## General Object Structure + +> [Reference](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) (Object Storage) + +All objects are stored in the `.git/objects` directory (or `$GIT_DIR/$GIT_OBJECT_DIRECTORY`) and +have the following structure: + +```plaintext +{type} {size}\0{content} +``` + +- `{type}` is the type of the object (blob, tree, commit, tag). +- `{size}` is the size of the content in bytes. +- `{content}` is the actual content of the object. + +## Blob + +> [Reference](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) (Object Storage) + +A blob object is a file. Its content is just the file data. + +## Tree + +> [Reference](https://stackoverflow.com/a/37105125/19244184) + +A tree object represents a directory. 
It contains a list of entries (no separator), each of which +can be either a blob or a tree object. + +The format of each entry is as follows: + +```plaintext +{mode} {filename}\0{hash} +``` + +- `{mode}` is the file mode (e.g., `100644` for a file, `040000` for a directory). +- `{filename}` is the name of the file or directory. +- `{hash}` is the SHA-1 hash of the object represented in binary form. + +## Commit + +> [Reference](https://stackoverflow.com/a/37438460/19244184) + +A commit object represents a commit. It contains a reference to a tree object, a list of parent +commits, an author, a committer, and a commit message. + +The content of a commit object is as follows: + +```plaintext +tree {tree_hash} +{parents} +author {author_name} <{author_email}> {author_date_seconds} {author_date_offset} +committer {committer_name} <{committer_email}> {committer_date_seconds} {committer_date_offset} + +{commit_message} +``` + +- `{tree_hash}` is the SHA-1 hash of the tree object. +- `{parents}` is a list of parent commit objects (if any) of the form: + ```plaintext + parent {parent_1_hash} + parent {parent_2_hash} + ... + ``` +- `{author_name}` is the name of the author. +- `{author_email}` is the email address of the author. +- `{author_date_seconds}` is the author date in seconds since the Unix epoch. +- `{author_date_offset}` is the author date offset from UTC. +- `{committer_name}` is the name of the committer. +- `{committer_email}` is the email address of the committer. +- `{committer_date_seconds}` is the committer date in seconds since the Unix epoch. +- `{committer_date_offset}` is the committer date offset from UTC. +- `{commit_message}` is the commit message. + +## Tag + +> [Reference](https://stackoverflow.com/a/52193441/19244184) + +A tag object represents a tag. It contains a reference to an object (usually a commit), a tagger, +and +a tag message. 
+ +The content of a tag object is as follows: + +```plaintext +object {object_hash} +type {object_type} +tag {tag_name} +tagger {tagger_name} <{tagger_email}> {tagger_date_seconds} {tagger_date_offset} + +{tag_message} +``` + +- `{object_hash}` is the SHA-1 hash of the object being tagged. +- `{object_type}` is the type of the object being tagged (e.g., `commit`). +- `{tag_name}` is the name of the tag. +- `{tagger_name}` is the name of the tagger. +- `{tagger_email}` is the email address of the tagger. +- `{tagger_date_seconds}` is the tagger date in seconds since the Unix epoch. +- `{tagger_date_offset}` is the tagger date offset from UTC. +- `{tag_message}` is the tag message. \ No newline at end of file diff --git a/src/commands/cat_file.rs b/src/commands/cat_file.rs new file mode 100644 index 0000000..76553f7 --- /dev/null +++ b/src/commands/cat_file.rs @@ -0,0 +1,785 @@ +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Write}; + +use anyhow::Context; +use clap::Args; +use flate2::read::ZlibDecoder; + +use crate::commands::CommandArgs; +use crate::utils::objects::{parse_header, ObjectType}; +use crate::utils::{get_object_path, hex}; + +impl CommandArgs for CatFileArgs { + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { + if self.flags.show_type { + return read_object_type(&self.object_hash, self.allow_unknown_type, writer); + } + if self.flags.size { + return read_object_size(&self.object_hash, self.allow_unknown_type, writer); + } + if self.flags.exit_zero || self.flags.pretty_print { + return read_object_pretty(&self.object_hash, self.flags.exit_zero, writer); + } + unreachable!("either -t, -s, -e, or -p must be specified"); + } +} + +fn read_object_pretty(hash: &str, exit: bool, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header and content + let zlib = 
ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + + // Read the object header + let mut header = Vec::new(); + zlib.read_until(0, &mut header)?; + let header = parse_header(&header)?; + + // Read the object content + let mut buf = Vec::new(); + let object_size = match header.parse_type()? { + ObjectType::Tree => read_tree_pretty(&mut zlib, &mut buf)?, + // Blobs, commits, and tags are pretty-printed as is + _ => zlib.read_to_end(&mut buf)?, + }; + + // Ensure the object size matches the header + if header.parse_size()? != object_size { + anyhow::bail!("object size does not match header"); + } + + // Exit early if the object exists and passes validation + if exit { + return Ok(()); + } + + // Output the object content to stdout + writer.write_all(&buf).context("write object to stdout") +} + +fn read_tree_pretty( + zlib: &mut BufReader>, + buf: &mut Vec, +) -> anyhow::Result { + let mut entries = Vec::new(); + let mut object_size = 0; + + loop { + let mut entry = Vec::new(); + + // Read the entry mode + let mut mode = Vec::with_capacity(6); + zlib.read_until(b' ', &mut mode)?; + // Exit the loop if the mode is empty + // This indicates the end of the tree + if mode.is_empty() { + break; + } + entry.extend(mode); + + // Read the entry name (file name) + let mut name = Vec::new(); + zlib.read_until(0, &mut name)?; + + // Read the entry hash + // Allocate enough space for a 40-byte hex hash + let mut hash = Vec::with_capacity(40); + zlib.take(20).read_to_end(&mut hash)?; + + // Add the entry size to the total size + object_size += entry.len() + hash.len() + name.len(); + // Convert the binary hash to hex + hex::encode_in_place(&mut hash); + + // Find the object type of the entry + let hash_str = std::str::from_utf8(&hash).context("object hash is not valid utf-8")?; + let mut object_type = Vec::new(); + read_object_type(hash_str, false, &mut object_type)?; + + // Append the remaining entry fields + entry.extend(object_type); + entry.push(b' '); + 
entry.extend(hash); + entry.push(b'\t'); + name.pop(); // Remove the trailing null byte + entry.extend(name); + + // Append the entry to the list of entries + entries.push(entry); + } + + // Append the entries to the buffer + // joined by a newline character + buf.extend(entries.join(&b'\n')); + Ok(object_size) +} + +fn read_object_type(hash: &str, allow_unknown_type: bool, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header + let zlib = ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + + // Read the object header + let mut buf = Vec::new(); + zlib.read_until(b' ', &mut buf)?; + buf.pop(); // Remove the trailing space + + // Validate the object type + if !allow_unknown_type { + ObjectType::try_from(buf.as_slice())?; + } + + writer + .write_all(&buf) + .context("write object type to writer") +} + +fn read_object_size(hash: &str, allow_unknown_type: bool, writer: &mut W) -> anyhow::Result<()> +where + W: Write, +{ + let object_path = get_object_path(hash, true)?; + let file = File::open(object_path)?; + + // Create a zlib decoder to read the object header + let zlib = ZlibDecoder::new(file); + let mut zlib = BufReader::new(zlib); + + // Read the object header + let mut buf = Vec::new(); + zlib.read_until(0, &mut buf)?; + let header = parse_header(&buf)?; + + if !allow_unknown_type { + // Bail out if the object type fails to parse + header.parse_type()?; + } + + writer + .write_all(header.size) + .context("write object size to writer") +} + +#[derive(Args, Debug)] +pub(crate) struct CatFileArgs { + #[command(flatten)] + flags: CatFileFlags, + /// allow -s and -t to work with broken/corrupt objects + #[arg(long, requires = "header")] + allow_unknown_type: bool, + /// the object to display + #[arg(name = "object")] + object_hash: String, +} + +#[derive(Args, Debug)] +#[group(id = "flags", required = true)] 
+struct CatFileFlags { + /// show object type + #[arg(short = 't', group = "header")] + show_type: bool, + /// show object size + #[arg(short, group = "header")] + size: bool, + /// check if exists + #[arg(short)] + exit_zero: bool, + /// pretty-print content + #[arg(short)] + pretty_print: bool, +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::io::Write; + + use flate2::write::ZlibEncoder; + use flate2::Compression; + + use crate::commands::cat_file::{CatFileArgs, CatFileFlags}; + use crate::commands::CommandArgs; + use crate::utils::test::{TempEnv, TempPwd}; + use crate::utils::{env, hex}; + + const BLOB_CONTENT: &str = "Hello, World!"; + const OBJECT_HASH: &str = "2f22503f99671604495c84465f0113d002193369"; + const OBJECT_PATH: &str = ".git/objects/2f/22503f99671604495c84465f0113d002193369"; + + /// Get the compressed representation of [`BLOB_CONTENT`] and its header + /// + /// # Arguments + /// + /// * `valid_type` - Whether the object type should be valid (`blob`) + /// * `valid_size` - Whether the object size should be valid (size of the content) + /// + /// # Returns + /// + /// The compressed representation of the blob object and its header + fn compress_blob(valid_type: bool, valid_size: bool) -> Vec { + let object = format!( + "{} {}\0{}", + if valid_type { "blob" } else { "unknown" }, + if valid_size { BLOB_CONTENT.len() } else { 0 }, + BLOB_CONTENT + ); + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); + zlib.write_all(object.as_bytes()).unwrap(); + zlib.finish().unwrap() + } + + /// Get the compressed representation of a tree object and its header + /// + /// # Arguments + /// + /// * `object_hash` - The hash of the object to reference + /// * `valid_type` - Whether the object type should be valid (`tree`) + /// * `valid_size` - Whether the object size should be valid (size of the content) + /// + /// # Returns + /// + /// The compressed representation of the tree object and its header + fn compress_tree(object_hash: &str, 
valid_type: bool, valid_size: bool) -> Vec { + let content = tree_content(object_hash, false); + let mut object = format!( + "{} {}\0", + if valid_type { "tree" } else { "unknown" }, + if valid_size { content.len() } else { 0 } + ) + .into_bytes(); + object.extend(content); + + let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default()); + zlib.write_all(&object).unwrap(); + zlib.finish().unwrap() + } + + /// Get the content of a tree object + /// + /// # Arguments + /// + /// * `object_hash` - The hash of the object to reference + /// * `pretty` - Whether the content should be pretty-printed + /// + /// # Returns + /// + /// The content of the tree object + fn tree_content(object_hash: &str, pretty: bool) -> Vec { + if pretty { + format!("100644 blob {}\tfile.txt", object_hash).into_bytes() + } else { + let object_hash_binary = + hex::decode(object_hash.as_bytes()).expect("failed to convert hex to binary"); + let mut content = b"100644 file.txt\0".to_vec(); + content.extend(object_hash_binary); + content + } + } + + #[test] + fn displays_non_tree() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, BLOB_CONTENT.as_bytes()); + } + + #[test] + fn displays_tree() { + // Unset environmental variables to avoid conflicts + let 
_git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; + + // Create the object path and write the hashed content + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, true, true)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, tree_content(blob_hash_hex, true)); + } + + #[test] + fn exits_successfully() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: true, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert!(output.is_empty()); + } + + #[test] + fn 
displays_object_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, b"blob"); + } + + #[test] + fn displays_object_size() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, BLOB_CONTENT.len().to_string().as_bytes()); + } + + #[test] + fn displays_object_type_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let 
object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: true, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, b"unknown"); + } + + #[test] + fn displays_object_size_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: true, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, BLOB_CONTENT.len().to_string().as_bytes()); + } + + #[test] + fn fails_to_display_object_type_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(false, 
true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: true, + size: false, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_object_size_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_non_tree_with_invalid_size() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, false)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_tree_with_invalid_size() { + // Unset 
environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; + + // Create the object path and write the hashed content + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, true, false)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_non_tree_with_unknown_type() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Store an object whose header has an unknown type but a valid size, so only the type check can fail + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(false, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_tree_with_unknown_type() { + // Unset 
environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let tree_path = temp_pwd.path().join(OBJECT_PATH); + let blob_hash_hex = "01c6a63b7fc32f6f49988a9a12b8d7d199febeab"; + + // Create the object path and write the hashed content + fs::create_dir_all(tree_path.parent().unwrap()).unwrap(); + fs::write(&tree_path, compress_tree(blob_hash_hex, false, true)).unwrap(); + + let blob_path = temp_pwd + .path() + .join(".git/objects") + .join(&blob_hash_hex[..2]) + .join(&blob_hash_hex[2..]); + + // Create the object path and write the hashed content + fs::create_dir(blob_path.parent().unwrap()).unwrap(); + fs::write(&blob_path, compress_blob(true, true)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn displays_object_size_with_invalid_size() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + let temp_pwd = TempPwd::new(); + let object_path = temp_pwd.path().join(OBJECT_PATH); + + // Create the object path and write the hashed content + fs::create_dir_all(object_path.parent().unwrap()).unwrap(); + fs::write(&object_path, compress_blob(true, false)).unwrap(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let mut output = Vec::new(); + let result = args.run(&mut output); + + assert!(result.is_ok()); + assert_eq!(output, b"0"); + } + + #[test] + fn 
fails_to_display_object_with_invalid_hash() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + let _temp_pwd = TempPwd::new(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: false, + exit_zero: false, + pretty_print: true, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } + + #[test] + fn fails_to_display_header_with_invalid_hash() { + // Unset environmental variables to avoid conflicts + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + let _temp_pwd = TempPwd::new(); + + let args = CatFileArgs { + flags: CatFileFlags { + show_type: false, + size: true, + exit_zero: false, + pretty_print: false, + }, + allow_unknown_type: false, + object_hash: OBJECT_HASH.to_string(), + }; + + let result = args.run(&mut Vec::new()); + assert!(result.is_err()); + } +} diff --git a/src/commands/hash_object.rs b/src/commands/hash_object.rs index 4fb5d70..db5f480 100644 --- a/src/commands/hash_object.rs +++ b/src/commands/hash_object.rs @@ -1,15 +1,16 @@ -use crate::commands::{git_object_dir, CommandArgs}; - -use std::fmt; use std::io::Write; use std::path::PathBuf; use anyhow::Context; -use clap::{Parser, ValueEnum}; +use clap::Parser; use flate2::write::ZlibEncoder; use flate2::Compression; use sha1::{Digest, Sha1}; +use crate::commands::CommandArgs; +use crate::utils::git_object_dir; +use crate::utils::objects::{format_header, ObjectType}; + impl CommandArgs for HashObjectArgs { /// Hashes the object and writes it to the `.git/objects` directory if requested. /// @@ -20,7 +21,10 @@ impl CommandArgs for HashObjectArgs { /// # Returns /// /// * `anyhow::Result<()>` - The result of the command execution. 
- fn run(self) -> anyhow::Result<()> { + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { let HashObjectArgs { write, path, @@ -29,28 +33,30 @@ impl CommandArgs for HashObjectArgs { // Create blob from header and file content. let content = std::fs::read(&path).context(format!("read {}", path.display()))?; - let header = format!("{} {}\0", object_type, content.len()); + let header = format_header(object_type, content.len()); let mut blob = header.into_bytes(); blob.extend(content); // Hash blob with SHA-1. + // This is used to identify the blob in the object database. let hash = { let mut hasher = Sha1::new(); hasher.update(&blob); format!("{:x}", hasher.finalize()) }; - // Write blob to `.git/objects` directory if requested. + // Write blob to the object database if requested. if write { write_blob(&blob, &hash)?; } - println!("{}", hash); + // Display the hash of the blob. + writer.write_all(hash.as_bytes())?; Ok(()) } } -/// Writes the blob to the `.git/objects` directory. +/// Writes the blob to the object database. /// /// # Arguments /// @@ -61,8 +67,11 @@ impl CommandArgs for HashObjectArgs { /// /// * `anyhow::Result<()>` - The result of the write operation. fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { + // Split the hash into directory and file name. + let (dir_name, file_name) = hash.split_at(2); + // Create the object directory if it doesn't exist. - let object_dir = git_object_dir()?.join(&hash[..2]); + let object_dir = git_object_dir(false)?.join(dir_name); std::fs::create_dir_all(&object_dir).context("create subdir in .git/objects")?; // Compress the blob with zlib. @@ -71,22 +80,14 @@ fn write_blob(blob: &[u8], hash: &str) -> anyhow::Result<()> { let compressed = zlib.finish().context("finish zlib")?; // Write the compressed blob to the object file. 
- let object_path = object_dir.join(&hash[2..]); + let object_path = object_dir.join(file_name); std::fs::write(object_path, compressed).context("write compressed blob") } -impl fmt::Display for ObjectType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ObjectType::Blob => write!(f, "blob"), - } - } -} - #[derive(Parser, Debug)] pub(crate) struct HashObjectArgs { /// object type - #[arg(short = 't', value_enum, default_value_t, value_name = "type")] + #[arg(short = 't', value_enum, default_value_t, name = "type")] object_type: ObjectType, /// write the object into the object database #[arg(short)] @@ -96,45 +97,31 @@ pub(crate) struct HashObjectArgs { path: PathBuf, } -#[derive(Debug, Default, Clone, ValueEnum)] -enum ObjectType { - #[default] - Blob, -} - #[cfg(test)] mod tests { - use super::*; use std::fs; + use std::path::PathBuf; - /// A temporary directory for testing. - /// Changes the current directory to the temporary directory and restores it on drop. 
- struct TempDir { - old_dir: PathBuf, - dir: tempfile::TempDir, - } - - impl TempDir { - fn new() -> Self { - let old_dir = std::env::current_dir().unwrap(); - let dir = tempfile::tempdir().unwrap(); - std::env::set_current_dir(&dir).unwrap(); - Self { old_dir, dir } - } - } + use super::{write_blob, HashObjectArgs}; + use crate::commands::CommandArgs; + use crate::utils::env; + use crate::utils::objects::ObjectType; + use crate::utils::test::{TempEnv, TempPwd}; - impl Drop for TempDir { - fn drop(&mut self) { - std::env::set_current_dir(&self.old_dir).unwrap(); - } - } + const OBJECT_CONTENT: &str = "Hello, World!"; + const FILE_NAME: &str = "testfile.txt"; + const OBJECT_HASH: &str = "b45ef6fec89518d314f546fd6c3025367b721684"; #[test] - fn run_hashes_blob_and_prints_hash() { + fn hashes_blob_and_displays_hash() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + // Create a temporary file with some content. - let temp_dir = TempDir::new(); - let file_path = temp_dir.dir.path().join("testfile.txt"); - fs::write(&file_path, b"test content").unwrap(); + let temp_pwd = TempPwd::new(); + let file_path = temp_pwd.path().join(FILE_NAME); + fs::write(&file_path, OBJECT_CONTENT).unwrap(); let args = HashObjectArgs { write: false, @@ -142,19 +129,26 @@ mod tests { object_type: ObjectType::Blob, }; - let result = args.run(); + let mut output = Vec::new(); + let result = args.run(&mut output); + assert!(result.is_ok()); + assert_eq!(output, OBJECT_HASH.as_bytes()); } #[test] - fn run_writes_blob_to_git_objects() { + fn writes_blob_to_object_database() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + // Create a temporary file with some content. 
- let temp_dir = TempDir::new(); - let file_path = temp_dir.dir.path().join("testfile.txt"); - fs::write(&file_path, b"test content").unwrap(); + let temp_pwd = TempPwd::new(); + let file_path = temp_pwd.path().join(FILE_NAME); + fs::write(&file_path, OBJECT_CONTENT).unwrap(); // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); + fs::create_dir_all(temp_pwd.path().join(".git/objects")).unwrap(); let args = HashObjectArgs { write: true, @@ -162,80 +156,60 @@ mod tests { object_type: ObjectType::Blob, }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(b"blob 12\0test content"); - format!("{:x}", hasher.finalize()) - }; - - // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); - let object_path = object_dir.join(&hash[2..]); + // Check that the object file was written to the object database. + let (dir_name, file_name) = OBJECT_HASH.split_at(2); + let object_path = temp_pwd + .path() + .join(".git/objects") + .join(dir_name) + .join(file_name); assert!(object_path.exists()); } #[test] - fn run_fails_on_nonexistent_file() { + fn fails_on_nonexistent_file() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); + + // Create a temporary directory for testing. + let _temp_pwd = TempPwd::new(); + let args = HashObjectArgs { write: false, path: PathBuf::from("nonexistent.txt"), object_type: ObjectType::Blob, }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } #[test] - fn write_blob_creates_object_directory() { - // Create a temporary directory for testing. 
- let temp_dir = TempDir::new(); - let blob = b"blob 12\0test content"; - - // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); - - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(blob); - format!("{:x}", hasher.finalize()) - }; - - let result = write_blob(blob, &hash); - assert!(result.is_ok()); + fn write_blob_creates_object_database() { + // Unset environmental variables for testing + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - // Check that the object directory was created. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); - assert!(object_dir.exists()); - } - - #[test] - fn write_blob_writes_compressed_blob() { // Create a temporary directory for testing. - let temp_dir = TempDir::new(); - let blob = b"blob 12\0test content"; - + let temp_pwd = TempPwd::new(); + let blob = format!("blob {}\0{}", OBJECT_CONTENT.len(), OBJECT_CONTENT); // Create the .git directory. - fs::create_dir(temp_dir.dir.path().join(".git")).unwrap(); - - // Expected hash of the blob. - let hash = { - let mut hasher = Sha1::new(); - hasher.update(blob); - format!("{:x}", hasher.finalize()) - }; + fs::create_dir(temp_pwd.path().join(".git")).unwrap(); - let result = write_blob(blob, &hash); + let result = write_blob(blob.as_bytes(), OBJECT_HASH); assert!(result.is_ok()); - // Check that the object file was written to the `.git/objects` directory. - let object_dir = temp_dir.dir.path().join(".git/objects").join(&hash[..2]); - let object_path = object_dir.join(&hash[2..]); - assert!(object_path.exists()); + // Check that the object directory and file were created. 
+ let (dir_name, file_name) = OBJECT_HASH.split_at(2); + let object_dir = temp_pwd + .path() + .join(".git/objects") + .join(dir_name) + .join(file_name); + assert!(object_dir.exists()); } } diff --git a/src/commands/init.rs b/src/commands/init.rs index 6dce603..f42a23b 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -1,27 +1,33 @@ -use crate::commands::CommandArgs; +use std::io::Write; +use std::path::PathBuf; use clap::Parser; -use std::path::PathBuf; + +use crate::commands::CommandArgs; +use crate::utils::env; impl CommandArgs for InitArgs { - fn run(self) -> anyhow::Result<()> { + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write, + { // Initializes a new git repository in the specified directory. let git_dir = if self.bare { if let Some(directory) = self.directory { directory } else { let directory = std::env::current_dir()?; - let git_dir = std::env::var("GIT_DIR").unwrap_or_else(|_| ".".to_string()); + let git_dir = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".".to_string()); directory.join(git_dir) } } else { let directory = self.directory.unwrap_or_else(|| ".".into()); - let git_dir = std::env::var("GIT_DIR").unwrap_or_else(|_| ".git".to_string()); + let git_dir = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); directory.join(git_dir) }; // The directory where git objects are stored. 
- let git_object_dir = std::env::var("GIT_OBJECT_DIRECTORY") + let git_object_dir = std::env::var(env::GIT_OBJECT_DIRECTORY) .map(|object_dir| git_dir.join(object_dir)) .unwrap_or_else(|_| git_dir.join("objects")); @@ -34,10 +40,11 @@ impl CommandArgs for InitArgs { std::fs::write(git_dir.join("HEAD"), head)?; if !self.quiet { - println!( + let output = format!( "Initialized empty Git repository in {}", git_dir.canonicalize()?.to_str().unwrap() ); + writer.write_all(output.as_bytes())?; } Ok(()) } @@ -61,73 +68,33 @@ pub(crate) struct InitArgs { #[cfg(test)] mod tests { - use super::*; use std::fs; - use tempfile::tempdir; + use std::path::PathBuf; + + use super::InitArgs; + use crate::commands::CommandArgs; + use crate::utils::env; + use crate::utils::test::{TempEnv, TempPwd}; const INITIAL_BRANCH: &str = "main"; const CUSTOM_GIT_DIR: &str = "custom_git_dir"; const CUSTOM_OBJECT_DIR: &str = "custom_object_dir"; - struct TempEnv { - old_git_dir: Option, - old_git_object_dir: Option, - } - - impl TempEnv { - fn new(git_dir: Option<&str>, git_object_dir: Option<&str>) -> Self { - let old_git_dir = std::env::var("GIT_DIR").ok(); - let old_git_object_dir = std::env::var("GIT_OBJECT_DIRECTORY").ok(); - - if let Some(git_dir) = git_dir { - std::env::set_var("GIT_DIR", git_dir); - } else { - std::env::remove_var("GIT_DIR"); - } - - if let Some(git_object_dir) = git_object_dir { - std::env::set_var("GIT_OBJECT_DIRECTORY", git_object_dir); - } else { - std::env::remove_var("GIT_OBJECT_DIRECTORY"); - } - - TempEnv { - old_git_dir, - old_git_object_dir, - } - } - } - - impl Drop for TempEnv { - fn drop(&mut self) { - if let Some(git_dir) = &self.old_git_dir { - std::env::set_var("GIT_DIR", git_dir); - } else { - std::env::remove_var("GIT_DIR"); - } - - if let Some(git_object_dir) = &self.old_git_object_dir { - std::env::set_var("GIT_OBJECT_DIRECTORY", git_object_dir); - } else { - std::env::remove_var("GIT_OBJECT_DIRECTORY"); - } - } - } - #[test] - fn init_repository() { - 
let _env = TempEnv::new(None, None); + fn inits_repo() { + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(".git"); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("objects").exists()); @@ -139,42 +106,44 @@ mod tests { } #[test] - fn init_bare_repository() { - let _env = TempEnv::new(None, None); + fn inits_bare_repo() { + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); + let temp_pwd = TempPwd::new(); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: true, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); - assert!(temp_dir.path().join("objects").exists()); - assert!(temp_dir.path().join("refs").exists()); - assert!(temp_dir.path().join("HEAD").exists()); + assert!(temp_pwd.path().join("objects").exists()); + assert!(temp_pwd.path().join("refs").exists()); + assert!(temp_pwd.path().join("HEAD").exists()); - let head_content = fs::read_to_string(temp_dir.path().join("HEAD")).unwrap(); + let head_content = fs::read_to_string(temp_pwd.path().join("HEAD")).unwrap(); assert_eq!(head_content, "ref: refs/heads/main\n"); } #[test] - fn init_repository_with_branch() { - let _env = TempEnv::new(None, None); + fn inits_repo_with_branch() { + let _git_dir_env = 
TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(".git"); let custom_branch = "develop".to_string(); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: custom_branch.clone(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("HEAD").exists()); @@ -184,19 +153,20 @@ mod tests { } #[test] - fn init_repository_with_git_dir() { - let _env = TempEnv::new(Some(CUSTOM_GIT_DIR), None); + fn inits_repo_with_custom_git_dir() { + let _git_dir_env = TempEnv::new(env::GIT_DIR, Some(CUSTOM_GIT_DIR)); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(CUSTOM_GIT_DIR); + let temp_pwd = TempPwd::new(); + let git_dir = temp_pwd.path().join(CUSTOM_GIT_DIR); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join("objects").exists()); @@ -208,27 +178,29 @@ mod tests { } #[test] - fn init_repository_with_object_dir() { - let _env = TempEnv::new(None, Some(CUSTOM_OBJECT_DIR)); + fn inits_repo_with_custom_git_object_dir() { + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, Some(CUSTOM_OBJECT_DIR)); - let temp_dir = tempdir().unwrap(); - let git_dir = temp_dir.path().join(".git"); + let temp_pwd = TempPwd::new(); + let git_dir = 
temp_pwd.path().join(".git"); let args = InitArgs { - directory: Some(temp_dir.path().to_path_buf()), + directory: Some(temp_pwd.path().to_path_buf()), bare: false, quiet: true, initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_ok()); assert!(git_dir.exists()); assert!(git_dir.join(CUSTOM_OBJECT_DIR).exists()); } #[test] - fn fail_on_invalid_dir() { - let _env = TempEnv::new(None, None); + fn fail_on_invalid_init_path() { + let _git_dir_env = TempEnv::new(env::GIT_DIR, None); + let _git_object_dir_env = TempEnv::new(env::GIT_OBJECT_DIRECTORY, None); let args = InitArgs { directory: Some(PathBuf::from("/invalid/directory")), @@ -237,7 +209,7 @@ mod tests { initial_branch: INITIAL_BRANCH.to_string(), }; - let result = args.run(); + let result = args.run(&mut Vec::new()); assert!(result.is_err()); } } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index fa397e8..4de92f5 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,16 +1,19 @@ -use std::path::PathBuf; +use std::io::Write; -use anyhow::Context; use clap::Subcommand; +mod cat_file; mod hash_object; mod init; impl Command { pub fn run(self) -> anyhow::Result<()> { + let mut stdout = std::io::stdout(); + match self { - Command::HashObject(args) => args.run(), - Command::Init(args) => args.run(), + Command::HashObject(args) => args.run(&mut stdout), + Command::Init(args) => args.run(&mut stdout), + Command::CatFile(args) => args.run(&mut stdout), } } } @@ -19,40 +22,11 @@ impl Command { pub(crate) enum Command { HashObject(hash_object::HashObjectArgs), Init(init::InitArgs), + CatFile(cat_file::CatFileArgs), } pub(crate) trait CommandArgs { - fn run(self) -> anyhow::Result<()>; -} - -fn get_current_dir() -> anyhow::Result { - std::env::current_dir().context("get path of current directory") -} - -fn git_dir() -> anyhow::Result { - let git_dir_path = std::env::var("GIT_DIR").unwrap_or_else(|_| ".git".to_string()); - 
let mut current_dir = get_current_dir()?; - println!("current_dir: {:?}", current_dir); - - while current_dir.exists() { - let git_dir = current_dir.join(&git_dir_path); - - if git_dir.exists() { - return Ok(git_dir); - } - - current_dir = current_dir - .parent() - .context("get path of parent directory")? - .to_path_buf(); - } - - anyhow::bail!("not a git repository (or any of the parent directories): .git") -} - -fn git_object_dir() -> anyhow::Result { - let git_object_dir_path = - std::env::var("GIT_OBJECT_DIRECTORY").unwrap_or_else(|_| "objects".to_string()); - - git_dir().map(|git_dir| git_dir.join(git_object_dir_path)) + fn run(self, writer: &mut W) -> anyhow::Result<()> + where + W: Write; } diff --git a/src/main.rs b/src/main.rs index 0b63083..a279165 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod commands; +mod utils; use clap::Parser; use commands::Command; diff --git a/src/utils/env.rs b/src/utils/env.rs new file mode 100644 index 0000000..a4a9347 --- /dev/null +++ b/src/utils/env.rs @@ -0,0 +1,4 @@ +//! Environment variables used by the Git CLI + +pub(crate) const GIT_DIR: &str = "GIT_DIR"; +pub(crate) const GIT_OBJECT_DIRECTORY: &str = "GIT_OBJECT_DIRECTORY"; diff --git a/src/utils/hex.rs b/src/utils/hex.rs new file mode 100644 index 0000000..702122c --- /dev/null +++ b/src/utils/hex.rs @@ -0,0 +1,54 @@ +use anyhow::Context; + +const HEX_CHARS: &[u8] = b"0123456789abcdef"; + +/// Convert a binary slice to a hex slice. +pub(crate) fn encode_in_place(bytes: &mut Vec) { + for _ in 0..bytes.len() { + let byte = bytes.remove(0); + bytes.push(HEX_CHARS[(byte >> 4) as usize]); + bytes.push(HEX_CHARS[(byte & 0xf) as usize]); + } +} + +/// Convert a hex slice to a binary slice. 
+#[allow(unused)] +pub(crate) fn decode(hex: &[u8]) -> anyhow::Result> { + let mut bytes = Vec::with_capacity(hex.len() / 2); + + if hex.len() & 1 != 0 { + anyhow::bail!("invalid hex string"); + } + + for chunk in hex.chunks(2) { + let high = (chunk[0] as char) + .to_digit(16) + .context("invalid hex character")?; + let low = (chunk[1] as char) + .to_digit(16) + .context("invalid hex character")?; + bytes.push(((high << 4) | low) as u8); + } + + Ok(bytes) +} + +#[cfg(test)] +mod tests { + use crate::utils::hex; + + #[test] + fn hex_encode_in_place() { + let mut binary = vec![0x00, 0x01, 0x02, 0x03]; + hex::encode_in_place(&mut binary); + assert_eq!(binary, b"00010203"); + } + + #[test] + fn hex_decode() { + let hex = b"00010203"; + let binary = hex::decode(hex); + assert!(binary.is_ok()); + assert_eq!(binary.unwrap(), vec![0x00, 0x01, 0x02, 0x03]); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..595cec7 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,114 @@ +use std::path::PathBuf; + +use anyhow::Context; + +pub(crate) mod env; +pub(crate) mod hex; +pub(crate) mod objects; +pub(crate) mod test; + +/// Get the path of the current directory. +pub(crate) fn get_current_dir() -> anyhow::Result { + std::env::current_dir().context("get path of current directory") +} + +/// Get the path to the git directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `$GIT_DIR` +/// 2. 
`.git` +/// +/// # Returns +/// +/// The path to the git directory +pub(crate) fn git_dir() -> anyhow::Result { + let git_dir_path = std::env::var(env::GIT_DIR).unwrap_or_else(|_| ".git".to_string()); + let mut current_dir = get_current_dir()?; + + // Search for the git directory in the current directory and its parents + while current_dir.exists() { + let git_dir = current_dir.join(&git_dir_path); + + // Return the git directory if it exists + if git_dir.exists() { + return Ok(git_dir); + } + + let Some(parent_dir) = current_dir.parent() else { + break; + }; + + current_dir = parent_dir.to_path_buf(); + } + + anyhow::bail!( + "not a git repository (or any of the parent directories): {}", + git_dir_path + ) +} + +/// Get the path to the git object directory. +/// This could be either of the following (in order of precedence): +/// +/// 1. `/$GIT_OBJECT_DIRECTORY` +/// 2. `/objects` +/// +/// # Arguments +/// +/// * `check_exists` - Whether to check if the object directory exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the git object directory +pub(crate) fn git_object_dir(check_exists: bool) -> anyhow::Result { + let git_dir = git_dir()?; + let git_object_dir = + std::env::var(env::GIT_OBJECT_DIRECTORY).unwrap_or_else(|_| "objects".to_string()); + let git_object_dir = git_dir.join(&git_object_dir); + + // Check if the object directory exists + if check_exists && !git_object_dir.exists() { + anyhow::bail!( + "{}/{} directory does not exist", + git_dir.display(), + git_object_dir.display() + ); + } + + Ok(git_object_dir) +} + +/// Get the path to a git object. 
+/// The path is constructed as follows: +/// +/// `//` +/// +/// # Example +/// +/// If the default git and object directories are used, +/// the path for object `e7a11a969c037e00a796aafeff6258501ec15e9a` would be: +/// +/// `.git/objects/e7/a11a969c037e00a796aafeff6258501ec15e9a` +/// +/// # Arguments +/// +/// * `hash` - The object hash +/// * `check_exists` - Whether to check if the object exists, +/// exiting with an error if it does not +/// +/// # Returns +/// +/// The path to the object file +pub(crate) fn get_object_path(hash: &str, check_exists: bool) -> anyhow::Result { + let object_dir = git_object_dir(check_exists)?; + let object_dir = object_dir.join(&hash[..2]); + let object_path = object_dir.join(&hash[2..]); + + // Check if the object exists + if check_exists && !object_path.exists() { + anyhow::bail!("{} is not a valid object", hash); + } + + Ok(object_path) +} diff --git a/src/utils/objects.rs b/src/utils/objects.rs new file mode 100644 index 0000000..2aa2123 --- /dev/null +++ b/src/utils/objects.rs @@ -0,0 +1,90 @@ +//! Utilities for working with Git objects + +use std::fmt; + +use anyhow::Context; +use clap::ValueEnum; + +/// Format the header of a `.git/objects` file +pub(crate) fn format_header(object_type: O, size: S) -> String +where + O: fmt::Display, + S: fmt::Display, +{ + format!("{} {}\0", object_type, size) +} + +/// Parse the header of a `.git/objects` file into the [`ObjectHeader`] struct. 
+pub(crate) fn parse_header(header: &[u8]) -> anyhow::Result { + // Split the header into type and size + let mut header = header.splitn(2, |&b| b == b' '); + + let object_type = header.next().context("invalid object header")?; + let size = header.next().context("invalid object header")?; + let size = &size[..size.len().saturating_sub(1)]; // Remove the trailing null byte + + Ok(ObjectHeader { object_type, size }) +} + +/// The type of object in the Git object database +#[derive(Default, Debug, ValueEnum, Clone)] +pub(crate) enum ObjectType { + #[default] + Blob, + Tree, + Commit, + Tag, +} + +/// The header of a Git object +pub(crate) struct ObjectHeader<'a> { + /// The type of object + pub(crate) object_type: &'a [u8], + /// The size of the object in bytes + pub(crate) size: &'a [u8], +} + +impl ObjectHeader<'_> { + /// Parse the size of the object + pub(crate) fn parse_size(&self) -> anyhow::Result { + let size = std::str::from_utf8(self.size) + .context("object size is not valid utf-8")? 
+ .parse::() + .context("object size is not a number")?; + + Ok(size) + } + + /// Parse the type of the object + pub(crate) fn parse_type(&self) -> anyhow::Result { + ObjectType::try_from(self.object_type) + } +} + +impl fmt::Display for ObjectType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ObjectType::Blob => write!(f, "blob"), + ObjectType::Tree => write!(f, "tree"), + ObjectType::Commit => write!(f, "commit"), + ObjectType::Tag => write!(f, "tag"), + } + } +} + +impl TryFrom<&[u8]> for ObjectType { + type Error = anyhow::Error; + + fn try_from(value: &[u8]) -> anyhow::Result { + match value { + b"blob" => Ok(ObjectType::Blob), + b"tree" => Ok(ObjectType::Tree), + b"commit" => Ok(ObjectType::Commit), + b"tag" => Ok(ObjectType::Tag), + _ => { + let value = std::str::from_utf8(value).context("object type is not valid utf-8")?; + anyhow::bail!("unknown object type: {}", value) + }, + } + } +} diff --git a/src/utils/test.rs b/src/utils/test.rs new file mode 100644 index 0000000..dd5ccf7 --- /dev/null +++ b/src/utils/test.rs @@ -0,0 +1,105 @@ +//! Utility structs and functions for testing + +#![cfg(test)] + +use std::path::{Path, PathBuf}; + +/// A temporary environment for testing. +/// Changes the environment variable and restores it on drop. 
+/// Tests must be run serially to avoid conflicts (`cargo test -- --test-threads=1`) +/// +/// # Example +/// +/// ``` +/// # use crate::utils::test::TempEnv; +/// let temp_env = TempEnv::new("KEY", Some("VALUE")); +/// assert_eq!(std::env::var("KEY"), Ok("VALUE".to_string())); +/// +/// // The environment variable is restored when the `TempEnv` instance is dropped +/// drop(temp_env); +/// +/// // Setting the value to `None` unsets the environment variable +/// let temp_env = TempEnv::new("KEY", None); +/// assert!(std::env::var("KEY").is_err()); +/// +/// drop(temp_env); +/// ``` +pub(crate) struct TempEnv { + /// The environment variable's key + key: String, + /// The old value of the environment variable + old_value: Option, +} + +impl TempEnv { + /// Create a new temporary environment variable. + /// + /// * If `value` is `Some`, the environment variable is set to that value. + /// * If `value` is `None`, the environment variable is unset. + pub(crate) fn new(key: S, value: Option<&str>) -> Self + where + S: Into, + { + let key = key.into(); + let old_value = std::env::var(&key).ok(); + + if let Some(value) = value { + std::env::set_var(&key, value); + } else { + std::env::remove_var(&key); + } + + TempEnv { key, old_value } + } +} + +impl Drop for TempEnv { + fn drop(&mut self) { + if let Some(value) = &self.old_value { + std::env::set_var(&self.key, value); + } else { + std::env::remove_var(&self.key); + } + } +} + +/// A temporary directory for testing. +/// Changes the current directory to the temporary directory and restores it on drop. 
+/// +/// # Example +/// +/// ``` +/// # use crate::utils::test::TempPwd; +/// let temp_pwd = TempPwd::new(); +/// assert_eq!(std::env::current_dir().unwrap(), temp_pwd.temp_pwd.path()); +/// +/// // The current directory is restored when the `TempPwd` instance is dropped +/// drop(temp_pwd); +/// ``` +pub(crate) struct TempPwd { + old_pwd: PathBuf, + temp_pwd: tempfile::TempDir, +} + +impl TempPwd { + pub(crate) fn new() -> Self { + let old_pwd = std::env::current_dir().unwrap(); + let temp_pwd = tempfile::tempdir().unwrap(); + + // Change the current directory to the temporary directory + std::env::set_current_dir(&temp_pwd).unwrap(); + + Self { old_pwd, temp_pwd } + } + + pub(crate) fn path(&self) -> &Path { + self.temp_pwd.path() + } +} + +impl Drop for TempPwd { + fn drop(&mut self) { + // Restore the current directory + std::env::set_current_dir(&self.old_pwd).unwrap(); + } +}