Prototype extended data #660
base: main
Changes from all commits
@@ -4,7 +4,8 @@
 import os
 import pathlib
 from abc import ABC, abstractmethod
-from typing import List, Optional, Sequence, Tuple
+from contextlib import suppress
+from typing import Iterable, List, Optional, Sequence, Tuple

 from halo import Halo
 from patch_ng import fromfile
@@ -13,8 +14,13 @@
 from dfetch.manifest.project import ProjectEntry
 from dfetch.manifest.version import Version
 from dfetch.project.abstract_check_reporter import AbstractCheckReporter
-from dfetch.project.metadata import Metadata
-from dfetch.util.util import hash_directory, safe_rm
+from dfetch.project.metadata import FileInfo, Metadata
+from dfetch.util.util import (
+    hash_directory,
+    hash_file_normalized,
+    recursive_listdir,
+    safe_rm,
+)
 from dfetch.util.versions import latest_tag_from_list

 logger = get_logger(__name__)

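Note: FileInfo is imported from dfetch.project.metadata, whose definition is not part of this diff. Judging from the call sites below (relative path, normalized content hash, three-digit octal permissions), a hypothetical sketch of its shape could be:

    # Hypothetical sketch only; the real FileInfo lives in dfetch/project/metadata.py
    # and may differ. Field names are inferred from how it is used in this diff.
    from typing import NamedTuple

    class FileInfo(NamedTuple):
        path: str         # path relative to the project's local_path
        hash: str         # hex digest of the normalized file contents
        permissions: str  # last three octal digits of st_mode, e.g. "644"
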
@@ -111,7 +117,20 @@ def update(self, force: bool = False) -> None:
 
         if os.path.exists(self.local_path):
             logger.debug(f"Clearing destination {self.local_path}")
-            safe_rm(self.local_path)
+
+            with suppress(TypeError):
+                metadata_files = Metadata.from_file(self.__metadata.path).files
|
Comment on lines +121 to +122

Unexplained Error Suppression
What is the issue?
Silent error suppression without explaining why TypeError is expected or can be safely ignored.
Why this matters
Code maintainers will have to dig through the codebase to understand why this error is suppressed, making the code harder to understand and maintain.
Suggested change
Add a comment explaining the rationale:

    # Suppress TypeError when metadata file is invalid or has old format without 'files' field
    with suppress(TypeError):
        metadata_files = Metadata.from_file(self.__metadata.path).files

+
+            if metadata_files:
+                for file in metadata_files:
+                    full_path = os.path.join(self.local_path, file.path)
+                    safe_rm(full_path)
+                    parent_dir = os.path.dirname(full_path)
+                    # remove parent if empty
+                    if not os.listdir(parent_dir):
+                        safe_rm(parent_dir)
+            else:
+                safe_rm(self.local_path)
 
         with Halo(
             text=f"Fetching {self.__project.name} {to_fetch}",

@@ -130,10 +149,34 @@ def update(self, force: bool = False) -> None:
         else:
             logger.warning(f"Skipping non-existent patch {self.__project.patch}")
 
+        if os.path.isfile(self.local_path):
+            files_list = (
+                FileInfo(
+                    os.path.basename(self.local_path),
+                    hash_file_normalized(os.path.join(self.local_path)).hexdigest(),
+                    oct(os.stat(os.path.join(self.local_path)).st_mode)[-3:],
+                ),
+            )

Comment on lines +153 to +159

Redundant file operations
What is the issue?
Redundant os.path.join calls and file stat operations.
Why this matters
Multiple system calls to the same file waste I/O operations, which impacts performance, especially when dealing with many files.
Suggested change
Cache the joined path and the file stat result:

    full_path = os.path.join(self.local_path)
    stat_result = os.stat(full_path)
    files_list = (
        FileInfo(
            os.path.basename(self.local_path),
            hash_file_normalized(full_path).hexdigest(),
            oct(stat_result.st_mode)[-3:],
        ),
    )

+        else:
+            all_files = (
+                file_path
+                for file_path in recursive_listdir(self.local_path)
+                if file_path is not self.__metadata.FILENAME
+            )
+            files_list = (
+                FileInfo(
+                    os.path.relpath(file_path, self.local_path),
+                    hash_file_normalized(file_path).hexdigest(),
+                    oct(os.stat(file_path).st_mode)[-3:],
+                )
+                for file_path in all_files
+            )
+
         self.__metadata.fetched(
             actually_fetched,
             hash_=hash_directory(self.local_path, skiplist=[self.__metadata.FILENAME]),
             patch_=applied_patch,
+            files=files_list,
         )
 
         logger.debug(f"Writing repo metadata to: {self.__metadata.path}")

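As an aside on the permissions encoding used above and in the change-detection code further down: oct(st_mode)[-3:] keeps only the last three octal digits of the file mode, i.e. the familiar Unix permission bits. A quick illustration (the file name is a placeholder):

    import os

    mode = os.stat("some_file").st_mode  # placeholder path
    print(oct(mode))                     # e.g. '0o100644' for a regular rw-r--r-- file
    print(oct(mode)[-3:])                # '644' - the value stored as the FileInfo permissions
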
@@ -289,23 +332,24 @@ def on_disk_version(self) -> Optional[Version]:
         )
         return None
 
-    def _on_disk_hash(self) -> Optional[str]:
+    def _on_disk_hash(self) -> Tuple[Iterable[FileInfo], Optional[str]]:
         """Get the hash of the project on disk.
 
         Returns:
             Str: Could be None if no on disk version
         """
         if not os.path.exists(self.__metadata.path):
-            return None
+            return [], None
 
         try:
-            return Metadata.from_file(self.__metadata.path).hash
+            metadata = Metadata.from_file(self.__metadata.path)
+            return metadata.files, metadata.hash
         except TypeError:
             logger.warning(
                 f"{pathlib.Path(self.__metadata.path).relative_to(os.getcwd()).as_posix()}"
                 " is an invalid metadata file, not checking local hash!"
             )
-            return None
+            return [], None
 
     def _check_for_newer_version(self) -> Optional[Version]:
         """Check if a newer version is available on the given branch.

@@ -345,11 +389,24 @@ def _are_there_local_changes(self) -> bool:
             Bool: True if there are local changes, false if no were detected or no hash was found.
         """
         logger.debug(f"Checking if there were local changes in {self.local_path}")
-        on_disk_hash = self._on_disk_hash()
-
-        return bool(on_disk_hash) and on_disk_hash != hash_directory(
-            self.local_path, skiplist=[self.__metadata.FILENAME]
-        )
+        file_info, on_disk_hash = self._on_disk_hash()
+
+        if not file_info:
+            return bool(on_disk_hash) and on_disk_hash != hash_directory(
+                self.local_path, skiplist=[self.__metadata.FILENAME]
+            )

Comment on lines +395 to +398

Complex Boolean Logic
What is the issue?
Complex boolean expression with unclear fallback logic mixing multiple conditions.
Why this matters
The nested conditions and boolean operations make it difficult to understand the flow and intention of the code at a glance.
Suggested change
Split into more explicit conditions:

    if not file_info:
        if not on_disk_hash:
            return False
        current_hash = hash_directory(self.local_path, skiplist=[self.__metadata.FILENAME])
        return on_disk_hash != current_hash

+
+        for file in file_info:
+            full_path = os.path.join(self.local_path, file.path)
+            if hash_file_normalized(full_path).hexdigest() != file.hash:

Comment on lines +401 to +402

Missing File Existence Check
What is the issue?
The code doesn't check if the file exists before attempting to hash it, which could cause crashes.
Why this matters
If a file was deleted but still exists in the metadata, this will raise an unhandled exception when trying to hash a non-existent file.
Suggested change
Add a file existence check before hashing:

    full_path = os.path.join(self.local_path, file.path)
    if not os.path.exists(full_path):
        logger.debug(f"File {full_path} no longer exists!")
        return True
    if hash_file_normalized(full_path).hexdigest() != file.hash:

+                logger.debug(f"The hash of {full_path} changed!")
+                return True
+            if oct(os.stat(full_path).st_mode)[-3:] != file.permissions:
+                logger.debug(f"The file permissions of {full_path} changed!")
+                return True
+
+        return False
 
     @abstractmethod
     def _fetch_impl(self, version: Version) -> Version:

@@ -3,6 +3,7 @@
 import fnmatch
 import hashlib
 import os
+import re
 import shutil
 import stat
 from contextlib import contextmanager

@@ -104,6 +105,21 @@ def find_file(name: str, path: str = ".") -> List[str]:
     ]
 
 
+def recursive_listdir(directory):

Missing Type Hints
What is the issue?
Missing type hints for both the parameter and return type in the recursive_listdir function.
Why this matters
Type hints help with code understanding, IDE support, and static type checking. Their absence makes it harder to understand what the function expects and returns without diving into the implementation.
Suggested change

    def recursive_listdir(directory: str) -> Generator[str, None, None]:
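A minimal sketch of the fully annotated helper, assuming the Generator annotation suggested above is adopted (Generator must then be imported from typing; typing.Iterator[str] would be an equally valid choice):

    # Sketch only; mirrors the function added in this diff, with type hints applied.
    import os
    from typing import Generator

    def recursive_listdir(directory: str) -> Generator[str, None, None]:
        """Yield the paths of all files below the given directory."""
        for entry in os.listdir(directory):
            full_path = os.path.join(directory, entry)
            if os.path.isdir(full_path):
                yield from recursive_listdir(full_path)
            else:
                yield full_path
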
+    """List all entries in the current directory."""
+    entries = os.listdir(directory)

Comment on lines +108 to +110

Missing Error Handling in Directory Traversal
What is the issue?
The function recursive_listdir() doesn't handle potential permission errors or broken symlinks when accessing directories.
Why this matters
If the function encounters a directory without read permissions or a broken symlink, it will raise an unhandled OSError/PermissionError, causing the entire directory traversal to fail.
Suggested change
Add error handling to gracefully skip inaccessible directories:

    def recursive_listdir(directory):
        """List all entries in the current directory."""
        try:
            entries = os.listdir(directory)
            for entry in entries:
                full_path = os.path.join(directory, entry)
                try:
                    if os.path.isdir(full_path):
                        yield from recursive_listdir(full_path)
                    else:
                        yield full_path
                except (OSError, PermissionError):
                    continue
        except (OSError, PermissionError):
            return

+
+    for entry in entries:
+        full_path = os.path.join(directory, entry)

Comment on lines +110 to +113

Directory Traversal Vulnerability
What is the issue?
The recursive_listdir function is vulnerable to directory traversal attacks if the input directory path is not validated.
Why this matters
Without path validation, malicious input could potentially access files outside the intended directory tree through symbolic links or relative paths.
Suggested change
Add path validation and resolve symbolic links:

    directory = os.path.abspath(directory)
    if not os.path.realpath(directory).startswith(os.path.realpath(safe_root)):
        raise ValueError("Access denied: Directory outside allowed path")

+        if os.path.isdir(full_path):
+            # If the entry is a directory, recurse into it
+            yield from recursive_listdir(full_path)
+        else:
+            # If the entry is a file, yield its path
+            yield full_path
+
+
 def hash_directory(path: str, skiplist: Optional[List[str]]) -> str:
     """Hash a directory with all its files."""
     digest = hashlib.md5()  # nosec

@@ -131,3 +147,22 @@ def hash_file(file_path: str, digest: HASH) -> HASH:
             buf = f_obj.read(1024 * 1024)
 
     return digest
+
+
+def hash_file_normalized(file_path: str) -> "hashlib._Hash":
+    """
+    hash a file's contents, ignoring line feed differences (line ending normalization)
+    """
+    digest = hashlib.sha1(usedforsecurity=False)
+
+    if os.path.isfile(file_path):
+        normalize_re = re.compile(b"\r\n|\r")

Undocumented Regex Pattern
What is the issue?
Regular expression pattern is defined inside the function without explanation of what it matches.
Why this matters
Complex regex patterns without documentation or clear variable names make the code harder to understand and maintain.
Suggested change

    # Define at module level with clear name
    LINE_ENDING_PATTERN = re.compile(b"\r\n|\r")  # Matches Windows (CRLF) and old Mac (CR) line endings

+
+        with open(file_path, "rb") as f_obj:
+            buf = f_obj.read(1024 * 1024)
+            while buf:
+                normalized_buf = normalize_re.sub(b"\n", buf)
+                digest.update(normalized_buf)  # nosec
+                buf = f_obj.read(1024 * 1024)
+
+    return digest
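
For illustration, a small usage sketch of the new helper; the module path matches the import added earlier in this diff, while the file name is a placeholder:

    from dfetch.util.util import hash_file_normalized

    digest = hash_file_normalized("README.md")  # placeholder path
    print(digest.hexdigest())  # identical for CRLF, CR, and LF versions of the same content
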
Incorrect return type annotation for files property
What is the issue?
The files property returns Optional[Iterable[FileInfo]] but is annotated to return Iterable[FileInfo].
Why this matters
This type mismatch could cause runtime errors when consumers expect a non-None return value but receive None.
Suggested change
Update the return type annotation to match the actual return type.
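The suggested snippet itself was not captured here; based on the issue description, the fix would presumably adjust the property signature along these lines (a sketch, not the author's exact code):

    @property
    def files(self) -> Optional[Iterable[FileInfo]]:
        ...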