From 2cf95443d49c8d5547bba148a394cb7c24e28591 Mon Sep 17 00:00:00 2001 From: Jacobo de Vera Date: Sun, 4 Jan 2026 16:57:23 +0000 Subject: [PATCH 1/2] feat: add explicit output format flag --- README.md | 6 ++ ebookatty/cli.py | 134 +++++++++++++++++++++++++++++----------- ebookatty/metadata.py | 7 ++- tests/test_ebookatty.py | 39 ++++++++++++ 4 files changed, 147 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 821b090..50663bd 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,12 @@ __example 3__ ebookatty /path/to/specific/ebook.azw3 ``` +### Output options + +* `-o/--output PATH` writes metadata to a file. When the extension is `.json` or `.csv`, the data is emitted in that format. Use `-o -` to route the formatted output to STDOUT without creating a file. +* `-F/--format {table,json,csv}` explicitly selects the output format and overrides any extension-derived choice. `table` matches the default on-screen view shown above. +* When an output file is chosen, nothing is printed to the screen so the data only exists in one place. 
+ __example output__ ``` diff --git a/ebookatty/cli.py b/ebookatty/cli.py index 4e82479..591bed1 100644 --- a/ebookatty/cli.py +++ b/ebookatty/cli.py @@ -24,9 +24,9 @@ import sys from glob import glob from pathlib import Path -from typing import List +from typing import List, Optional, TextIO -from ebookatty import MetadataFetcher +from ebookatty.metadata import MetadataFetcher, format_output def find_matches(files: List[str]) -> List[str]: @@ -49,6 +49,88 @@ def find_matches(files: List[str]) -> List[str]: return matches +def _determine_format(selected_format: Optional[str], output_target: Optional[str]) -> str: + """Resolve the format that should be used for emitting metadata.""" + if selected_format: + return selected_format + if output_target and output_target != "-": + suffix = Path(output_target).suffix.lower() + if suffix == ".json": + return "json" + if suffix == ".csv": + return "csv" + return "table" + + +def _normalize_csv_value(record): + if isinstance(record, list): + record = record[0] + if isinstance(record, int): + record = str(record) + if isinstance(record, bytes): # pragma: nocover + try: + record = str(record[0], encoding="utf8", errors="ignore") + except Exception: # pragma: nocover + return "" + return record if isinstance(record, str) else str(record or "") + + +def _write_csv(datas, stream: TextIO) -> None: + d = set() + for row in datas: + for key in row.keys(): + d.add(key) + headers = list(d) + layers = [headers] + for row in datas: + layer = [] + for header in headers: + record = row.get(header, "") + record = _normalize_csv_value(record) + layer.append(record) + layers.append(layer) + for layer in layers: + try: + stream.write(",".join(layer) + "\n") + except Exception: # pragma: nocover + continue + + +def _write_json(datas, stream: TextIO) -> None: + json.dump(datas, stream) + stream.write("\n") + + +def _write_table(datas, stream: TextIO) -> None: + for row in datas: + if not row: + continue + table = format_output(row, emit=False) + 
stream.write(table) + + +def _emit_output(datas, output_format: str, output_target: Optional[str]) -> None: + if output_target and output_target != "-": + kwargs = {"encoding": "utf-8"} + if output_format == "csv": + kwargs["errors"] = "ignore" + stream: TextIO = open(Path(output_target), "w", **kwargs) + close_stream = True + else: + stream = sys.stdout + close_stream = False + try: + if output_format == "json": + _write_json(datas, stream) + elif output_format == "csv": + _write_csv(datas, stream) + elif output_format == "table": + _write_table(datas, stream) + finally: + if close_stream: + stream.close() + + def execute(): """ Execute the program. @@ -64,50 +146,30 @@ def execute(): parser.add_argument( "-o", "--output", - help="file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None", + help="file path where metadata will be written. Use '-' to send data to STDOUT. Acceptable formats include json and csv and are determined based on the file extension. Default is None", action="store", ) + parser.add_argument( + "-F", + "--format", + choices=["table", "json", "csv"], + help="output format to use for metadata. 
Defaults to 'table' (plain text) unless an output file extension specifies json or csv.", + dest="output_format", + ) if len(sys.argv[1:]) == 0: sys.argv.append("-h") args = parser.parse_args(sys.argv[1:]) file_list = args.file matches = find_matches(file_list) datas = [] + output_format = _determine_format(args.output_format, args.output) + show_table_now = output_format == "table" and not args.output for match in matches: fetcher = MetadataFetcher(match) data = fetcher.get_metadata() datas.append(data) - if not args.output: + if show_table_now: fetcher.show_metadata() - if args.output: - path = Path(args.output) - if path.suffix == ".json": - json.dump(datas, open(path, "wt")) - elif path.suffix == ".csv": - d = set() - for row in datas: - for key in row.keys(): - d.add(key) - headers = list(d) - layers = [headers] - for row in datas: - layer = [] - for header in headers: - record = row.get(header, "") - if isinstance(record, list): - record = record[0] - if isinstance(record, int): - record = str(record) - if isinstance(record, bytes): # pragma: nocover - try: - record = str(record[0], encoding="utf8", errors="ignore") - except: - continue - layer.append(record) - layers.append(layer) - with open(path, "wt", encoding="utf-8", errors="ignore") as fd: - for layer in layers: - try: - fd.write(",".join(layer) + "\n") - except: - continue + needs_output = bool(args.output) or output_format in ("json", "csv") + if needs_output: + _emit_output(datas, output_format, args.output) diff --git a/ebookatty/metadata.py b/ebookatty/metadata.py index 3669b4e..4555624 100644 --- a/ebookatty/metadata.py +++ b/ebookatty/metadata.py @@ -103,7 +103,7 @@ def fetch_metadata(path: Union[str | Path]) -> Dict[str, str]: return None -def format_output(book: dict) -> str: +def format_output(book: dict, emit: bool = True) -> str: """ Format the output for printing to STDOUT. 
@@ -147,8 +147,9 @@ def format_output(book: dict) -> str: output.insert(0, "\n" + ("-" * long_line)) output.append(("-" * long_line) + "\n") final = "\n".join(output) - print(final) - return output + if emit: + print(final) + return final def text_sections(section_size: int, text: str) -> Generator: diff --git a/tests/test_ebookatty.py b/tests/test_ebookatty.py index da32ec5..1e9590e 100644 --- a/tests/test_ebookatty.py +++ b/tests/test_ebookatty.py @@ -1,3 +1,4 @@ +import json import sys import shutil import os @@ -62,6 +63,44 @@ def test_cli(testdir, flag, outdir, pattern, ext): assert not out +def test_cli_stdout_json(testdir, capsys): + files = os.path.join(testdir, "*.epub") + sys.argv = ["ebookatty", files, "--format", "json"] + execute() + captured = capsys.readouterr() + payload = json.loads(captured.out) + assert isinstance(payload, list) + assert payload + + +def test_cli_stdout_csv(testdir, capsys): + files = os.path.join(testdir, "*.epub") + sys.argv = ["ebookatty", files, "--format", "csv"] + execute() + captured = capsys.readouterr() + lines = [line for line in captured.out.strip().splitlines() if line] + assert lines + assert "," in lines[0] + + +def test_cli_stdout_table_via_dash(testdir, capsys): + files = os.path.join(testdir, "*.epub") + sys.argv = ["ebookatty", files, "-o", "-"] + execute() + captured = capsys.readouterr() + assert "---" in captured.out + + +def test_cli_format_precedence(testdir, outdir): + files = os.path.join(testdir, "*.epub") + outfile = os.path.join(outdir, "forced.json.csv") + sys.argv = ["ebookatty", files, "-o", outfile, "--format", "json"] + execute() + with open(outfile, "r", encoding="utf-8") as fd: + payload = json.load(fd) + assert isinstance(payload, list) + + def test_main_execute(): import sys args = ["ebookatty"] From 29b7712ccfc6b3bd5106f347f6866fb303849623 Mon Sep 17 00:00:00 2001 From: Jacobo de Vera Date: Sun, 4 Jan 2026 16:58:41 +0000 Subject: [PATCH 2/2] fix: remove package.json dependency from setup --- 
setup.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index d9789b4..d8bb6fb 100644 --- a/setup.py +++ b/setup.py @@ -1,25 +1,25 @@ +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import json -import os -from setuptools import find_packages, setup +from pathlib import Path -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -PACKAGE_JSON = os.path.join(BASE_DIR,"package.json") +from setuptools import find_packages, setup -info = json.load(open("package.json")) +BASE_DIR = Path(__file__).parent +DESCRIPTION = "Tool for extracting metadata from common ebook formats" +LONG_DESCRIPTION = (BASE_DIR / "README.md").read_text(encoding="utf-8") -long_description = open("README.md",encoding="utf-8").read() setup( - name=info["name"], - version=info["version"], - description=info["description"], - long_description=long_description, + name="ebookatty", + version="0.3.1", + description=DESCRIPTION, + long_description=LONG_DESCRIPTION, + long_description_content_type="text/markdown", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", @@ -32,19 +32,17 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - ], - keywords=info["keywords"], - author=info["author"], - author_email=info["email"], + keywords=["ebook", "metadata", "mobi", "epub"], + author="alexpdev", + author_email="alexpdev@protonmail.com", entry_points={"console_scripts": ["ebookatty = ebookatty.__main__:main"]}, - url=info["url"], + url="https://github.com/alexpdev/ebookatty", project_urls={"Source Code": 
"https://github.com/alexpdev/ebookatty"}, - license=info["license"], + license="GNU LGPL v3", packages=find_packages(exclude=["tests", "env"]), include_package_data=True, python_requires=">=3.6", - setup_requires=["setuptools"], zip_safe=False, test_suite="complete", )