6 changes: 6 additions & 0 deletions README.md
@@ -115,6 +115,12 @@ __example 3__
ebookatty /path/to/specific/ebook.azw3
```

### Output options

* `-o/--output <path>` writes metadata to a file. When the extension is `.json` or `.csv`, the data is emitted in that format. Use `-o -` to route the formatted output to STDOUT without creating a file.
* `-F/--format {table,json,csv}` explicitly selects the output format and overrides any extension-derived choice. `table` matches the default on-screen view shown above.
* When an output file is given, the table is not echoed to the screen; the metadata is written only to that file.
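
For example, assuming an ebook library under `/path/to/library` (the paths and file names are placeholders):

```
ebookatty /path/to/library/*.epub -o metadata.json
ebookatty /path/to/library/*.epub -F csv -o metadata.csv
ebookatty /path/to/specific/ebook.azw3 --format json -o -
```

The last command prints JSON to STDOUT instead of writing a file.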


__example output__
```
134 changes: 98 additions & 36 deletions ebookatty/cli.py
@@ -24,9 +24,9 @@
import sys
from glob import glob
from pathlib import Path
from typing import List
from typing import List, Optional, TextIO

from ebookatty import MetadataFetcher
from ebookatty.metadata import MetadataFetcher, format_output


def find_matches(files: List[str]) -> List[str]:
@@ -49,6 +49,88 @@ def find_matches(files: List[str]) -> List[str]:
return matches


def _determine_format(selected_format: Optional[str], output_target: Optional[str]) -> str:
"""Resolve the format that should be used for emitting metadata."""
if selected_format:
return selected_format
if output_target and output_target != "-":
suffix = Path(output_target).suffix.lower()
if suffix == ".json":
return "json"
if suffix == ".csv":
return "csv"
return "table"


def _normalize_csv_value(record):
    """Coerce a metadata value (list, int, bytes or str) into a plain string for a CSV cell."""
    if isinstance(record, list):
        record = record[0]
    if isinstance(record, int):
        record = str(record)
    if isinstance(record, bytes): # pragma: nocover
        try:
            record = record.decode("utf8", errors="ignore")
        except Exception: # pragma: nocover
            return ""
    return record if isinstance(record, str) else str(record or "")


def _write_csv(datas, stream: TextIO) -> None:
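    """Write a minimal CSV: a header row with the union of all keys, then one row per book.

    Values are joined with commas and are not quoted, so embedded commas are not escaped.
    """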
d = set()
for row in datas:
for key in row.keys():
d.add(key)
headers = list(d)
layers = [headers]
for row in datas:
layer = []
for header in headers:
record = row.get(header, "")
record = _normalize_csv_value(record)
layer.append(record)
layers.append(layer)
for layer in layers:
try:
stream.write(",".join(layer) + "\n")
except Exception: # pragma: nocover
continue


def _write_json(datas, stream: TextIO) -> None:
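    """Dump the collected metadata list as a single JSON document followed by a newline."""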
json.dump(datas, stream)
stream.write("\n")


def _write_table(datas, stream: TextIO) -> None:
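    """Render each metadata dict with format_output and write the plain-text tables to the stream."""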
for row in datas:
if not row:
continue
table = format_output(row, emit=False)
stream.write(table)


def _emit_output(datas, output_format: str, output_target: Optional[str]) -> None:
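    """Write the collected metadata to output_target in output_format; '-' or no target means STDOUT."""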
if output_target and output_target != "-":
kwargs = {"encoding": "utf-8"}
if output_format == "csv":
kwargs["errors"] = "ignore"
stream: TextIO = open(Path(output_target), "w", **kwargs)
close_stream = True
else:
stream = sys.stdout
close_stream = False
try:
if output_format == "json":
_write_json(datas, stream)
elif output_format == "csv":
_write_csv(datas, stream)
elif output_format == "table":
_write_table(datas, stream)
finally:
if close_stream:
stream.close()


def execute():
"""
Execute the program.
@@ -64,50 +146,30 @@ def execute():
parser.add_argument(
"-o",
"--output",
help="file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None",
help="file path where metadata will be written. Use '-' to send data to STDOUT. Acceptable formats include json and csv and are determined based on the file extension. Default is None",
action="store",
)
parser.add_argument(
"-F",
"--format",
choices=["table", "json", "csv"],
help="output format to use for metadata. Defaults to 'table' (plain text) unless an output file extension specifies json or csv.",
dest="output_format",
)
if len(sys.argv[1:]) == 0:
sys.argv.append("-h")
args = parser.parse_args(sys.argv[1:])
file_list = args.file
matches = find_matches(file_list)
datas = []
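    # Print the table to the screen right away only when table format is in effect and no output target was given.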
output_format = _determine_format(args.output_format, args.output)
show_table_now = output_format == "table" and not args.output
for match in matches:
fetcher = MetadataFetcher(match)
data = fetcher.get_metadata()
datas.append(data)
if not args.output:
if show_table_now:
fetcher.show_metadata()
if args.output:
path = Path(args.output)
if path.suffix == ".json":
json.dump(datas, open(path, "wt"))
elif path.suffix == ".csv":
d = set()
for row in datas:
for key in row.keys():
d.add(key)
headers = list(d)
layers = [headers]
for row in datas:
layer = []
for header in headers:
record = row.get(header, "")
if isinstance(record, list):
record = record[0]
if isinstance(record, int):
record = str(record)
if isinstance(record, bytes): # pragma: nocover
try:
record = str(record[0], encoding="utf8", errors="ignore")
except:
continue
layer.append(record)
layers.append(layer)
with open(path, "wt", encoding="utf-8", errors="ignore") as fd:
for layer in layers:
try:
fd.write(",".join(layer) + "\n")
except:
continue
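    # Re-emit the collected metadata whenever an output target was given or a non-table format was requested.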
needs_output = bool(args.output) or output_format in ("json", "csv")
if needs_output:
_emit_output(datas, output_format, args.output)
7 changes: 4 additions & 3 deletions ebookatty/metadata.py
@@ -103,7 +103,7 @@ def fetch_metadata(path: Union[str | Path]) -> Dict[str, str]:
return None


def format_output(book: dict) -> str:
def format_output(book: dict, emit: bool = True) -> str:
"""
Format the output for printing to STDOUT.

@@ -147,8 +147,9 @@ def format_output(book: dict) -> str:
output.insert(0, "\n" + ("-" * long_line))
output.append(("-" * long_line) + "\n")
final = "\n".join(output)
print(final)
return output
if emit:
print(final)
return final


def text_sections(section_size: int, text: str) -> Generator:
34 changes: 16 additions & 18 deletions setup.py
@@ -1,25 +1,25 @@
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import os
from setuptools import find_packages, setup
from pathlib import Path

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
PACKAGE_JSON = os.path.join(BASE_DIR,"package.json")
from setuptools import find_packages, setup

info = json.load(open("package.json"))
BASE_DIR = Path(__file__).parent
DESCRIPTION = "Tool for extracting metadata from common ebook formats"
LONG_DESCRIPTION = (BASE_DIR / "README.md").read_text(encoding="utf-8")

long_description = open("README.md",encoding="utf-8").read()

setup(
name=info["name"],
version=info["version"],
description=info["description"],
long_description=long_description,
name="ebookatty",
version="0.3.1",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown",
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
@@ -32,19 +32,17 @@
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",

],
keywords=info["keywords"],
author=info["author"],
author_email=info["email"],
keywords=["ebook", "metadata", "mobi", "epub"],
author="alexpdev",
author_email="alexpdev@protonmail.com",
entry_points={"console_scripts": ["ebookatty = ebookatty.__main__:main"]},
url=info["url"],
url="https://github.com/alexpdev/ebookatty",
project_urls={"Source Code": "https://github.com/alexpdev/ebookatty"},
license=info["license"],
license="GNU LGPL v3",
packages=find_packages(exclude=["tests", "env"]),
include_package_data=True,
python_requires=">=3.6",
setup_requires=["setuptools"],
zip_safe=False,
test_suite="complete",
)
39 changes: 39 additions & 0 deletions tests/test_ebookatty.py
@@ -1,3 +1,4 @@
import json
import sys
import shutil
import os
@@ -62,6 +63,44 @@ def test_cli(testdir, flag, outdir, pattern, ext):
assert not out


def test_cli_stdout_json(testdir, capsys):
files = os.path.join(testdir, "*.epub")
sys.argv = ["ebookatty", files, "--format", "json"]
execute()
captured = capsys.readouterr()
payload = json.loads(captured.out)
assert isinstance(payload, list)
assert payload


def test_cli_stdout_csv(testdir, capsys):
files = os.path.join(testdir, "*.epub")
sys.argv = ["ebookatty", files, "--format", "csv"]
execute()
captured = capsys.readouterr()
lines = [line for line in captured.out.strip().splitlines() if line]
assert lines
assert "," in lines[0]


def test_cli_stdout_table_via_dash(testdir, capsys):
files = os.path.join(testdir, "*.epub")
sys.argv = ["ebookatty", files, "-o", "-"]
execute()
captured = capsys.readouterr()
assert "---" in captured.out


def test_cli_format_precedence(testdir, outdir):
files = os.path.join(testdir, "*.epub")
outfile = os.path.join(outdir, "forced.json.csv")
sys.argv = ["ebookatty", files, "-o", outfile, "--format", "json"]
execute()
with open(outfile, "r", encoding="utf-8") as fd:
payload = json.load(fd)
assert isinstance(payload, list)


def test_main_execute():
import sys
args = ["ebookatty"]