Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/github-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"]
use-crypto-lib: ["cryptography"]
include:
- python-version: "3.7"
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:
cache-dependency-path: '**/requirements/ci.txt'
- name: Setup Python (3.11+)
uses: actions/setup-python@v5
if: matrix.python-version == '3.11' || matrix.python-version == '3.12'
if: matrix.python-version == '3.11' || matrix.python-version == '3.12' || matrix.python-version == '3.13-dev'
with:
python-version: ${{ matrix.python-version }}
allow-prereleases: true
Expand All @@ -106,7 +106,7 @@ jobs:
- name: Install requirements (Python 3.11+)
run: |
pip install -r requirements/ci-3.11.txt
if: matrix.python-version == '3.11' || matrix.python-version == '3.12'
if: matrix.python-version == '3.11' || matrix.python-version == '3.12' || matrix.python-version == '3.13-dev'
- name: Remove pycryptodome and cryptography
run: |
pip uninstall pycryptodome cryptography -y
Expand Down Expand Up @@ -215,8 +215,8 @@ jobs:
- name: Check Number of Downloaded Files
run: |
downloaded_files_count=$(find \.coverage* -type f | wc -l)
if [ $downloaded_files_count -eq 8 ]; then
echo "The expected number of files (8) were downloaded."
if [ $downloaded_files_count -eq 9 ]; then
echo "The expected number of files (9) were downloaded."
else
echo "ERROR: Expected 9 files, but found $downloaded_files_count files."
exit 1
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ on:
permissions:
contents: write

env:
HEAD_COMMIT_MESSAGE: ${{ github.event.head_commit.message }}

jobs:
build_and_publish:
name: Publish a new version
Expand All @@ -24,15 +27,15 @@ jobs:
- name: Extract version from commit message
id: extract_version
run: |
VERSION=$(echo "${{ github.event.head_commit.message }}" | grep -oP '(?<=REL: )\d+\.\d+\.\d+')
VERSION=$(echo "$HEAD_COMMIT_MESSAGE" | grep -oP '(?<=REL: )\d+\.\d+\.\d+')
echo "version=$VERSION" >> $GITHUB_OUTPUT

- name: Extract tag message from commit message
id: extract_message
run: |
VERSION="${{ steps.extract_version.outputs.version }}"
delimiter="$(openssl rand -hex 8)"
MESSAGE=$(echo "${{ github.event.head_commit.message }}" | sed "0,/REL: $VERSION/s///" )
MESSAGE=$(echo "$HEAD_COMMIT_MESSAGE" | sed "0,/REL: $VERSION/s///" )
echo "message<<${delimiter}" >> $GITHUB_OUTPUT
echo "$MESSAGE" >> $GITHUB_OUTPUT
echo "${delimiter}" >> $GITHUB_OUTPUT
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pypdf/gr
* [ediamondscience](https://github.com/ediamondscience)
* [Ermeson, Felipe](https://github.com/FelipeErmeson)
* [Freitag, François](https://github.com/francoisfreitag)
* [Gagnon, William G.](https://github.com/williamgagnon)
* [Górny, Michał](https://github.com/mgorny)
* [Grillo, Miguel](https://github.com/Ineffable22)
* [Gutteridge, David H.](https://github.com/dhgutteridge)
Expand Down
30 changes: 11 additions & 19 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from typing import Any, Dict, List, Tuple, Union, cast

from ._codecs import adobe_glyphs, charset_encoding
from ._utils import b_, logger_error, logger_warning
from ._utils import logger_error, logger_warning
from .generic import (
DecodedStreamObject,
DictionaryObject,
IndirectObject,
NullObject,
StreamObject,
)
Expand Down Expand Up @@ -258,7 +257,7 @@ def prepare_cm(ft: DictionaryObject) -> bytes:
tu = ft["/ToUnicode"]
cm: bytes
if isinstance(tu, StreamObject):
cm = b_(cast(DecodedStreamObject, ft["/ToUnicode"]).get_data())
cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
elif isinstance(tu, str) and tu.startswith("/Identity"):
# the full range 0000-FFFF will be processed
cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
Expand Down Expand Up @@ -448,34 +447,27 @@ def compute_space_width(
en: int = cast(int, ft["/LastChar"])
if st > space_code or en < space_code:
raise Exception("Not in range")
if w[space_code - st] == 0:
if w[space_code - st].get_object() == 0:
raise Exception("null width")
sp_width = w[space_code - st]
sp_width = w[space_code - st].get_object()
except Exception:
if "/FontDescriptor" in ft and "/MissingWidth" in cast(
DictionaryObject, ft["/FontDescriptor"]
):
sp_width = ft["/FontDescriptor"]["/MissingWidth"] # type: ignore
sp_width = ft["/FontDescriptor"]["/MissingWidth"].get_object() # type: ignore
else:
# will consider width of char as avg(width)/2
m = 0
cpt = 0
for x in w:
if x > 0:
m += x
for xx in w:
xx = xx.get_object()
if xx > 0:
m += xx
cpt += 1
sp_width = m / max(1, cpt) / 2

if isinstance(sp_width, IndirectObject):
# According to
# 'Table 122 - Entries common to all font descriptors (continued)'
# the MissingWidth should be a number, but according to #2286 it can
# be an indirect object
obj = sp_width.get_object()
if obj is None or isinstance(obj, NullObject):
return 0.0
return obj # type: ignore

if sp_width is None or isinstance(sp_width, NullObject):
sp_width = 0.0
return sp_width


Expand Down
3 changes: 1 addition & 2 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
from ._page import PageObject, _VirtualList
from ._page_labels import index2label as page_index2page_label
from ._utils import (
b_,
deprecate_with_replacement,
logger_warning,
parse_iso8824_date,
Expand Down Expand Up @@ -1258,7 +1257,7 @@ def xfa(self) -> Optional[Dict[str, Any]]:
if isinstance(f, IndirectObject):
field = cast(Optional[EncodedStreamObject], f.get_object())
if field:
es = zlib.decompress(b_(field._data))
es = zlib.decompress(field._data)
retval[tag] = es
return retval

Expand Down
6 changes: 3 additions & 3 deletions pypdf/_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
rc4_encrypt,
)

from ._utils import b_, logger_warning
from ._utils import logger_warning
from .generic import (
ArrayObject,
ByteStringObject,
Expand Down Expand Up @@ -78,7 +78,7 @@ def encrypt_object(self, obj: PdfObject) -> PdfObject:
elif isinstance(obj, StreamObject):
obj2 = StreamObject()
obj2.update(obj)
obj2.set_data(self.stm_crypt.encrypt(b_(obj._data)))
obj2.set_data(self.stm_crypt.encrypt(obj._data))
for key, value in obj.items(): # Dont forget the Stream dict.
obj2[key] = self.encrypt_object(value)
obj = obj2
Expand All @@ -96,7 +96,7 @@ def decrypt_object(self, obj: PdfObject) -> PdfObject:
data = self.str_crypt.decrypt(obj.original_bytes)
obj = create_string_object(data)
elif isinstance(obj, StreamObject):
obj._data = self.stm_crypt.decrypt(b_(obj._data))
obj._data = self.stm_crypt.decrypt(obj._data)
for key, value in obj.items(): # Dont forget the Stream dict.
obj[key] = self.decrypt_object(value)
elif isinstance(obj, DictionaryObject):
Expand Down
10 changes: 9 additions & 1 deletion pypdf/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
from ._utils import (
StrByteType,
deprecate_with_replacement,
str_,
)
from ._writer import PdfWriter
from .constants import GoToActionArguments, TypArguments, TypFitArguments
Expand Down Expand Up @@ -82,6 +81,15 @@ def __init__(self, pagedata: PageObject, src: PdfReader, id: int) -> None:
self.id = id


# transferred from _utils: this function is only required here,
# and the merger will soon be deprecated
def str_(b: Any) -> str:  # pragma: no cover
    """
    Return a text representation of ``b``.

    ``bytes`` input is decoded as latin-1; any other value is passed
    through ``str()``, which uses the object's ``__str__`` if defined.
    """
    return b.decode("latin-1") if isinstance(b, bytes) else str(b)


class PdfMerger:
"""
Use :class:`PdfWriter` instead.
Expand Down
16 changes: 8 additions & 8 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ def _add_transformation_matrix(
FloatObject(e),
FloatObject(f),
],
" cm",
b"cm",
],
)
return contents
Expand All @@ -870,7 +870,7 @@ def _get_contents_as_bytes(self) -> Optional[bytes]:
if isinstance(obj, list):
return b"".join(x.get_object().get_data() for x in obj)
else:
return cast(bytes, cast(EncodedStreamObject, obj).get_data())
return cast(EncodedStreamObject, obj).get_data()
else:
return None

Expand Down Expand Up @@ -1063,11 +1063,11 @@ def _merge_page(
rect.height,
],
),
"re",
b"re",
),
)
page2content.operations.insert(1, ([], "W"))
page2content.operations.insert(2, ([], "n"))
page2content.operations.insert(1, ([], b"W"))
page2content.operations.insert(2, ([], b"n"))
if page2transformation is not None:
page2content = page2transformation(page2content)
page2content = PageObject._content_stream_rename(
Expand Down Expand Up @@ -1201,11 +1201,11 @@ def _merge_page_writer(
rect.height,
],
),
"re",
b"re",
),
)
page2content.operations.insert(1, ([], "W"))
page2content.operations.insert(2, ([], "n"))
page2content.operations.insert(1, ([], b"W"))
page2content.operations.insert(2, ([], b"n"))
if page2transformation is not None:
page2content = page2transformation(page2content)
page2content = PageObject._content_stream_rename(
Expand Down
5 changes: 2 additions & 3 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
from ._utils import (
StrByteType,
StreamType,
b_,
logger_warning,
read_non_whitespace,
read_previous_line,
Expand Down Expand Up @@ -328,7 +327,7 @@ def _get_object_from_stream(
assert cast(str, obj_stm["/Type"]) == "/ObjStm"
# /N is the number of indirect objects in the stream
assert idx < obj_stm["/N"]
stream_data = BytesIO(b_(obj_stm.get_data()))
stream_data = BytesIO(obj_stm.get_data())
for i in range(obj_stm["/N"]): # type: ignore
read_non_whitespace(stream_data)
stream_data.seek(-1, 1)
Expand Down Expand Up @@ -932,7 +931,7 @@ def _read_pdf15_xref_stream(
xrefstream = cast(ContentStream, read_object(stream, self))
assert cast(str, xrefstream["/Type"]) == "/XRef"
self.cache_indirect_object(generation, idnum, xrefstream)
stream_data = BytesIO(b_(xrefstream.get_data()))
stream_data = BytesIO(xrefstream.get_data())
# Index pairs specify the subsections in the dictionary. If
# none create one subsection that spans everything.
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
Expand Down
26 changes: 17 additions & 9 deletions pypdf/_text_extraction/_layout_mode/_font.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Font constants and classes for "layout" mode text operations"""

from dataclasses import dataclass, field
from typing import Any, Dict, Sequence, Union
from typing import Any, Dict, Sequence, Union, cast

from ...errors import ParseError
from ...generic import IndirectObject
from ._font_widths import STANDARD_WIDTHS

Expand Down Expand Up @@ -58,6 +59,7 @@ def __post_init__(self) -> None:
skip_count = 0
_w = d_font.get("/W", [])
for idx, w_entry in enumerate(_w):
w_entry = w_entry.get_object()
if skip_count:
skip_count -= 1
continue
Expand All @@ -66,32 +68,38 @@ def __post_init__(self) -> None:
# warning and or use reader's "strict" to force an ex???
continue
# check for format (1): `int [int int int int ...]`
if isinstance(_w[idx + 1], Sequence):
start_idx, width_list = _w[idx : idx + 2]
w_next_entry = _w[idx + 1].get_object()
if isinstance(w_next_entry, Sequence):
start_idx, width_list = w_entry, w_next_entry
self.width_map.update(
{
ord_map[_cidx]: _width
for _cidx, _width in zip(
range(start_idx, start_idx + len(width_list), 1),
range(cast(int, start_idx), cast(int, start_idx) + len(width_list), 1),
width_list,
)
if _cidx in ord_map
}
)
skip_count = 1
# check for format (2): `int int int`
if not isinstance(_w[idx + 1], Sequence) and not isinstance(
_w[idx + 2], Sequence
):
start_idx, stop_idx, const_width = _w[idx : idx + 3]
elif isinstance(w_next_entry, (int, float)) and isinstance(_w[idx + 2].get_object(), (int, float)):
start_idx, stop_idx, const_width = w_entry, w_next_entry, _w[idx + 2].get_object()
self.width_map.update(
{
ord_map[_cidx]: const_width
for _cidx in range(start_idx, stop_idx + 1, 1)
for _cidx in range(cast(int, start_idx), cast(int, stop_idx + 1), 1)
if _cidx in ord_map
}
)
skip_count = 2
else:
# Note: this doesn't handle the case of out of bounds (reaching the end of the width definitions
# while expecting more elements). This raises an IndexError which is sufficient.
raise ParseError(
f"Invalid font width definition. Next elements: {w_entry}, {w_next_entry}, {_w[idx + 2]}"
) # pragma: no cover

if not self.width_map and "/BaseFont" in self.font_dictionary:
for key in STANDARD_WIDTHS:
if self.font_dictionary["/BaseFont"].startswith(f"/{key}"):
Expand Down
Loading