Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 44 additions & 25 deletions magic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,27 @@ class Magic:
Magic is a wrapper around the libmagic C library.
"""

def __init__(self, mime=False, magic_file=None, mime_encoding=False,
keep_going=False, uncompress=False, raw=False, extension=False,
follow_symlinks=False, check_tar=True, check_soft=True,
check_apptype=True, check_elf=True, check_text=True,
check_cdf=True, check_csv=True, check_encoding=True,
check_json=True, check_simh=True):
def __init__(
self,
mime=False,
magic_file=None,
mime_encoding=False,
keep_going=False,
uncompress=False,
raw=False,
extension=False,
follow_symlinks=False,
check_tar=True,
check_soft=True,
check_apptype=True,
check_elf=True,
check_text=True,
check_cdf=True,
check_csv=True,
check_encoding=True,
check_json=True,
check_simh=True,
):
"""
Create a new libmagic wrapper.

Expand Down Expand Up @@ -101,7 +116,9 @@ def __init__(self, mime=False, magic_file=None, mime_encoding=False,
# MAGIC_EXTENSION was added in 523 or 524, so bail if
# it doesn't appear to be available
if extension and (not _has_version or version() < 524):
raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic')
raise NotImplementedError(
"MAGIC_EXTENSION is not supported in this version of libmagic"
)

# For https://github.com/ahupp/python-magic/issues/190
# libmagic has fixed internal limits that some files exceed, causing
Expand All @@ -128,7 +145,7 @@ def from_buffer(self, buf):
# which is not what libmagic expects
# NEXTBREAK: only take bytes
if type(buf) == str and str != bytes:
buf = buf.encode('utf-8', errors='replace')
buf = buf.encode("utf-8", errors="replace")
return maybe_decode(magic_buffer(self.cookie, buf))
except MagicException as e:
return self._handle509Bug(e)
Expand Down Expand Up @@ -176,7 +193,7 @@ def __del__(self):
# incorrect fix for a threading problem, however I'm leaving
# it in because it's harmless and I'm slightly afraid to
# remove it.
if hasattr(self, 'cookie') and self.cookie and magic_close:
if hasattr(self, "cookie") and self.cookie and magic_close:
magic_close(self.cookie)
self.cookie = None

Expand All @@ -192,7 +209,7 @@ def _get_magic_type(mime):


def from_file(filename, mime=False):
""""
"""
Accepts a filename and returns the detected filetype. Return
value is the mimetype if mime=True, otherwise a human readable
name.
Expand Down Expand Up @@ -230,7 +247,9 @@ def from_descriptor(fd, mime=False):
m = _get_magic_type(mime)
return m.from_descriptor(fd)


from . import loader

libmagic = loader.load_lib()

magic_t = ctypes.c_void_p
Expand Down Expand Up @@ -261,20 +280,23 @@ def maybe_decode(s):
else:
# backslashreplace here because sometimes libmagic will return metadata in the charset
# of the file, which is unknown to us (e.g. the title of a Word doc)
return s.decode('utf-8', 'backslashreplace')
return s.decode("utf-8", "backslashreplace")


try:
    from os import PathLike
    from os import fspath as _fspath

    def unpath(filename):
        """Return the plain path behind *filename* if it is path-like.

        Objects implementing the ``os.PathLike`` protocol are converted
        through ``os.fspath`` (which also validates that the protocol
        returned ``str`` or ``bytes``).  Anything else -- ``str``,
        ``bytes``, ``None``, ... -- is handed back unchanged so the
        caller can deal with it.
        """
        if isinstance(filename, PathLike):
            return _fspath(filename)
        return filename

except ImportError:
    # Interpreter without os.PathLike / os.fspath: there are no path-like
    # objects to unwrap, so pass the value straight through.
    def unpath(filename):
        """Return *filename* unchanged (no ``os.PathLike`` support)."""
        return filename


def coerce_filename(filename):
if filename is None:
return None
Expand All @@ -286,12 +308,11 @@ def coerce_filename(filename):
# then you'll get inconsistent behavior (crashes) depending on the user's
# LANG environment variable
# NEXTBREAK: remove
is_unicode = (sys.version_info[0] <= 2 and
isinstance(filename, unicode)) or \
(sys.version_info[0] >= 3 and
isinstance(filename, str))
is_unicode = (sys.version_info[0] <= 2 and isinstance(filename, unicode)) or (
sys.version_info[0] >= 3 and isinstance(filename, str)
)
if is_unicode:
return filename.encode('utf-8', 'surrogateescape')
return filename.encode("utf-8", "surrogateescape")
else:
return filename

Expand Down Expand Up @@ -370,7 +391,7 @@ def magic_load(cookie, filename):
magic_compile.argtypes = [magic_t, c_char_p]

_has_param = False
if hasattr(libmagic, 'magic_setparam') and hasattr(libmagic, 'magic_getparam'):
if hasattr(libmagic, "magic_setparam") and hasattr(libmagic, "magic_getparam"):
_has_param = True
_magic_setparam = libmagic.magic_setparam
_magic_setparam.restype = c_int
Expand Down Expand Up @@ -443,8 +464,8 @@ def version():
MAGIC_NO_CHECK_CDF = 0x0040000 # Don't check for CDF files
MAGIC_NO_CHECK_CSV = 0x0080000 # Don't check for CSV files
MAGIC_NO_CHECK_ENCODING = 0x0200000 # Don't check text encodings
MAGIC_NO_CHECK_JSON = 0x0400000 # Don't check for JSON files
MAGIC_NO_CHECK_SIMH = 0x0800000 # Don't check for SIMH tape files
MAGIC_NO_CHECK_JSON = 0x0400000 # Don't check for JSON files
MAGIC_NO_CHECK_SIMH = 0x0800000 # Don't check for SIMH tape files

MAGIC_PARAM_INDIR_MAX = 0 # Recursion limit for indirect magic
MAGIC_PARAM_NAME_MAX = 1 # Use count limit for name/use magic
Expand All @@ -468,22 +489,20 @@ def _(*args, **kwargs):
warnings.warn(
"Using compatibility mode with libmagic's python binding. "
"See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.",
PendingDeprecationWarning)
PendingDeprecationWarning,
)

return fn(*args, **kwargs)

return _

fn = ['detect_from_filename',
'detect_from_content',
'detect_from_fobj',
'open']
fn = ["detect_from_filename", "detect_from_content", "detect_from_fobj", "open"]
for fname in fn:
to_module[fname] = deprecation_wrapper(compat.__dict__[fname])

# copy constants over, ensuring there's no conflicts
is_const_re = re.compile("^[A-Z_]+$")
allowed_inconsistent = set(['MAGIC_MIME'])
allowed_inconsistent = set(["MAGIC_MIME"])
for name, value in compat.__dict__.items():
if is_const_re.match(name):
if name in to_module:
Expand Down
20 changes: 19 additions & 1 deletion magic/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,25 @@ class Magic:
flags: int = ...
cookie: Any = ...
lock: threading.Lock = ...
def __init__(self, mime: bool = ..., magic_file: Optional[Any] = ..., mime_encoding: bool = ..., keep_going: bool = ..., uncompress: bool = ..., raw: bool = ..., extension: bool = ..., follow_symlinks: bool = ..., check_tar: bool = ..., check_soft: bool = ..., check_apptype: bool = ..., check_elf: bool = ..., check_text: bool = ..., check_encoding: bool = ..., check_json: bool = ..., check_simh: bool = ...) -> None: ...
def __init__(
    self,
    mime: bool = ...,
    magic_file: Optional[Any] = ...,
    mime_encoding: bool = ...,
    keep_going: bool = ...,
    uncompress: bool = ...,
    raw: bool = ...,
    extension: bool = ...,
    follow_symlinks: bool = ...,
    check_tar: bool = ...,
    check_soft: bool = ...,
    check_apptype: bool = ...,
    check_elf: bool = ...,
    check_text: bool = ...,
    # check_cdf / check_csv are accepted by the runtime signature in
    # magic/__init__.py (between check_text and check_encoding); they are
    # declared here so the stub matches the implementation.
    check_cdf: bool = ...,
    check_csv: bool = ...,
    check_encoding: bool = ...,
    check_json: bool = ...,
    check_simh: bool = ...,
) -> None: ...
def from_buffer(self, buf: Union[bytes, str]) -> Text: ...
def from_file(self, filename: Union[bytes, str, PathLike]) -> Text: ...
def from_descriptor(self, fd: int, mime: bool = ...) -> Text: ...
Expand Down
3 changes: 3 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
exclude = ["magic/compat.py"]


22 changes: 12 additions & 10 deletions test/libmagic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,20 @@
import os.path

# magic_descriptor is broken (?) in CentOS 7, so don't run those tests
SKIP_FROM_DESCRIPTOR = bool(os.environ.get('SKIP_FROM_DESCRIPTOR'))
SKIP_FROM_DESCRIPTOR = bool(os.environ.get("SKIP_FROM_DESCRIPTOR"))

TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'testdata'))
TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "testdata"))


class MagicTestCase(unittest.TestCase):
filename = os.path.join(TESTDATA_DIR, 'test.pdf')
expected_mime_type = 'application/pdf'
expected_encoding = 'us-ascii'
expected_name = ('PDF document, version 1.2', 'PDF document, version 1.2, 2 pages', 'PDF document, version 1.2, 2 page(s)')
filename = os.path.join(TESTDATA_DIR, "test.pdf")
expected_mime_type = "application/pdf"
expected_encoding = "us-ascii"
expected_name = (
"PDF document, version 1.2",
"PDF document, version 1.2, 2 pages",
"PDF document, version 1.2, 2 page(s)",
)

def assert_result(self, result):
self.assertEqual(result.mime_type, self.expected_mime_type)
Expand All @@ -27,11 +31,9 @@ def test_detect_from_filename(self):
self.assert_result(result)

def test_detect_from_fobj(self):

if SKIP_FROM_DESCRIPTOR:
self.skipTest("magic_descriptor is broken in this version of libmagic")


with open(self.filename) as fobj:
result = magic.detect_from_fobj(fobj)
self.assert_result(result)
Expand All @@ -41,10 +43,10 @@ def test_detect_from_content(self):
# this avoids hitting a bug in python3+libfile bindings
# see https://github.com/ahupp/python-magic/issues/152
# for a similar issue
with open(self.filename, 'rb') as fobj:
with open(self.filename, "rb") as fobj:
result = magic.detect_from_content(fobj.read(4096))
self.assert_result(result)


if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()