From 489da4a2da8afb8ce8a8d4d682390f2ec98937ea Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Tue, 11 Nov 2025 22:47:21 +0100
Subject: [PATCH 1/7] Add PRONOM export of registry

This feature demonstrates how to use JSONID as a high-level language
for defining PRONOM signatures. JSONID's registry is output as a
PRONOM signature file and should identify JSON objects via DROID.

NB. the format isn't compatible with Siegfried as of yet. It uses
modern DROID syntax.

A small number of other fixes have been made at the same time as I
worked through the export functionality. Changes are only minor, e.g.
making sure json2json continued to work where an earlier commit
introduced an error.
---
 .gitignore                    |   1 +
 justfile                      |  26 ++
 src/jsonid/export.py          | 115 +++++-
 src/jsonid/export_helpers.py  |  30 ++
 src/jsonid/file_processing.py |  21 ++
 src/jsonid/helpers.py         |  25 +-
 src/jsonid/jsonid.py          |   9 +-
 src/jsonid/pronom.py          | 668 ++++++++++++++++++++++++++++++++++
 src/utils/json2json.py        |   4 +-
 tests/test_pronom_export.py   | 420 +++++++++++++++++++++
 10 files changed, 1303 insertions(+), 16 deletions(-)
 create mode 100644 src/jsonid/export_helpers.py
 create mode 100644 src/jsonid/pronom.py
 create mode 100644 tests/test_pronom_export.py

diff --git a/.gitignore b/.gitignore
index 07e77b2..6071354 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,4 @@ jsonid-integration-files/
 
 # Secreta
 token.pypi
+jsonid_pronom.xml
diff --git a/justfile b/justfile
index bfb4a32..9be2689 100644
--- a/justfile
+++ b/justfile
@@ -83,6 +83,10 @@ check-debug:
 @hexdump file:
     hexdump -v -e '"\\\x" 1/1 "%02x"' {{file}}
 
+# Hexdump bytes only.
+@hexdump-plain file:
+    hexdump -ve '1/1 "%.2x"' {{file}}
+
 # code coverage
 coverage:
     python -m tox -e coverage
@@ -102,3 +106,25 @@ lookup-ref-ex:
 # lookup ref
 @lookup-ref ref:
     python jsonid.py lookup {{ref}}
+
+# export as PRONOM
+pronom:
+    python jsonid.py --pronom --debug
+
+# move pronom to .droid folder
+pronom-to-droid:
+    ~/.droid6/clean.sh
+    rm -f ~/.droid6/signature_files/jsonid_pronom.xml
+    cp jsonid_pronom.xml ~/.droid6/signature_files/
+
+dir := `pwd`
+
+# load pronom to siegfried via roy
+roy:
+    @echo {{dir}}
+    roy build --noreports -extend {{dir}}/jsonid_pronom.xml
+
+# droid-cli
+droid-cli path: pronom-to-droid
+    java -jar ~/dp/droid/droid-binary-6.8.0-bin/droid-command-line-6.8.0.jar -s 1
+    java -jar ~/dp/droid/droid-binary-6.8.0-bin/droid-command-line-6.8.0.jar -ri {{path}}
diff --git a/src/jsonid/export.py b/src/jsonid/export.py
index c062aa8..b128b5a 100644
--- a/src/jsonid/export.py
+++ b/src/jsonid/export.py
@@ -1,25 +1,35 @@
 """Functions to support export."""
 
+import copy
 import datetime
 import json
 import logging
 from datetime import timezone
+from typing import Final
 
 try:
+    import pronom
     import registry_data
     import version
 except ModuleNotFoundError:
     try:
-        from src.jsonid import registry_data, version
+        from src.jsonid import pronom, registry_data, version
     except ModuleNotFoundError:
-        from jsonid import registry_data, version
+        from jsonid import pronom, registry_data, version
 
 
 logger = logging.getLogger(__name__)
 
+PRONOM_FILENAME: Final[str] = "jsonid_pronom.xml"
+
+
+class PRONOMException(Exception):
+    """Exception class if we can't create a PRONOM signature as expected."""
+
 
 def exportJSON() -> None:  # pylint: disable=C0103
     """Export to JSON."""
-    logger.debug("exporting registry ad JSON")
+    logger.debug("exporting registry as JSON")
     data = registry_data.registry()
     json_obj = []
     id_ = {
@@ -35,3 +45,102 @@ def exportJSON() -> None:  # pylint: disable=C0103
     for datum in data:
        json_obj.append(datum.json())
     print(json.dumps(json_obj, indent=2))
+
+
+def export_pronom() -> None:
+    """Export a PRONOM compatible set of signatures.
+
+    Export is done in two phases. First, a set of proposed "Baseline"
+    JSON signatures is exported to catch many JSON instances.
+
+    Second, the JSONID registry is exported.
+
+    Every export has priority over those before it so that there
+    should be no multiple identification results.
+    """
+
+    # pylint: disable=R0914; too-many local variables.
+
+    logger.debug("exporting registry as PRONOM")
+
+    reg_data = registry_data.registry()
+    formats = []
+
+    encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
+    priorities = []
+
+    increment_id = 0
+
+    for encoding in encodings:
+        all_baseline = pronom.create_baseline_json_sequences(encoding)
+        for baseline in all_baseline:
+            increment_id += 1
+            fmt = pronom.Format(
+                id=increment_id,
+                name=f"JSON (Baseline - fmt/817) ({encoding})",
+                version="",
+                puid="jsonid:0000",
+                mime="application/json",
+                classification="structured text",
+                external_signatures=[
+                    pronom.ExternalSignature(
+                        id=increment_id,
+                        signature="json",
+                        type=pronom.EXT,
+                    )
+                ],
+                internal_signatures=[baseline],
+                priorities=priorities,
+            )
+            priorities.append(f"{increment_id}")
+            formats.append(fmt)
+
+    for encoding in encodings:
+        for entry in reg_data:
+            increment_id += 1
+            json_puid = f"{entry.json()['identifier']};{encoding}"
+            name_ = f"{entry.json()['name'][0]['@en']} ({encoding})"
+            markers = entry.json()["markers"]
+            try:
+                mime = entry.json()["mime"][0]
+            except IndexError:
+                mime = ""
+            try:
+                sequences = pronom.process_markers(
+                    markers.copy(),
+                    increment_id,
+                    encoding=encoding,
+                )
+            except pronom.UnprocessableEntity as err:
+                logger.error(
+                    "%s %s: cannot handle: %s",
+                    json_puid,
+                    name_,
+                    err,
+                )
+                for marker in markers:
+                    logger.debug("--- START ---")
+                    logger.debug("marker: %s", marker)
+                    logger.debug("---  END  ---")
+                continue
+            fmt = pronom.Format(
+                id=increment_id,
+                name=name_,
+                version="",
+                puid=json_puid,
+                mime=mime,
+                classification="structured text",
+                external_signatures=[
+                    pronom.ExternalSignature(
+                        id=increment_id,
+                        signature="json",
+                        type=pronom.EXT,
+                    )
+                ],
+                internal_signatures=sequences,
+                priorities=copy.deepcopy(list(set(priorities))),
+            )
+            priorities.append(f"{increment_id}")
+            formats.append(fmt)
+
+    pronom.process_formats_and_save(formats, PRONOM_FILENAME)
diff --git a/src/jsonid/export_helpers.py b/src/jsonid/export_helpers.py
new file mode 100644
index 0000000..0dfec2f
--- /dev/null
+++ b/src/jsonid/export_helpers.py
@@ -0,0 +1,30 @@
+"""Helpers for the export functions."""
+
+import datetime
+from datetime import timezone
+from typing import Final
+from xml.dom.minidom import parseString
+
+UTC_TIME_FORMAT: Final[str] = "%Y-%m-%dT%H:%M:%SZ"
+
+
+def get_utc_timestamp_now():
+    """Get a formatted UTC timestamp for 'now' that can be used when
+    a timestamp is needed.
+    """
+    return datetime.datetime.now(timezone.utc).strftime(UTC_TIME_FORMAT)
+
+
+def new_prettify(c):
+    """Remove excess newlines from DOM output.
+
+    via: https://stackoverflow.com/a/14493981
+    """
+    reparsed = parseString(c)
+    return "\n".join(
+        [
+            line
+            for line in reparsed.toprettyxml(indent=" " * 2).split("\n")
+            if line.strip()
+        ]
+    )
diff --git a/src/jsonid/file_processing.py b/src/jsonid/file_processing.py
index 1d083a4..1da44ee 100644
--- a/src/jsonid/file_processing.py
+++ b/src/jsonid/file_processing.py
@@ -338,6 +338,27 @@ async def identify_plaintext_bytestream(
 
     If analysis is `True` we try to return more low-level file
     information to help folks make appraisal decisions.
+
+    Encodings in Python are split into the following, where UTF-32 on
+    its own is a little confusing. If we are writing the encoding
+    then the byte-order-marker handling is implicit, and so we select
+    UTF-32LE to make sure the byte order is explicit when written.
+
+    If we are decoding, it shouldn't matter: we try to decode, and if
+    it works, it works.
+
+    Encodings:
+
+        "UTF-8",
+        "UTF-16",
+        "UTF-16LE",
+        "UTF-16BE",
+        "UTF-32",
+        "UTF-32LE",
+        "UTF-32BE",
+        "SHIFT-JIS",
+        "BIG5",
+
     """
 
     # pylint: disable=R0911
diff --git a/src/jsonid/helpers.py b/src/jsonid/helpers.py
index ca28bc1..3776fc1 100644
--- a/src/jsonid/helpers.py
+++ b/src/jsonid/helpers.py
@@ -2,7 +2,7 @@
 
 import logging
 import time
-from typing import Union
+from typing import Final, Union
 
 try:
     import htm_template
@@ -75,6 +75,15 @@ def format_marker(marker_text: str, marker: dict) -> str:
     return f"{marker_text}{marker_formatted}\n"
 
 
+TYPE_BOOL: Final[str] = "bool"
+TYPE_FLOAT: Final[str] = "float"
+TYPE_INTEGER: Final[str] = "integer"
+TYPE_LIST: Final[str] = "list"
+TYPE_NONE: Final[str] = "NoneType"
+TYPE_MAP: Final[str] = "map"
+TYPE_STRING: Final[str] = "string"
+
+
 def substitute_type_text(replace_me: Union[str, type]):
     """Output a text substitution for a type that will otherwise not
     pretty-print.
@@ -83,19 +92,19 @@ def substitute_type_text(replace_me: Union[str, type]):
     # pylint: disable=R0911
 
     if replace_me.__name__ == "dict":
-        return "map"
+        return TYPE_MAP
     if replace_me.__name__ == "int":
-        return "integer"
+        return TYPE_INTEGER
     if replace_me.__name__ == "list":
-        return "list"
+        return TYPE_LIST
     if replace_me.__name__ == "str":
-        return "string"
+        return TYPE_STRING
     if replace_me.__name__ == "float":
-        return "float"
+        return TYPE_FLOAT
     if replace_me.__name__ == "bool":
-        return "bool"
+        return TYPE_BOOL
     if replace_me.__name__ == "NoneType":
-        return "NoneType"
+        return TYPE_NONE
     if not isinstance(replace_me, type):
         pass
     return replace_me
diff --git a/src/jsonid/jsonid.py b/src/jsonid/jsonid.py
index 0dfa6b8..7475e0d 100644
--- a/src/jsonid/jsonid.py
+++ b/src/jsonid/jsonid.py
@@ -98,7 +98,7 @@ def main() -> None:
     # pylint: disable=R0912,R0915
     parser = argparse.ArgumentParser(
-        prog="json-id",
+        prog="jsonid",
         description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values",
         epilog="for more information visit https://github.com/ffdev-info/json-id",
     )
@@ -156,10 +156,12 @@ def main() -> None:
         help="path to a custom registry to lead into memory replacing the default",
         required=False,
     )
+    # NB. consider output to stdout once the feature is more stable.
    parser.add_argument(
         "--pronom",
-        help="return a PRONOM-centric view of the results",
+        help=f"return a PRONOM-centric view of the results to `{export.PRONOM_FILENAME}` (BETA)",
         required=False,
+        action="store_true",
     )
     parser.add_argument(
         "--export",
@@ -220,7 +222,8 @@ def main() -> None:
     if args.registry:
         raise NotImplementedError("custom registry is not yet available")
     if args.pronom:
-        raise NotImplementedError("pronom view is not yet implemented")
+        export.export_pronom()
+        sys.exit()
     if args.language:
         raise NotImplementedError("multiple languages are not yet implemented")
     if args.export:
diff --git a/src/jsonid/pronom.py b/src/jsonid/pronom.py
new file mode 100644
index 0000000..ddf0d80
--- /dev/null
+++ b/src/jsonid/pronom.py
@@ -0,0 +1,668 @@
+"""PRONOM export routines.
+
+XML tooling: https://xmllint.com/
+"""
+
+import binascii
+import codecs
+import logging
+import xml.dom.minidom
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Any, Final
+
+try:
+    import export_helpers
+    import helpers
+    import registry_matchers
+except ModuleNotFoundError:
+    try:
+        from src.jsonid import export_helpers, helpers, registry_matchers
+    except ModuleNotFoundError:
+        from jsonid import export_helpers, helpers, registry_matchers
+
+
+logger = logging.getLogger(__name__)
+
+
+DISK_SECTOR_SIZE: Final[int] = 4095
+
+# Common PRONOM characters.
+COLON: Final[str] = "3A"
+CURLY_OPEN: Final[str] = "7B"
+CURLY_CLOSE: Final[str] = "7D"
+SQUARE_OPEN: Final[str] = "5B"
+SQUARE_CLOSE: Final[str] = "5D"
+DOUBLE_QUOTE: Final[str] = "22"
+
+# Constant values.
+NUMBER_REGEX: Final[str] = "[30:39]"
+TRUE_VALUE: Final[str] = "74727565"
+FALSE_VALUE: Final[str] = "66616C7365"
+NULL_VALUE: Final[str] = "6E756C6C"
+
+# Our whitespace values could potentially be optimized, e.g. to allow
+# more or fewer bytes per encoding. 16 is a good default to enable
+# some UTF-32 identification.
+WHITESPACE_REGEX: Final[str] = "{0-16}"
+
+# External signature types.
+EXT: Final[str] = "file extension"
+
+# Replacement markers for PRONOM pre-processing.
+MARKER_INDEX_START = "INDEX_START"
+MARKER_INDEX_END = "INDEX END"
+
+
+class UnprocessableEntity(Exception):
+    """Provide a way to give complete feedback to the caller to allow
+    it to exit."""
+
+
+@dataclass
+class ExternalSignature:
+    id: str
+    signature: str
+    type: str
+
+
+@dataclass
+class ByteSequence:
+    id: str
+    pos: str
+    min_off: str
+    max_off: str
+    endian: str
+    value: str
+
+
+@dataclass
+class InternalSignature:
+    id: str
+    name: str
+    byte_sequences: list[ByteSequence]
+
+
+@dataclass
+class Priority:
+    type: str
+    id: str
+
+
+@dataclass
+class Identifier:
+    type: str
+    value: str
+
+
+@dataclass
+class Format:  # pylint: disable=R0902
+    id: str
+    name: str
+    version: str
+    puid: str
+    mime: str
+    classification: str
+    external_signatures: list[ExternalSignature]
+    internal_signatures: list[InternalSignature]
+    priorities: list[str]
+
+
+@lru_cache()
+def _get_bom() -> list:
+    """Generate a list of byte-order markers that allow us to replace
+    markers introduced through various encoding operations.
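+
+    For example, "{" encoded to UTF-16 on a little-endian machine
+    yields FFFE7B00; the leading FFFE byte-order marker must be
+    stripped so that re-encoded sequences can match at arbitrary
+    offsets.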
+    """
+    replaces = [
+        codecs.BOM,
+        codecs.BOM_BE,
+        codecs.BOM_LE,
+        codecs.BOM_UTF8,
+        codecs.BOM_UTF16,
+        codecs.BOM_UTF16_BE,
+        codecs.BOM_UTF16_LE,
+        codecs.BOM_UTF32,
+        codecs.BOM_UTF32_BE,
+        codecs.BOM_UTF32_LE,
+    ]
+    res = []
+    for bom in replaces:
+        hex_bom = ""
+        for marker in bom:
+            # NB. zero-pad so single-digit bytes keep their width.
+            char = f"{marker:02x}"
+            hex_bom = f"{hex_bom}{char}".upper()
+        res.append(hex_bom)
+    return res
+
+
+def create_many_to_one_byte_sequence(internal_signatures: list[InternalSignature]):
+    """Create a many to one byte sequence, i.e. a format with multiple
+    Internal Signatures.
+    """
+    internal_signature = ""
+    for internal in internal_signatures:
+        id_ = internal.id
+        bs = create_one_to_many_byte_sequence(internal.byte_sequences)
+        internal_signature = f"""
+{internal_signature}
+<InternalSignature ID="{id_}">
+    {bs}
+</InternalSignature>
+    """
+    return internal_signature.strip()
+
+
+def calculate_variable_off_bof(item: ByteSequence):
+    """Given variable offsets, calculate the correct syntax."""
+    seq = item.value
+    if (
+        item.min_off != ""
+        and int(item.min_off) > 0
+        and item.max_off != ""
+        and int(item.max_off) > 0
+    ):
+        seq = f"{{{item.min_off}-{int(item.min_off)+int(item.max_off)}}}{seq}"
+    elif item.max_off != "" and int(item.max_off) > 0:
+        seq = f"{{0-{item.max_off}}}{seq}"
+    elif item.min_off != "" and int(item.min_off) > 0:
+        seq = f"{{{item.min_off}}}{seq}"
+    return seq
+
+
+def calculate_variable_off_eof(item: ByteSequence):
+    """Given variable offsets, calculate the correct syntax."""
+    seq = item.value
+    if (
+        item.min_off != ""
+        and int(item.min_off) > 0
+        and item.max_off != ""
+        and int(item.max_off) > 0
+    ):
+        seq = f"{seq}{{{item.min_off}-{int(item.min_off)+int(item.max_off)}}}"
+    elif item.max_off != "" and int(item.max_off) > 0:
+        seq = f"{seq}{{0-{item.max_off}}}"
+    elif item.min_off != "" and int(item.min_off) > 0:
+        seq = f"{seq}{{{item.min_off}}}"
+    return seq
+
+
+def create_one_to_many_byte_sequence(byte_sequences: list[ByteSequence]):
+    """Create a byte sequence object."""
+    byte_sequence = ""
+    for item in byte_sequences:
+        seq = item.value
+        if item.pos.startswith("EOF"):
+            seq = calculate_variable_off_eof(item)
+        elif item.pos.startswith("BOF"):
+            seq = calculate_variable_off_bof(item)
+        byte_sequence = f"""
+{byte_sequence.strip()}
+<ByteSequence Reference="{item.pos}" Sequence="{seq}"/>
+    """
+    return byte_sequence.strip()
+
+
+def create_file_format_collection(fmt: list[Format]):
+    """Create the FileFormatCollection object.
+
+    E.g.
+    ```
+    <FileFormat ID="..." Name="..." PUID="..." Version="" MIMEType="...">
+        <InternalSignatureID>1</InternalSignatureID>
+        <Extension>ext</Extension>
+    </FileFormat>
+    <FileFormat ID="..." Name="..." PUID="..." Version="" MIMEType="...">
+        <InternalSignatureID>880</InternalSignatureID>
+        <InternalSignatureID>881</InternalSignatureID>
+        <Extension>ai</Extension>
+        <HasPriorityOverFileFormatID>86</HasPriorityOverFileFormatID>
+        <HasPriorityOverFileFormatID>331</HasPriorityOverFileFormatID>
+        <HasPriorityOverFileFormatID>332</HasPriorityOverFileFormatID>
+        <HasPriorityOverFileFormatID>771</HasPriorityOverFileFormatID>
+        <HasPriorityOverFileFormatID>773</HasPriorityOverFileFormatID>
+    </FileFormat>
+    ```
+
+    """
+    internal_sigs = [
+        f"<InternalSignatureID>{sig.id}</InternalSignatureID>"
+        for sig in fmt.internal_signatures
+    ]
+    external_sigs = [
+        f"<Extension>{sig.signature}</Extension>"
+        for sig in fmt.external_signatures
+        if sig.type.lower() == EXT
+    ]
+    priorities = [
+        f"<HasPriorityOverFileFormatID>{priority}</HasPriorityOverFileFormatID>"
+        for priority in fmt.priorities
+    ]
+    ff = f"""
+    <FileFormat ID="{fmt.id}" Name="{fmt.name}" PUID="{fmt.puid}" Version="{fmt.version}" MIMEType="{fmt.mime}">
+        {"".join(internal_sigs).strip()}
+        {"".join(external_sigs).strip()}
+        {"".join(priorities).strip()}
+    </FileFormat>
+    """
+    return ff.strip()
+
+
+def process_formats_and_save(formats: list[Format], filename: str):
+    """Process the collected formats and output a signature file.
+
+    NB. Given our dataclasses here, we have the opportunity to rework
+    this data into many new structures. We output XML because DROID
+    expects XML.
+    """
+    isc = []
+    ffc = []
+    for fmt in formats:
+        ffc.append(create_file_format_collection(fmt))
+        if fmt.internal_signatures:
+            isc.append(create_many_to_one_byte_sequence(fmt.internal_signatures))
+    droid_template = f"""
+<?xml version="1.0" encoding="UTF-8"?>
+<FFSignatureFile DateCreated="{export_helpers.get_utc_timestamp_now()}" Version="1">
+<InternalSignatureCollection>
+    {"".join(isc).strip()}
+</InternalSignatureCollection>
+<FileFormatCollection>
+    {"".join(ffc).strip()}
+</FileFormatCollection>
+</FFSignatureFile>
+    """
+    dom = None
+    signature_file = droid_template.strip().replace("\n", "")
+    try:
+        dom = xml.dom.minidom.parseString(signature_file)
+    except xml.parsers.expat.ExpatError as err:
+        logger.error("cannot process xml: %s", err)
+        return
+    pretty_xml = dom.toprettyxml(indent=" ", encoding="utf-8")
+    prettier_xml = export_helpers.new_prettify(pretty_xml)
+    logger.info("outputting to: %s", filename)
+    with open(filename, "w", encoding="utf-8") as output_file:
+        output_file.write(prettier_xml)
+
+
+def encode_roundtrip(hexed_val: str, encoding: str) -> str:
+    """We want to get a plain-text byte-sequence into a new
+    encoding. It takes a few hops and skips.
+    """
+    val = hexed_val.strip()
+    try:
+        re_encoded = binascii.unhexlify(hexed_val).decode("utf-8").encode(encoding)
+    except (binascii.Error, UnicodeDecodeError) as err:
+        logger.error("cannot convert: %s len: %s ('%s')", hexed_val, len(val), err)
+        return val
+    hex_val = binascii.hexlify(re_encoded).decode().upper()
+    for bom in _get_bom():
+        if not hex_val.startswith(bom):
+            continue
+        return hex_val.replace(bom, "")
+    return hex_val
+
+
+def _type_to_str(type_: type, encoding: str) -> str:
+    """Given a data type marker we need to convert the type into a
+    byte sequence that will match the type.
+
+    E.g. BOOLEAN types evaluate to true or false encoded in ASCII.
+    E.g. STRING types need to begin and end with double-quotes but the
+    string itself is just a wildcard. The wildcard will match any
+    value between the double quotes.
+    """
+
+    curly_open_encoded = encode_roundtrip(CURLY_OPEN, encoding)
+    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
+    square_open_encoded: Final[str] = encode_roundtrip(SQUARE_OPEN, encoding)
+    square_close_encoded: Final[str] = encode_roundtrip(SQUARE_CLOSE, encoding)
+    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
+
+    try:
+        type_ = helpers.substitute_type_text(type_)
+    except AttributeError:
+        logger.debug("type_ already converted: %s", type_)
+
+    if type_ in (helpers.TYPE_INTEGER, helpers.TYPE_FLOAT):
+        # an integer field will begin 0-9 but it is unclear how to
+        # represent larger numbers, or whether we need to.
+        return NUMBER_REGEX
+    if type_ == helpers.TYPE_BOOL:
+        # true | false
+        return f"({encode_roundtrip(TRUE_VALUE, encoding)}|{encode_roundtrip(FALSE_VALUE, encoding)})"
+    if type_ == helpers.TYPE_STRING:
+        # string begins with a double quote and ends in a double quote.
+        return f"{double_quote_encoded}*{double_quote_encoded}"
+    if type_ == helpers.TYPE_MAP:
+        # { == 7B; } == 7D
+        return f"{curly_open_encoded}*{curly_close_encoded}"
+    if type_ == helpers.TYPE_LIST:
+        # [ == 5B; ] == 5D
+        return f"{square_open_encoded}*{square_close_encoded}"
+    if type_ == helpers.TYPE_NONE:
+        # null
+        return f"{encode_roundtrip(NULL_VALUE, encoding)}"
+    # This should only trigger for incorrect values at this point.
+    raise UnprocessableEntity(f"type_to_str: {type_}")
+
+
+def _complex_is_type(marker: Any) -> str:
+    """Complex IS might be another data structure, e.g. a dict, or
+    something else that we can't convert easily. It is simply a WIP
+    for now.
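+
+    A hypothetical example: {"KEY": "versions", "IS": {"v1": {}}}
+    carries a dict value and cannot yet be flattened into a single
+    byte sequence.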
+    """
+    raise UnprocessableEntity(f"complex IS type: '{marker}' (WIP)")
+
+
+def _str_to_hex_str(string: str) -> str:
+    """Convert string to hexadecimal bytes.
+
+    We convert to bytes here first without encoding and then convert
+    the bytes to an encoding second. It should be possible to combine
+    those two procedures, but this has worked well during the
+    prototyping phase.
+    """
+    hex_bytes = []
+    for byte_ in string.encode():
+        hex_bytes.append(hex(byte_).replace("0x", ""))
+    hex_str = "".join(hex_bytes).upper()
+    return hex_str
+
+
+def quote_and_encode(value, encoding) -> str:
+    """Quote and encode a given value."""
+
+    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
+    return f"{double_quote_encoded}{encode_roundtrip(value, encoding)}{double_quote_encoded}"
+
+
+def convert_marker_to_signature_sequence(marker: dict, encoding: str) -> str:
+    """Convert a JSONID marker into a signature sequence."""
+
+    # pylint: disable=R0914; too-many local variables.
+    # pylint: disable=R0911; too-many return statements.
+    # pylint: disable=R0915; too-many statements.
+
+    logger.debug("marker: %s", marker)
+
+    colon_encoded: Final[str] = encode_roundtrip(COLON, encoding)
+    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
+    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
+    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
+    square_open_encoded: Final[str] = encode_roundtrip(SQUARE_OPEN, encoding)
+    square_close_encoded: Final[str] = encode_roundtrip(SQUARE_CLOSE, encoding)
+
+    instruction = ""
+    if registry_matchers.MARKER_GOTO in marker.keys():
+        # GOTO KEY and match KEY.
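+        # E.g. the marker {"GOTO": "sops", "KEY": "kms"} becomes, in
+        # UTF-8: 22736F707322{0-16}3A*{0-16}226B6D7322{0-16}3A, i.e.
+        # quoted "sops", a colon, a wildcard, then quoted "kms" and
+        # its colon.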
goto_key = _str_to_hex_str(marker["GOTO"])
+        key_at_goto = _str_to_hex_str(marker["KEY"])
+        goto_encoded = quote_and_encode(goto_key, encoding)
+        key_encoded = quote_and_encode(key_at_goto, encoding)
+        instruction = f"{goto_encoded}{WHITESPACE_REGEX}{colon_encoded}*{WHITESPACE_REGEX}{key_encoded}{WHITESPACE_REGEX}{colon_encoded}"
+        marker.pop("GOTO")
+        marker.pop("KEY")
+        return instruction.upper()
+    if registry_matchers.MARKER_INDEX in marker.keys():
+        key = _str_to_hex_str(marker["KEY"])
+        instruction = f"{WHITESPACE_REGEX}{square_open_encoded}*{curly_open_encoded}*{double_quote_encoded}{encode_roundtrip(key, encoding)}{double_quote_encoded}{WHITESPACE_REGEX}{colon_encoded}*{curly_close_encoded}*{square_close_encoded}"
+        marker.pop("INDEX")
+        marker.pop("KEY")
+        return instruction.upper()
+    if "KEY" in marker.keys():
+        key = _str_to_hex_str(marker["KEY"])
+        instruction = quote_and_encode(key, encoding)
+        marker.pop("KEY")
+    if registry_matchers.MARKER_KEY_EXISTS in marker.keys():
+        instruction = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}".upper()
+        return instruction
+    if registry_matchers.MARKER_IS_TYPE in marker.keys():
+        is_type = _type_to_str(marker["ISTYPE"], encoding=encoding)
+        type_val = (
+            f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{is_type}"
+        )
+        return type_val.upper()
+    if registry_matchers.MARKER_IS in marker.keys():
+        marker_is = marker["IS"]
+        if not isinstance(marker_is, str):
+            _complex_is_type(marker_is)
+        equals = _str_to_hex_str(marker_is)
+        is_val = f"{instruction}{WHITESPACE_REGEX}{encode_roundtrip(equals, encoding)}"
+        return is_val.upper()
+    if registry_matchers.MARKER_STARTSWITH in marker.keys():
+        starts_with = _str_to_hex_str(marker["STARTSWITH"])
+        starts_with_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{double_quote_encoded}{encode_roundtrip(starts_with, encoding)}"
+        return starts_with_val.upper()
+    if registry_matchers.MARKER_ENDSWITH in marker.keys():
+        ends_with = _str_to_hex_str(marker["ENDSWITH"])
+        ends_with_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}*{encode_roundtrip(ends_with, encoding)}{double_quote_encoded}"
+        return ends_with_val.upper()
+    if registry_matchers.MARKER_CONTAINS in marker.keys():
+        contains = _str_to_hex_str(marker["CONTAINS"])
+        contains_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{double_quote_encoded}*{encode_roundtrip(contains, encoding)}*{double_quote_encoded}"
+        return contains_val.upper()
+    if registry_matchers.MARKER_REGEX in marker.keys():
+        raise UnprocessableEntity("REGEX not yet implemented")
+    if registry_matchers.MARKER_KEY_NO_EXIST in marker.keys():
+        raise UnprocessableEntity("KEY NO EXIST not yet implemented")
+    # We should never arrive here. In the future clean this up so we
+    # only return when we have information.
+    return ""
+
+
+def preprocess_goto_markers(markers: list) -> list:
+    """Preprocess markers to remove data that is otherwise duplicated
+    when converted to a PRONOM signature, e.g.
GOTO."""
+
+    out = []
+    for marker in markers:
+        if registry_matchers.MARKER_GOTO not in marker:
+            out.append(marker)
+            continue
+        key = marker.pop("GOTO")
+        new_marker = {"KEY": key, "EXISTS": None}
+        if new_marker not in out:
+            out.append(new_marker)
+        out.append(marker)
+    return out
+
+
+def process_markers(
+    markers: list, sig_id: int, encoding: str = ""
+) -> list[InternalSignature]:
+    """Given a set of markers for a document type, process them into
+    a set of byte sequences and finally an internal signature sequence
+    that can be output as a PRONOM signature.
+
+    Returns a list containing the processed internal signature.
+    NB. conversion is potentially lossy, e.g. when matching types
+    such as strings via wildcards.
+
+    dict_keys(['CONTAINS'])
+    dict_keys(['ENDSWITH'])
+    dict_keys(['IS'])
+    dict_keys(['ISTYPE'])
+    dict_keys(['STARTSWITH'])
+
+    key(0-n):(0-n)value
+
+    Need to return something like:
+
+        <ByteSequence Reference="..." Sequence="..."/>
+
+    Different encodings need to be accounted for, e.g. (with added
+    whitespace below)
+
+    UTF-32-LE:
+
+    00000000: 2000 0000 2000 0000 2000 0000 2000 0000   ... ... ... ...
+    00000010: 2000 0000 2000 0000 0a00 0000 0a00 0000   ... ...........
+    00000020: 0a00 0000 0a00 0000 7b00 0000 2200 0000  ........{..."...
+    00000030: 6100 0000 2200 0000 3a00 0000 2000 0000  a..."...:... ...
+    00000040: 2200 0000 6200 0000 2200 0000 7d00 0000  "...b..."...}...
+    00000050: 0a00 0000                                ....
+
+    UTF-32-BE:
+
+    00000000: 0000 0020 0000 0020 0000 0020 0000 0020   ... ... ... ...
+    00000010: 0000 0020 0000 0020 0000 000a 0000 000a   ... ... ........
+    00000020: 0000 000a 0000 000a 0000 007b 0000 0022  ...........{..."
+    00000030: 0000 0061 0000 0022 0000 003a 0000 0020  ...a..."...:...
+    00000040: 0000 0022 0000 0062 0000 0022 0000 007d  ..."...b..."...}
+    00000050: 0000 000a                                ....
+
+    UTF-16-LE:
+
+    00000000: 2000 2000 2000 2000 2000 2000 0a00 0a00   . . . . . .....
+    00000010: 0a00 0a00 7b00 2200 6100 2200 3a00 2000  ....{.".a.".:. .
+    00000020: 2200 6200 2200 7d00 0a00                 ".b.".}...
+
+    UTF-16-BE:
+
+    00000000: 0020 0020 0020 0020 0020 0020 000a 000a   . . . . . . ....
+    00000010: 000a 000a 007b 0022 0061 0022 003a 0020  .....{.".a.".:.
+    00000020: 0022 0062 0022 007d 000a                 .".b.".}..
+
+    """
+
+    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
+    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
+
+    sequences = []
+
+    markers = preprocess_goto_markers(markers)
+
+    for marker in markers:
+        sig_sequence = convert_marker_to_signature_sequence(marker, encoding)
+        sequences.append(sig_sequence)
+
+    byte_sequences = []
+
+    byte_sequences.append(
+        ByteSequence(
+            id=0,
+            pos="BOF",
+            min_off=0,
+            max_off=f"{DISK_SECTOR_SIZE}",
+            endian="",
+            value=curly_open_encoded,
+        )
+    )
+
+    for idx, item in enumerate(sequences, 0):
+        logger.debug("%s. %s", idx, item)
+        byte_sequence = ByteSequence(
+            id=idx,
+            pos="VAR",
+            min_off="",
+            max_off="",
+            endian="",
+            value=item,
+        )
+        byte_sequences.append(byte_sequence)
+
+    byte_sequences.append(
+        ByteSequence(
+            id=0,
+            pos="EOF",
+            min_off="0",
+            max_off=f"{DISK_SECTOR_SIZE}",
+            endian="",
+            value=curly_close_encoded,
+        )
+    )
+
+    internal_signature = InternalSignature(
+        id=sig_id,
+        name="",
+        byte_sequences=byte_sequences,
+    )
+
+    return [internal_signature]
+
+
+def create_baseline_json_sequences(encoding: str):
+    """Create baseline JSON sequences that match map and list types
+    with various different encodings.
+    """
+
+    # pylint: disable=R0914; too-many local variables.
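+    #
+    # A baseline signature is deliberately generic: BOF matches "{"
+    # or "[", EOF matches "}" or "]", and a single VAR fragment
+    # anchors one JSON value shape (string key, number, boolean, or
+    # null).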
+
+    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
+    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
+    square_open_encoded: Final[str] = encode_roundtrip(SQUARE_OPEN, encoding)
+    square_close_encoded: Final[str] = encode_roundtrip(SQUARE_CLOSE, encoding)
+
+    colon_encoded: Final[str] = encode_roundtrip(COLON, encoding)
+    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
+
+    bof = f"({curly_open_encoded}|{square_open_encoded})"
+    eof = f"({curly_close_encoded}|{square_close_encoded})"
+
+    no_encoded: Final[str] = NUMBER_REGEX
+    true_encoded: Final[str] = encode_roundtrip(TRUE_VALUE, encoding)
+    false_encoded: Final[str] = encode_roundtrip(FALSE_VALUE, encoding)
+    null_encoded: Final[str] = encode_roundtrip(NULL_VALUE, encoding)
+
+    options = (
+        f"{double_quote_encoded}{WHITESPACE_REGEX}{colon_encoded}",
+        no_encoded,
+        f"({true_encoded}|{false_encoded})",
+        null_encoded,
+    )
+
+    sigs = []
+
+    for opt in options:
+
+        bs = []
+
+        bs.append(
+            ByteSequence(
+                id=1,
+                pos="BOF",
+                min_off=0,
+                max_off=f"{DISK_SECTOR_SIZE}",
+                endian="",
+                value=bof,
+            )
+        )
+
+        bs.append(
+            ByteSequence(
+                id=1,
+                pos="VAR",
+                min_off=0,
+                max_off=0,
+                endian="",
+                value=opt,
+            )
+        )
+
+        bs.append(
+            ByteSequence(
+                id=1,
+                pos="EOF",
+                min_off="0",
+                max_off=f"{DISK_SECTOR_SIZE}",
+                endian="",
+                value=eof,
+            )
+        )
+
+        iss = InternalSignature(
+            id=0,
+            name="",
+            byte_sequences=bs,
+        )
+
+        sigs.append(iss)
+
+    return sigs
diff --git a/src/utils/json2json.py b/src/utils/json2json.py
index bdbbf21..c54b022 100644
--- a/src/utils/json2json.py
+++ b/src/utils/json2json.py
@@ -62,7 +62,7 @@ async def identify_plaintext_bytestream(path: str) -> Tuple[bool, str]:
 async def identify_json(paths: list[str]):
     """Identify objects."""
     for idx, path in enumerate(paths):
-        valid, data, _, _ = await identify_plaintext_bytestream(path)
+        valid, data = await identify_plaintext_bytestream(path)
         if not valid:
             continue
         print(json.dumps(data, indent=2))
@@ -114,7 +114,7 @@ def main() -> None:
     parser = argparse.ArgumentParser(
         prog="json2json",
         description="parse JSON UTF-16 (BE-LE) objects and output them as UTF-8 for the sake of developer ergonomics",
-        epilog="for more information visit https://github.com/ffdev-info/json-id",
+        epilog="for more information visit https://github.com/ffdev-info/jsonid",
     )
     parser.add_argument(
         "--debug",
diff --git a/tests/test_pronom_export.py b/tests/test_pronom_export.py
new file mode 100644
index 0000000..7b6e9d5
--- /dev/null
+++ b/tests/test_pronom_export.py
@@ -0,0 +1,420 @@
+"""Test PRONOM export functions.
+
+NB. many of the tests here can be parametrized once we have good
+coverage. They've all been tested individually as the conversion
+scripts are ironed out. This will take on more relevance if the
+tool is used more for JSON signature creation in general.
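+
+To regenerate the registry-wide signature file, the justfile wraps the
+(BETA) entry point:
+
+```sh
+python jsonid.py --pronom --debug
+```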
+"""
+
+import pytest
+
+from src.jsonid import pronom
+
+encode_roundtrip_tests = [
+    (
+        "74657374",
+        "7400650073007400",
+        "utf-16",
+    )
+]
+
+
+@pytest.mark.parametrize("hex_sequences, expected, encoding", encode_roundtrip_tests)
+def test_encode_roundtrip(hex_sequences, expected, encoding):
+    """Re-encode a set of hexadecimal values to a new encoding."""
+
+    res = pronom.encode_roundtrip(hex_sequences, encoding)
+    assert res == expected
+
+
+preprocess_goto_tests = [
+    (
+        [
+            {"KEY": "sops", "EXISTS": None},
+            {"GOTO": "sops", "KEY": "kms", "EXISTS": None},
+            {"GOTO": "sops", "KEY": "pgp", "EXISTS": None},
+        ],
+        [
+            {"KEY": "sops", "EXISTS": None},
+            {"KEY": "kms", "EXISTS": None},
+            {"KEY": "pgp", "EXISTS": None},
+        ],
+    )
+]
+
+
+@pytest.mark.parametrize("markers_in, markers_out", preprocess_goto_tests)
+def test_preprocess_goto_markers(markers_in: list, markers_out: list):
+    """Make sure preprocess markers works as anticipated.
+
+    GOTO is also largely synonymous with "KEY" exists and so we can
+    remove duplicate examples of GOTO and ensure just one "EXISTS" for
+    that GOTO exists.
+    """
+
+    res = pronom.preprocess_goto_markers(markers_in)
+    assert res == markers_out
+
+
+preprocess_index_tests = [
+    (
+        [
+            {"INDEX": 0, "KEY": "Content-Length", "EXISTS": None},
+            {"INDEX": 0, "KEY": "Content-Type", "EXISTS": None},
+            {"INDEX": 0, "KEY": "X-TIKA:Parsed-By", "EXISTS": None},
+            {"INDEX": 0, "KEY": "X-TIKA:parse_time_millis", "EXISTS": None},
+        ],
+        [
+            {"INDEX_START": None},
+            {"KEY": "Content-Length", "EXISTS": None},
+            {"INDEX END": None},
+            {"INDEX_START": None},
+            {"KEY": "Content-Type", "EXISTS": None},
+            {"INDEX END": None},
+            {"INDEX_START": None},
+            {"KEY": "X-TIKA:Parsed-By", "EXISTS": None},
+            {"INDEX END": None},
+            {"INDEX_START": None},
+            {"KEY": "X-TIKA:parse_time_millis", "EXISTS": None},
+            {"INDEX END": None},
+        ],
+    )
+]
+
+
+def test_export_sops():
+    """SOPS is a good example of a more complex signature. Ensure
+    it works here.
+    """
+
+    markers = [
+        {"KEY": "sops", "EXISTS": None},
+        {"GOTO": "sops", "KEY": "kms", "EXISTS": None},
+        {"GOTO": "sops", "KEY": "pgp", "EXISTS": None},
+    ]
+
+    processed = pronom.process_markers(markers, 0, "utf-8")
+
+    res = []
+    for sequence in processed[0].byte_sequences:
+        res.append(sequence.value)
+
+    expected = [
+        "7B",
+        "22736F707322{0-16}3A",
+        "226B6D7322{0-16}3A",
+        "2270677022{0-16}3A",
+        "7D",
+    ]
+
+    assert res == expected
+
+
+def test_ocfl_inventory():
+    """OCFL inventory is one of the first examples that worked out of
+    the box and provides good control for errors."""
+
+    markers = [
+        {"KEY": "type", "STARTSWITH": "https://ocfl.io/"},
+        {"KEY": "type", "CONTAINS": "spec/#inventory"},
+        {"KEY": "head", "EXISTS": None},
+        {"KEY": "manifest", "EXISTS": None},
+    ]
+
+    processed = pronom.process_markers(markers, 0, "utf-8")
+
+    res = []
+    for sequence in processed[0].byte_sequences:
+        res.append(sequence.value)
+
+    expected = [
+        "7B",
+        "227479706522{0-16}3A{0-16}2268747470733A2F2F6F63666C2E696F2F",
+        "227479706522{0-16}3A{0-16}22*737065632F23696E76656E746F7279*22",
+        "226865616422{0-16}3A",
+        "226D616E696665737422{0-16}3A",
+        "7D",
+    ]
+    assert res == expected
+
+
+def test_json_patch():
+    """Ensure that JSON patch style markers are converted
+    correctly.
+ """ + + markers = [ + {"INDEX": 0, "KEY": "op", "EXISTS": None}, + {"INDEX": 0, "KEY": "path", "EXISTS": None}, + ] + + processed = pronom.process_markers(markers, 0, "utf-8") + + res = [] + for sequence in processed[0].byte_sequences: + res.append(sequence.value) + + expected = [ + "7B", + "{0-16}5B*7B*226F7022{0-16}3A*7D*5D", + "{0-16}5B*7B*227061746822{0-16}3A*7D*5D", + "7D", + ] + + assert res == expected + + +def test_gltf_schema(): + """Ensure that GLTF style patterns are converted correctly. + + NB. the pattern here is potentially different from that in the + registry and the registry may need updating to be more + permissive, or simply corrected. + """ + + markers = [ + {"KEY": "$schema", "STARTSWITH": "https://json-schema.org/"}, + {"KEY": "$schema", "ENDSWITH": "/schema"}, + {"KEY": "title", "EXISTS": None}, + {"KEY": "type", "IS": "object"}, + {"KEY": "description", "IS": "The root object for a glTF asset."}, + ] + + processed = pronom.process_markers(markers, 0, "utf-8") + + res = [] + for sequence in processed[0].byte_sequences: + res.append(sequence.value) + + expected = [ + "7B", + "2224736368656D6122{0-16}3A{0-16}2268747470733A2F2F6A736F6E2D736368656D612E6F72672F", + "2224736368656D6122{0-16}3A{0-16}*2F736368656D6122", + "227469746C6522{0-16}3A", + "227479706522{0-16}6F626A656374", + "226465736372697074696F6E22{0-16}54686520726F6F74206F626A65637420666F72206120676C54462061737365742E", + "7D", + ] + + assert res == expected + + +def test_tika_recursive(): + """Ensure that TIKA style signatures (relying largely on INDEX) are + converted correctly. + """ + + markers = [ + {"INDEX": 0, "KEY": "Content-Length", "EXISTS": None}, + {"INDEX": 0, "KEY": "Content-Type", "EXISTS": None}, + {"INDEX": 0, "KEY": "X-TIKA:Parsed-By", "EXISTS": None}, + {"INDEX": 0, "KEY": "X-TIKA:parse_time_millis", "EXISTS": None}, + ] + + processed = pronom.process_markers(markers, 0, "utf-8") + + res = [] + for sequence in processed[0].byte_sequences: + res.append(sequence.value) + + expected = [ + "7B", + "{0-16}5B*7B*22436F6E74656E742D4C656E67746822{0-16}3A*7D*5D", + "{0-16}5B*7B*22436F6E74656E742D5479706522{0-16}3A*7D*5D", + "{0-16}5B*7B*22582D54494B413A5061727365642D427922{0-16}3A*7D*5D", + "{0-16}5B*7B*22582D54494B413A70617273655F74696D655F6D696C6C697322{0-16}3A*7D*5D", + "7D", + ] + + assert res == expected + + +encoding_tests = [ + ( + [ + {"KEY": "test", "IS": "data"}, + {"KEY": "file", "ISTYPE": int}, + {"KEY": "bool", "ISTYPE": bool}, + {"KEY": "here", "EXISTS": None}, + {"KEY": "within", "CONTAINS": "value"}, + {"KEY": "start", "STARTSWITH": "value"}, + {"KEY": "end", "ENDSWITH": "value"}, + {"GOTO": "key", "KEY": "at", "EXISTS": None}, + ], + [ + "7B", + "227465737422{0-16}64617461", + "2266696C6522{0-16}3A{0-16}[30:39]", + "22626F6F6C22{0-16}3A{0-16}(74727565|66616C7365)", + "226865726522{0-16}3A", + "2277697468696E22{0-16}3A{0-16}22*76616C7565*22", + "22737461727422{0-16}3A{0-16}2276616C7565", + "22656E6422{0-16}3A{0-16}*76616C756522", + "226B657922{0-16}3A", + "22617422{0-16}3A", + "7D", + ], + "utf-8", + ), + ( + [ + {"INDEX": 1, "KEY": "key", "EXISTS": None}, + ], + [ + "7B", + "{0-16}5B*7B*226B657922{0-16}3A*7D*5D", + "7D", + ], + "utf-8", + ), + ( + [ + {"KEY": "test", "IS": "data"}, + {"KEY": "file", "ISTYPE": int}, + {"KEY": "bool", "ISTYPE": bool}, + {"KEY": "here", "EXISTS": None}, + {"KEY": "within", "CONTAINS": "value"}, + {"KEY": "start", "STARTSWITH": "value"}, + {"KEY": "end", "ENDSWITH": "value"}, + {"GOTO": "key", "KEY": "at", "EXISTS": None}, + ], + [ + "7B00", + 
"220074006500730074002200{0-16}6400610074006100", + "2200660069006C0065002200{0-16}3A00{0-16}[30:39]", + "220062006F006F006C002200{0-16}3A00{0-16}(7400720075006500|660061006C0073006500)", + "220068006500720065002200{0-16}3A00", + "2200770069007400680069006E002200{0-16}3A00{0-16}2200*760061006C0075006500*2200", + "2200730074006100720074002200{0-16}3A00{0-16}2200760061006C0075006500", + "220065006E0064002200{0-16}3A00{0-16}*760061006C00750065002200", + "22006B00650079002200{0-16}3A00", + "2200610074002200{0-16}3A00", + "7D00", + ], + "utf-16", + ), + ( + [ + {"INDEX": 1, "KEY": "key", "EXISTS": None}, + ], + [ + "7B00", + "{0-16}5B00*7B00*22006B00650079002200{0-16}3A00*7D00*5D00", + "7D00", + ], + "utf-16", + ), + ( + [ + {"KEY": "test", "IS": "data"}, + {"KEY": "file", "ISTYPE": int}, + {"KEY": "bool", "ISTYPE": bool}, + {"KEY": "here", "EXISTS": None}, + {"KEY": "within", "CONTAINS": "value"}, + {"KEY": "start", "STARTSWITH": "value"}, + {"KEY": "end", "ENDSWITH": "value"}, + {"GOTO": "key", "KEY": "at", "EXISTS": None}, + ], + [ + "007B", + "002200740065007300740022{0-16}0064006100740061", + "002200660069006C00650022{0-16}003A{0-16}[30:39]", + "00220062006F006F006C0022{0-16}003A{0-16}(0074007200750065|00660061006C00730065)", + "002200680065007200650022{0-16}003A", + "002200770069007400680069006E0022{0-16}003A{0-16}0022*00760061006C00750065*0022", + "0022007300740061007200740022{0-16}003A{0-16}002200760061006C00750065", + "00220065006E00640022{0-16}003A{0-16}*00760061006C007500650022", + "0022006B006500790022{0-16}003A", + "0022006100740022{0-16}003A", + "007D", + ], + "utf-16BE", + ), + ( + [ + {"INDEX": 1, "KEY": "key", "EXISTS": None}, + ], + [ + "007B", + "{0-16}005B*007B*0022006B006500790022{0-16}003A*007D*005D", + "007D", + ], + "utf-16BE", + ), + ( + [ + {"KEY": "test", "IS": "data"}, + {"KEY": "file", "ISTYPE": int}, + {"KEY": "bool", "ISTYPE": bool}, + {"KEY": "here", "EXISTS": None}, + {"KEY": "within", "CONTAINS": "value"}, + {"KEY": "start", "STARTSWITH": "value"}, + {"KEY": "end", "ENDSWITH": "value"}, + {"GOTO": "key", "KEY": "at", "EXISTS": None}, + ], + [ + "7B000000", + "220000007400000065000000730000007400000022000000{0-16}64000000610000007400000061000000", + "2200000066000000690000006C0000006500000022000000{0-16}3A000000{0-16}[30:39]", + "22000000620000006F0000006F0000006C00000022000000{0-16}3A000000{0-16}(74000000720000007500000065000000|66000000610000006C0000007300000065000000)", + "220000006800000065000000720000006500000022000000{0-16}3A000000", + "2200000077000000690000007400000068000000690000006E00000022000000{0-16}3A000000{0-16}22000000*76000000610000006C0000007500000065000000*22000000", + "22000000730000007400000061000000720000007400000022000000{0-16}3A000000{0-16}2200000076000000610000006C0000007500000065000000", + "22000000650000006E0000006400000022000000{0-16}3A000000{0-16}*76000000610000006C000000750000006500000022000000", + "220000006B000000650000007900000022000000{0-16}3A000000", + "22000000610000007400000022000000{0-16}3A000000", + "7D000000", + ], + "utf-32le", + ), + ( + [ + {"INDEX": 1, "KEY": "key", "EXISTS": None}, + ], + [ + "7B000000", + "{0-16}5B000000*7B000000*220000006B000000650000007900000022000000{0-16}3A000000*7D000000*5D000000", + "7D000000", + ], + "utf-32le", + ), +] + + +@pytest.mark.parametrize("markers, expected, encoding", encoding_tests) +def test_unicode_signatures(markers, expected, encoding): + """Provide a basic unicode tests. 
+ + These tests are based on the following two sample files: + + ```json + { + "test": "data", + "file": 1, + "bool": true, + "here": "random...", + "within": "_value_", + "start": "value_", + "end": "_value", + "key": { + "at": "value" + } + } + ``` + + ```json + [ + 0, + { + "key": "value" + } + ] + ``` + + """ + + processed = pronom.process_markers(markers, 0, encoding) + res = [] + for sequence in processed[0].byte_sequences: + res.append(sequence.value) + assert res == expected From 5c02a8d36a4530fe456083e804e1ae8cef82ad4a Mon Sep 17 00:00:00 2001 From: ross-spencer Date: Sun, 4 Jan 2026 17:15:22 +0100 Subject: [PATCH 2/7] Add json2json entry point --- json2json.py | 12 ++++++++++++ jsonid.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 json2json.py diff --git a/json2json.py b/json2json.py new file mode 100644 index 0000000..1bcbe2b --- /dev/null +++ b/json2json.py @@ -0,0 +1,12 @@ +"""JSONID entry-points.""" + +from src.utils import json2json + + +def main(): + """Primary entry point for this script.""" + json2json.main() + + +if __name__ == "__main__": + main() diff --git a/jsonid.py b/jsonid.py index 5c087ae..597256b 100644 --- a/jsonid.py +++ b/jsonid.py @@ -1,4 +1,4 @@ -"""Donovan entry-points.""" +"""JSONID entry-points.""" from src.jsonid import jsonid From 44f9a0b7394c414f651449dc7939f6df869840f0 Mon Sep 17 00:00:00 2001 From: ross-spencer Date: Sun, 4 Jan 2026 17:18:40 +0100 Subject: [PATCH 3/7] Add standalone PRONOM export Enables the user to supply a single patterns file to a PRONOM utility to output a PRONOM signature file. Additionally deepcopy has been used in the export script to ensure immutability of list parameters. --- json2pronom.py | 12 +++ pronom_example/patterns_example.json | 4 + src/jsonid/export.py | 2 +- src/jsonid/pronom.py | 34 +++++-- src/utils/jsonid2pronom.py | 133 +++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 json2pronom.py create mode 100644 pronom_example/patterns_example.json create mode 100644 src/utils/jsonid2pronom.py diff --git a/json2pronom.py b/json2pronom.py new file mode 100644 index 0000000..0f2c3f7 --- /dev/null +++ b/json2pronom.py @@ -0,0 +1,12 @@ +"""JSONID entry-points.""" + +from src.utils import jsonid2pronom + + +def main(): + """Primary entry point for this script.""" + jsonid2pronom.main() + + +if __name__ == "__main__": + main() diff --git a/pronom_example/patterns_example.json b/pronom_example/patterns_example.json new file mode 100644 index 0000000..8631d38 --- /dev/null +++ b/pronom_example/patterns_example.json @@ -0,0 +1,4 @@ +[ + {"KEY": "key1", "EXISTS": null}, + {"KEY": "key2", "EXISTS": null} +] diff --git a/src/jsonid/export.py b/src/jsonid/export.py index b128b5a..7164a0c 100644 --- a/src/jsonid/export.py +++ b/src/jsonid/export.py @@ -107,7 +107,7 @@ def export_pronom() -> None: mime = "" try: sequences = pronom.process_markers( - markers.copy(), + copy.deepcopy(markers), increment_id, encoding=encoding, ) diff --git a/src/jsonid/pronom.py b/src/jsonid/pronom.py index ddf0d80..5a279df 100644 --- a/src/jsonid/pronom.py +++ b/src/jsonid/pronom.py @@ -248,13 +248,8 @@ def create_file_format_collection(fmt: list[Format]): return ff.strip() -def process_formats_and_save(formats: list[Format], filename: str): - """Process the collected formats and output a signature file. - - NB. Given our dataclasses here, we have the opportunity to rework - this data into many new structures. We output XML because DROID - expects XML. 
-    """
+def _process_formats(formats: list[Format]):
+    """Process formats into PRONOM signature-file XML."""
     isc = []
     ffc = []
     for fmt in formats:
         ffc.append(create_file_format_collection(fmt))
         if fmt.internal_signatures:
             isc.append(create_many_to_one_byte_sequence(fmt.internal_signatures))
@@ -278,14 +273,37 @@
         dom = xml.dom.minidom.parseString(signature_file)
     except xml.parsers.expat.ExpatError as err:
         logger.error("cannot process xml: %s", err)
-        return
+        return ""
     pretty_xml = dom.toprettyxml(indent=" ", encoding="utf-8")
     prettier_xml = export_helpers.new_prettify(pretty_xml)
+    return prettier_xml
+
+
+def process_formats_and_save(formats: list[Format], filename: str):
+    """Process the collected formats and output a signature file.
+
+    NB. Given our dataclasses here, we have the opportunity to rework
+    this data into many new structures. We output XML because DROID
+    expects XML.
+    """
+    prettier_xml = _process_formats(formats)
     logger.info("outputting to: %s", filename)
     with open(filename, "w", encoding="utf-8") as output_file:
         output_file.write(prettier_xml)
 
 
+def process_formats_to_stdout(formats: list[Format]):
+    """Process the collected formats and print the signature file to
+    stdout.
+
+    NB. Given our dataclasses here, we have the opportunity to rework
+    this data into many new structures. We output XML because DROID
+    expects XML.
+    """
+    prettier_xml = _process_formats(formats)
+    logger.info("outputting to: stdout")
+    print(prettier_xml)
+
+
 def encode_roundtrip(hexed_val: str, encoding: str) -> str:
     """We want to get a plain-text byte-sequence into a new
     encoding. It takes a few hops and skips.
diff --git a/src/utils/jsonid2pronom.py b/src/utils/jsonid2pronom.py
new file mode 100644
index 0000000..be3fae2
--- /dev/null
+++ b/src/utils/jsonid2pronom.py
@@ -0,0 +1,133 @@
+"""jsonid2pronom provides a helper script to enable export of generic
+JSONID compatible markers to a PRONOM compatible signature file.
+"""
+
+import argparse
+import asyncio
+import copy
+import json
+import logging
+import sys
+
+from src.jsonid import pronom
+
+# Set up logging.
+logging.basicConfig(
+    format="%(asctime)-15s %(levelname)s :: %(filename)s:%(lineno)s:%(funcName)s() :: %(message)s",  # noqa: E501
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level="INFO",
+    handlers=[
+        logging.StreamHandler(),
+    ],
+)
+
+logger = logging.getLogger(__name__)
+
+
+async def load_patterns(path: str) -> list:
+    """Load patterns from a file for conversion to a signature file."""
+    patterns = []
+    with open(path, "r", encoding="utf-8") as patterns_file:
+        patterns = json.loads(patterns_file.read())
+    return patterns
+
+
+async def output_signature(path: str):
+    """Output JSONID compatible signatures to PRONOM."""
+
+    formats = []
+
+    encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
+    priorities = []
+
+    increment_id = 0
+
+    markers = await load_patterns(path)
+
+    if not markers:
+        logger.error("no patterns provided via path arg")
+        sys.exit(1)
+
+    for encoding in encodings:
+        increment_id += 1
+        json_puid = "jsonid2pronom/1"
+        name_ = f"JSONID2PRONOM Conversion ({encoding})"
+        mime = "application/json"
+        try:
+            sequences = pronom.process_markers(
+                copy.deepcopy(markers),
+                increment_id,
+                encoding=encoding,
+            )
+        except pronom.UnprocessableEntity as err:
+            logger.error(
+                "%s %s: cannot handle: %s",
+                json_puid,
+                name_,
+                err,
+            )
+            for err_marker in markers:
+                logger.debug("--- START ---")
+                logger.debug("marker: %s", err_marker)
+                logger.debug("---  END  ---")
+            continue
+        fmt = pronom.Format(
+            id=increment_id,
+            name=name_,
+            version="",
+            puid=json_puid,
+            mime=mime,
+            classification="structured text",
+            external_signatures=[
+                pronom.ExternalSignature(
+                    id=increment_id,
+                    signature="json",
+                    type=pronom.EXT,
+                )
+            ],
+            internal_signatures=sequences,
+            priorities=list(set(priorities)),
+        )
+        priorities.append(f"{increment_id}")
+        formats.append(fmt)
+
+    pronom.process_formats_to_stdout(formats)
+
+
+def main() -> None:
+    """Primary entry point for this script."""
+    parser = argparse.ArgumentParser(
+        prog="jsonid2pronom",
+        description="convert JSONID compatible markers to PRONOM",
+        epilog="for more information visit https://github.com/ffdev-info/jsonid",
+    )
+    parser.add_argument(
+        "--debug",
+        help="use debug logging",
+        required=False,
+        action="store_true",
+    )
+    parser.add_argument(
+        "--path",
+        "-p",
+        help="file path to process",
+        required=False,
+    )
+    args = parser.parse_args()
+    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
+    logger.debug("debug logging is configured")
+    if not args.path:
+        parser.print_help(sys.stderr)
+        sys.exit()
+    asyncio.run(
+        output_signature(
+            path=args.path,
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()

From b064c3e888d261878f8063690baaa9e4e3532fff Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Sun, 4 Jan 2026 17:26:21 +0100
Subject: [PATCH 4/7] Update docs

---
 docs/jsonid/export.html         |  145 ++++-
 docs/jsonid/export_helpers.html |  118 ++++
 docs/jsonid/helpers.html        |   14 +-
 docs/jsonid/index.html          |   10 +
 docs/jsonid/jsonid.html         |   11 +-
 docs/jsonid/pronom.html         | 1040 +++++++++++++++++++++++++++++++
 docs/registry/index.htm         |    8 +-
 docs/utils/index.html           |    6 +
 docs/utils/json2json.html       |    4 +-
 docs/utils/jsonid2pronom.html   |  211 +++++++
 10 files changed, 1549 insertions(+), 18 deletions(-)
 create mode 100644 docs/jsonid/export_helpers.html
 create mode 100644 docs/jsonid/pronom.html
 create mode 100644 docs/utils/jsonid2pronom.html

diff --git a/docs/jsonid/export.html b/docs/jsonid/export.html
index c85574f..52b34c9 100644
--- a/docs/jsonid/export.html
+++
b/docs/jsonid/export.html @@ -55,7 +55,7 @@

Functions

def exportJSON() -> None:  # pylint: disable=C0103
     """Export to JSON."""
-    logger.debug("exporting registry ad JSON")
+    logger.debug("exporting registry as JSON")
     data = registry_data.registry()
     json_obj = []
     id_ = {
@@ -74,9 +74,144 @@ 

Functions

Export to JSON.

+
+def export_pronom() ‑> None +
+
+
+ +Expand source code + +
def export_pronom() -> None:
+    """Export a PRONOM compatible set of signatures.
+
+    Export is done in two phases. First, a set of proposed "Baseline"
+    JSON signatures is exported to catch many JSON instances.
+
+    Second, the JSONID registry is exported.
+
+    Every export has priority over those before it so that there
+    should be no multiple identification results.
+    """
+
+    # pylint: disable=R0914; too-many local variables.
+
+    logger.debug("exporting registry as PRONOM")
+
+    reg_data = registry_data.registry()
+    formats = []
+
+    encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
+    priorities = []
+
+    increment_id = 0
+
+    for encoding in encodings:
+        all_baseline = pronom.create_baseline_json_sequences(encoding)
+        for baseline in all_baseline:
+            increment_id += 1
+            fmt = pronom.Format(
+                id=increment_id,
+                name=f"JSON (Baseline - fmt/817) ({encoding})",
+                version="",
+                puid="jsonid:0000",
+                mime="application/json",
+                classification="structured text",
+                external_signatures=[
+                    pronom.ExternalSignature(
+                        id=increment_id,
+                        signature="json",
+                        type=pronom.EXT,
+                    )
+                ],
+                internal_signatures=[baseline],
+                priorities=priorities,
+            )
+            priorities.append(f"{increment_id}")
+            formats.append(fmt)
+
+    for encoding in encodings:
+        for entry in reg_data:
+            increment_id += 1
+            json_puid = f"{entry.json()['identifier']};{encoding}"
+            name_ = f"{entry.json()['name'][0]['@en']} ({encoding})"
+            markers = entry.json()["markers"]
+            try:
+                mime = entry.json()["mime"][0]
+            except IndexError:
+                mime = ""
+            try:
+                sequences = pronom.process_markers(
+                    copy.deepcopy(markers),
+                    increment_id,
+                    encoding=encoding,
+                )
+            except pronom.UnprocessableEntity as err:
+                logger.error(
+                    "%s %s: cannot handle: %s",
+                    json_puid,
+                    name_,
+                    err,
+                )
+                for marker in markers:
+                    logger.debug("--- START ---")
+                    logger.debug("marker: %s", marker)
+                    logger.debug("---  END  ---")
+                continue
+            fmt = pronom.Format(
+                id=increment_id,
+                name=name_,
+                version="",
+                puid=json_puid,
+                mime=mime,
+                classification="structured text",
+                external_signatures=[
+                    pronom.ExternalSignature(
+                        id=increment_id,
+                        signature="json",
+                        type=pronom.EXT,
+                    )
+                ],
+                internal_signatures=sequences,
+                priorities=copy.deepcopy(list(set(priorities))),
+            )
+            priorities.append(f"{increment_id}")
+            formats.append(fmt)
+
+    pronom.process_formats_and_save(formats, PRONOM_FILENAME)
+
+

Export a PRONOM compatible set of signatures.

+

Export is done in two phases. First, a set of proposed "Baseline" +JSON signatures is exported to catch many JSON instances.

+

Second, the JSONID registry is exported.

+

Every export has priority over those before it so that there +should be no multiple identification results.

+
+

Classes

+
+
+class PRONOMException +(*args, **kwargs) +
+
+
+ +Expand source code + +
class PRONOMException(Exception):
+    """Exception class if we can't create a PRONOM signature as expected."""
+
+

Exception class if we can't create a PRONOM signature as expected.

+

Ancestors

+
    +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
diff --git a/docs/jsonid/index.html b/docs/jsonid/index.html index 3a3f96e..02e4e2d 100644 --- a/docs/jsonid/index.html +++ b/docs/jsonid/index.html @@ -56,6 +56,10 @@

Sub-modules

Functions to support export.

+
src.jsonid.export_helpers
+
+

Helpers for the export functions.

+
src.jsonid.file_processing

File processing functions.

@@ -80,6 +84,10 @@

Sub-modules

Functions for output of results.

+
src.jsonid.pronom
+
+

PRONOM export routines …

+
src.jsonid.registry

JSON registry processor.

@@ -125,12 +133,14 @@

Sub-modules

  • src.jsonid.base_obj_presets
  • src.jsonid.compressionlib
  • src.jsonid.export
  • +
  • src.jsonid.export_helpers
  • src.jsonid.file_processing
  • src.jsonid.helpers
  • src.jsonid.htm_template
  • src.jsonid.jsonid
  • src.jsonid.lookup
  • src.jsonid.output
  • +
  • src.jsonid.pronom
  • src.jsonid.registry
  • src.jsonid.registry_class
  • src.jsonid.registry_data
  • diff --git a/docs/jsonid/jsonid.html b/docs/jsonid/jsonid.html index 401ff40..f217201 100644 --- a/docs/jsonid/jsonid.html +++ b/docs/jsonid/jsonid.html @@ -84,7 +84,7 @@

    Functions

    # pylint: disable=R0912,R0915 parser = argparse.ArgumentParser( - prog="json-id", + prog="jsonid", description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values", epilog="for more information visit https://github.com/ffdev-info/json-id", ) @@ -142,10 +142,12 @@

    Functions

    help="path to a custom registry to lead into memory replacing the default", required=False, ) + # NB. consider output to stdout once the feature is more stable. parser.add_argument( "--pronom", - help="return a PRONOM-centric view of the results", + help=f"return a PRONOM-centric view of the results to `{export.PRONOM_FILENAME}` (BETA)", required=False, + action="store_true", ) parser.add_argument( "--export", @@ -155,7 +157,7 @@

    Functions

    ) parser.add_argument( "--check", - help="check the registry entrues are correct", + help="check the registry entries are correct", required=False, action="store_true", ) @@ -206,7 +208,8 @@

    Functions

    if args.registry: raise NotImplementedError("custom registry is not yet available") if args.pronom: - raise NotImplementedError("pronom view is not yet implemented") + export.export_pronom() + sys.exit() if args.language: raise NotImplementedError("multiple languages are not yet implemented") if args.export: diff --git a/docs/jsonid/pronom.html b/docs/jsonid/pronom.html new file mode 100644 index 0000000..77e8db0 --- /dev/null +++ b/docs/jsonid/pronom.html @@ -0,0 +1,1040 @@ + + + + + + +src.jsonid.pronom API documentation + + + + + + + + + + + +
Module src.jsonid.pronom

PRONOM export routines.

XML tooling: https://xmllint.com/

Functions
def calculate_variable_off_bof(item: ByteSequence):
    """Given variable offsets, calculate the correct syntax."""
    seq = item.value
    if (
        item.min_off != ""
        and int(item.min_off) > 0
        and item.max_off != ""
        and int(item.max_off) > 0
    ):
        seq = f"{{{item.min_off}-{int(item.min_off)+int(item.max_off)}}}{seq}"
    elif item.max_off != "" and int(item.max_off) > 0:
        seq = f"{{0-{item.max_off}}}{seq}"
    elif item.min_off != "" and int(item.min_off) > 0:
        seq = f"{{{item.min_off}}}{seq}"
    return seq
def calculate_variable_off_eof(item: ByteSequence):
    """Given variable offsets, calculate the correct syntax."""
    seq = item.value
    if (
        item.min_off != ""
        and int(item.min_off) > 0
        and item.max_off != ""
        and int(item.max_off) > 0
    ):
        seq = f"{seq}{{{item.min_off}-{int(item.min_off)+int(item.max_off)}}}"
    elif item.max_off != "" and int(item.max_off) > 0:
        seq = f"{seq}{{0-{item.max_off}}}"
    elif item.min_off != "" and int(item.min_off) > 0:
        seq = f"{seq}{{{item.min_off}}}"
    return seq
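To make the offset syntax concrete, a minimal sketch of both helpers; the values here are illustrative only, and `ByteSequence` is the dataclass documented later on this page:

```python
# Illustrative values only; ByteSequence is the dataclass defined below.
bof = ByteSequence(id=1, pos="BOF", min_off="0", max_off="1024", endian="", value="7B")
print(calculate_variable_off_bof(bof))  # -> "{0-1024}7B": wildcard range before the hex

eof = ByteSequence(id=1, pos="EOF", min_off="0", max_off="1024", endian="", value="7D")
print(calculate_variable_off_eof(eof))  # -> "7D{0-1024}": wildcard range after the hex
```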
def convert_marker_to_signature_sequence(marker: dict, encoding: str) -> str:
    """Convert a JSONID marker into a signature sequence."""

    # pylint: disable=R0914; too-many local variables.
    # pylint: disable=R0911; too-many return statements.
    # pylint: disable=R0915; too-many statements.

    logger.debug("marker: %s", marker)

    colon_encoded: Final[str] = encode_roundtrip(COLON, encoding)
    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
    square_open_encoded: Final[str] = encode_roundtrip(SQUARE_OPEN, encoding)
    square_close_encoded: Final[str] = encode_roundtrip(SQUARE_CLOSE, encoding)

    instruction = ""
    if registry_matchers.MARKER_GOTO in marker.keys():
        # GOTO KEY and match KEY.
        goto_key = _str_to_hex_str(marker["GOTO"])
        key_at_goto = _str_to_hex_str(marker["KEY"])
        goto_encoded = quote_and_encode(goto_key, encoding)
        key_encoded = quote_and_encode(key_at_goto, encoding)
        instruction = f"{goto_encoded}{WHITESPACE_REGEX}{colon_encoded}*{WHITESPACE_REGEX}{key_encoded}{WHITESPACE_REGEX}{colon_encoded}"
        marker.pop("GOTO")
        marker.pop("KEY")
        return instruction.upper()
    if registry_matchers.MARKER_INDEX in marker.keys():
        key = _str_to_hex_str(marker["KEY"])
        instruction = f"{WHITESPACE_REGEX}{square_open_encoded}*{curly_open_encoded}*{double_quote_encoded}{encode_roundtrip(key, encoding)}{double_quote_encoded}{WHITESPACE_REGEX}{colon_encoded}*{curly_close_encoded}*{square_close_encoded}"
        marker.pop("INDEX")
        marker.pop("KEY")
        return instruction.upper()
    if "KEY" in marker.keys():
        key = _str_to_hex_str(marker["KEY"])
        instruction = quote_and_encode(key, encoding)
        marker.pop("KEY")
    if registry_matchers.MARKER_KEY_EXISTS in marker.keys():
        instruction = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}".upper()
        return instruction
    if registry_matchers.MARKER_IS_TYPE in marker.keys():
        is_type = _type_to_str(marker["ISTYPE"], encoding=encoding)
        type_val = (
            f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{is_type}"
        )
        return type_val.upper()
    if registry_matchers.MARKER_IS in marker.keys():
        marker_is = marker["IS"]
        if not isinstance(marker_is, str):
            _complex_is_type(marker_is)
        equals = _str_to_hex_str(marker_is)
        is_val = f"{instruction}{WHITESPACE_REGEX}{encode_roundtrip(equals, encoding)}"
        return is_val.upper()
    if registry_matchers.MARKER_STARTSWITH in marker.keys():
        starts_with = _str_to_hex_str(marker["STARTSWITH"])
        starts_with_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{double_quote_encoded}{encode_roundtrip(starts_with, encoding)}"
        return starts_with_val.upper()
    if registry_matchers.MARKER_ENDSWITH in marker.keys():
        ends_with = _str_to_hex_str(marker["ENDSWITH"])
        ends_with_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}*{encode_roundtrip(ends_with, encoding)}{double_quote_encoded}"
        return ends_with_val.upper()
    if registry_matchers.MARKER_CONTAINS in marker.keys():
        contains = _str_to_hex_str(marker["CONTAINS"])
        contains_val = f"{instruction}{WHITESPACE_REGEX}{colon_encoded}{WHITESPACE_REGEX}{double_quote_encoded}*{encode_roundtrip(contains, encoding)}*{double_quote_encoded}"
        return contains_val.upper()
    if registry_matchers.MARKER_REGEX in marker.keys():
        raise UnprocessableEntity("REGEX not yet implemented")
    if registry_matchers.MARKER_KEY_NO_EXIST in marker.keys():
        raise UnprocessableEntity("KEY NO EXIST not yet implemented")
    # We should never arrive here. In the future clean this up so we
    # only return when we have information.
    return ""
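A sketch of what a single marker converts to; the marker is hypothetical, and it assumes `_str_to_hex_str` hex-encodes plain strings and that `WHITESPACE_REGEX` is the module's whitespace wildcard:

```python
# Hypothetical marker: assert a "type" key exists. "type" hex-encodes to
# 74797065, so under UTF-8 the quoted key is 227479706522, followed by the
# whitespace wildcard and the encoded colon (3A).
seq = convert_marker_to_signature_sequence({"KEY": "type", "EXISTS": None}, "UTF-8")
print(seq)  # e.g. 227479706522<whitespace wildcard>3A
```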
def create_baseline_json_sequences(encoding: str):
    """Create baseline JSON sequences that match map and list types
    with various different encodings.
    """

    # pylint: disable=R0914; too-many local variables.

    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)
    square_open_encoded: Final[str] = encode_roundtrip(SQUARE_OPEN, encoding)
    square_close_encoded: Final[str] = encode_roundtrip(SQUARE_CLOSE, encoding)

    colon_encoded: Final[str] = encode_roundtrip(COLON, encoding)
    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)

    bof = f"({curly_open_encoded}|{square_open_encoded})"
    eof = f"({curly_close_encoded}|{square_close_encoded})"

    no_encoded: Final[str] = NUMBER_REGEX
    true_encoded: Final[str] = encode_roundtrip(TRUE_VALUE, encoding)
    false_encoded: Final[str] = encode_roundtrip(FALSE_VALUE, encoding)
    null_encoded: Final[str] = encode_roundtrip(NULL_VALUE, encoding)

    options = (
        f"{double_quote_encoded}{WHITESPACE_REGEX}{colon_encoded}",
        no_encoded,
        f"({true_encoded}|{false_encoded})",
        null_encoded,
    )

    sigs = []

    for opt in options:
        bs = []
        bs.append(
            ByteSequence(
                id=1,
                pos="BOF",
                min_off=0,
                max_off=f"{DISK_SECTOR_SIZE}",
                endian="",
                value=bof,
            )
        )
        bs.append(
            ByteSequence(
                id=1,
                pos="VAR",
                min_off=0,
                max_off=0,
                endian="",
                value=opt,
            )
        )
        bs.append(
            ByteSequence(
                id=1,
                pos="EOF",
                min_off="0",
                max_off=f"{DISK_SECTOR_SIZE}",
                endian="",
                value=eof,
            )
        )
        iss = InternalSignature(
            id=0,
            name="",
            byte_sequences=bs,
        )
        sigs.append(iss)

    return sigs
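The four baseline signatures per encoding can be inspected directly; a small sketch (output shown for UTF-8, where `{` is 7B, `[` is 5B, `}` is 7D and `]` is 5D):

```python
# Each baseline signature brackets one JSON value shape between an opening
# and closing brace/bracket within the first/last disk sector.
for sig in create_baseline_json_sequences("UTF-8"):
    bof, var, eof = sig.byte_sequences
    print(bof.value, var.value, eof.value)
# (7B|5B) <key/number/boolean/null sequence> (7D|5D)
```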
def create_file_format_collection(fmt: Format):
    """Create the FileFormatCollection object.

    E.g.
    ```
        <FileFormat ID="1" Name="Development Signature" PUID="dev/1" Version="1.0" MIMEType="application/octet-stream">
            <InternalSignatureID>1</InternalSignatureID>
            <Extension>ext</Extension>
        </FileFormat>

        <FileFormat ID="49" MIMEType="application/postscript" FormatType="Text (Structured)"
            Name="Adobe Illustrator" PUID="x-fmt/20" Version="1.0 / 1.1">
            <InternalSignatureID>880</InternalSignatureID>
            <InternalSignatureID>881</InternalSignatureID>
            <Extension>ai</Extension>
            <HasPriorityOverFileFormatID>86</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>331</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>332</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>771</HasPriorityOverFileFormatID>
            <HasPriorityOverFileFormatID>773</HasPriorityOverFileFormatID>
        </FileFormat>
    ```
    """
    internal_sigs = [
        f"<InternalSignatureID>{sig.id}</InternalSignatureID>"
        for sig in fmt.internal_signatures
    ]
    external_sigs = [
        f"<Extension>{sig.signature}</Extension>"
        for sig in fmt.external_signatures
        if sig.type.lower() == EXT
    ]
    priorities = [
        f"<HasPriorityOverFileFormatID>{priority}</HasPriorityOverFileFormatID>"
        for priority in fmt.priorities
    ]
    ff = f"""
<FileFormat ID=\"{fmt.id}\" Name=\"{fmt.name}\" PUID=\"{fmt.puid}\" Version="{fmt.version}" MIMEType=\"{fmt.mime}\" FormatType=\"{fmt.classification}\" >
    {"".join(internal_sigs).strip()}
    {"".join(external_sigs).strip()}
    {"".join(priorities).strip()}
</FileFormat>
    """
    return ff.strip()
def create_many_to_one_byte_sequence(internal_signatures: list[InternalSignature]):
    """Create a many to one byte sequence, i.e. a format with multiple
    Internal Signatures.
    """
    internal_signature = ""
    for internal in internal_signatures:
        id_ = internal.id
        bs = create_one_to_many_byte_sequence(internal.byte_sequences)
        internal_signature = f"""
{internal_signature}<InternalSignature ID=\"{id_}\" Specificity=\"Specific\">
    {bs}
</InternalSignature>
        """
    return internal_signature.strip()
def create_one_to_many_byte_sequence(byte_sequences: list[ByteSequence]):
    """Create a byte sequence object."""
    byte_sequence = ""
    for item in byte_sequences:
        seq = item.value
        if item.pos.startswith("EOF"):
            seq = calculate_variable_off_eof(item)
        elif item.pos.startswith("BOF"):
            seq = calculate_variable_off_bof(item)
        byte_sequence = f"""
{byte_sequence.strip()}
    <ByteSequence Reference=\"{item.pos}\" Sequence=\"{seq}\" MinOffset=\"{item.min_off}\" MaxOffset=\"{item.max_off}\"/>
        """
    return byte_sequence.strip()
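A sketch of the XML the sequence builders emit; the offsets here are illustrative:

```python
bs = [
    ByteSequence(id=0, pos="BOF", min_off="0", max_off="1024", endian="", value="7B"),
    ByteSequence(id=0, pos="EOF", min_off="0", max_off="1024", endian="", value="7D"),
]
print(create_one_to_many_byte_sequence(bs))
# (whitespace trimmed)
# <ByteSequence Reference="BOF" Sequence="{0-1024}7B" MinOffset="0" MaxOffset="1024"/>
# <ByteSequence Reference="EOF" Sequence="7D{0-1024}" MinOffset="0" MaxOffset="1024"/>
```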
def encode_roundtrip(hexed_val: str, encoding: str) -> str:
    """We want to get a plain-text byte-sequence into a new
    encoding. It takes a few hops and skips.
    """
    val = hexed_val.strip()
    try:
        re_encoded = binascii.unhexlify(hexed_val).decode("utf-8").encode(encoding)
    except (binascii.Error, UnicodeDecodeError) as err:
        logger.error("cannot convert: %s len: %s ('%s')", hexed_val, len(val), err)
        return val
    hex_val = binascii.hexlify(re_encoded).decode().upper()
    for bom in _get_bom():
        if not hex_val.startswith(bom):
            continue
        return hex_val.replace(bom, "")
    return hex_val
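Worked examples of the round trip; `7B` is `{` in UTF-8. The UTF-16 result assumes `_get_bom()` lists the UTF-16/32 byte order marks, which the stripping loop implies:

```python
print(encode_roundtrip("7B", "UTF-8"))     # 7B
print(encode_roundtrip("7B", "UTF-16BE"))  # 007B
print(encode_roundtrip("7B", "UTF-16"))    # 7B00 (little-endian default, BOM stripped)
```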
def preprocess_goto_markers(markers: list) -> list:
    """Preprocess markers to remove data that is otherwise duplicated
    when converted to a PRONOM signature, e.g. GOTO."""

    out = []
    for marker in markers:
        if registry_matchers.MARKER_GOTO not in marker:
            out.append(marker)
            continue
        key = marker.pop("GOTO")
        new_marker = {"KEY": key, "EXISTS": None}
        if new_marker not in out:
            out.append(new_marker)
        out.append(marker)
    return out
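A quick sketch of the GOTO expansion with a hypothetical marker:

```python
markers = [{"GOTO": "asset", "KEY": "version"}]
print(preprocess_goto_markers(markers))
# -> [{'KEY': 'asset', 'EXISTS': None}, {'KEY': 'version'}]
```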
def process_formats_and_save(formats: list[Format], filename: str):
    """Process the collected formats and output a signature file.

    NB. Given our dataclasses here, we have the opportunity to rework
    this data into many new structures. We output XML because DROID
    expects XML.
    """
    prettier_xml = _process_formats(formats)
    logger.info("outputting to: %s", filename)
    with open(filename, "w", encoding="utf-8") as output_file:
        output_file.write(prettier_xml)
def process_formats_to_stdout(formats: list[Format]):
    """Process the collected formats and write the signature XML to
    stdout.

    NB. Given our dataclasses here, we have the opportunity to rework
    this data into many new structures. We output XML because DROID
    expects XML.
    """
    prettier_xml = _process_formats(formats)
    logger.info("outputting to: stdout")
    print(prettier_xml)
def process_markers(
    markers: list, sig_id: int, encoding: str = ""
) -> list[InternalSignature]:
    """Given a set of markers for a document type, process them into
    a set of byte sequences and finally an internal signature sequence
    that can be output as a PRONOM signature.

    Returns a list containing the internal signature derived from the
    markers. Some conversions, e.g. type matches, are potentially
    lossy. Marker shapes handled here include:

    dict_keys(['CONTAINS'])
    dict_keys(['ENDSWITH'])
    dict_keys(['IS'])
    dict_keys(['ISTYPE'])
    dict_keys(['STARTSWITH'])

    key(0-n):(0-n)value

    Need to return something like:

      <ByteSequence Reference="BOFoffset" Sequence="FFD8FFE0{2}4A464946000101(00|01|02)" MinOffset="0" MaxOffset=""/>

    Different encodings need to be accounted for, e.g. (with added
    whitespace below)

    UTF-32-LE:

        00000000: 2000 0000 2000 0000 2000 0000 2000 0000   ... ... ... ...
        00000010: 2000 0000 2000 0000 0a00 0000 0a00 0000   ... ...........
        00000020: 0a00 0000 0a00 0000 7b00 0000 2200 0000  ........{..."...
        00000030: 6100 0000 2200 0000 3a00 0000 2000 0000  a..."...:... ...
        00000040: 2200 0000 6200 0000 2200 0000 7d00 0000  "...b..."...}...
        00000050: 0a00 0000                                ....

    UTF-32-BE:

        00000000: 0000 0020 0000 0020 0000 0020 0000 0020  ... ... ... ...
        00000010: 0000 0020 0000 0020 0000 000a 0000 000a  ... ... ........
        00000020: 0000 000a 0000 000a 0000 007b 0000 0022  ...........{..."
        00000030: 0000 0061 0000 0022 0000 003a 0000 0020  ...a..."...:...
        00000040: 0000 0022 0000 0062 0000 0022 0000 007d  ..."...b..."...}
        00000050: 0000 000a                                ....

    UTF-16-LE:

        00000000: 2000 2000 2000 2000 2000 2000 0a00 0a00   . . . . . .....
        00000010: 0a00 0a00 7b00 2200 6100 2200 3a00 2000  ....{.".a.".:. .
        00000020: 2200 6200 2200 7d00 0a00                 ".b.".}...

    UTF-16-BE:

        00000000: 0020 0020 0020 0020 0020 0020 000a 000a  . . . . . . ....
        00000010: 000a 000a 007b 0022 0061 0022 003a 0020  .....{.".a.".:.
        00000020: 0022 0062 0022 007d 000a                 .".b.".}..
    """

    curly_open_encoded: Final[str] = encode_roundtrip(CURLY_OPEN, encoding)
    curly_close_encoded: Final[str] = encode_roundtrip(CURLY_CLOSE, encoding)

    sequences = []

    markers = preprocess_goto_markers(markers)

    for marker in markers:
        sig_sequence = convert_marker_to_signature_sequence(marker, encoding)
        sequences.append(sig_sequence)

    byte_sequences = []

    byte_sequences.append(
        ByteSequence(
            id=0,
            pos="BOF",
            min_off=0,
            max_off=f"{DISK_SECTOR_SIZE}",
            endian="",
            value=curly_open_encoded,
        )
    )

    for idx, item in enumerate(sequences, 0):
        logger.debug("%s. %s", idx, item)
        byte_sequence = ByteSequence(
            id=idx,
            pos="VAR",
            min_off="",
            max_off="",
            endian="",
            value=item,
        )
        byte_sequences.append(byte_sequence)

    byte_sequences.append(
        ByteSequence(
            id=0,
            pos="EOF",
            min_off="0",
            max_off=f"{DISK_SECTOR_SIZE}",
            endian="",
            value=curly_close_encoded,
        )
    )

    internal_signature = InternalSignature(
        id=sig_id,
        name="",
        byte_sequences=byte_sequences,
    )

    return [internal_signature]
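End to end, one hypothetical marker produces a single internal signature bracketed by brace sequences (UTF-8 values shown; the VAR value depends on the marker type):

```python
sigs = process_markers([{"KEY": "version", "EXISTS": None}], sig_id=1, encoding="UTF-8")
for bs in sigs[0].byte_sequences:
    print(bs.pos, bs.value)
# BOF 7B
# VAR <quoted "version" key plus the encoded colon>
# EOF 7D
```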
def quote_and_encode(value, encoding) -> str:
    """Quote and encode a given value."""

    double_quote_encoded: Final[str] = encode_roundtrip(DOUBLE_QUOTE, encoding)
    return f"{double_quote_encoded}{encode_roundtrip(value, encoding)}{double_quote_encoded}"
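For example, for the hex string `76657273696f6e` ("version") under UTF-8 this returns the value wrapped in encoded double quotes (22):

```python
print(quote_and_encode("76657273696f6e", "UTF-8"))
# -> 2276657273696F6E22
```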
Classes

@dataclass
class ByteSequence:
    id: str
    pos: str
    min_off: str
    max_off: str
    endian: str
    value: str


@dataclass
class ExternalSignature:
    id: str
    signature: str
    type: str


@dataclass
class Format:  # pylint: disable=R0902
    id: str
    name: str
    version: str
    puid: str
    mime: str
    classification: str
    external_signatures: list[ExternalSignature]
    internal_signatures: list[InternalSignature]
    priorities: list[int]


@dataclass
class Identifier:
    type: str
    value: str


@dataclass
class InternalSignature:
    id: str
    name: str
    byte_sequences: list[ByteSequence]


@dataclass
class Priority:
    type: str
    id: str


class UnprocessableEntity(Exception):
    """Provide a way to give complete feedback to the caller to allow
    it to exit."""
diff --git a/docs/registry/index.htm b/docs/registry/index.htm
index 7c333b4..672769c 100644
--- a/docs/registry/index.htm
+++ b/docs/registry/index.htm
@@ -672,13 +672,13 @@

 Contents

-jrid:0020 - GL Transmission Format: GLTF runtime 3D asset library (Generic)
+jrid:0020 - GL Transmission Format: GLTF runtime 3D asset library schema (Generic)
 False
 False
 False
 True
-{'KEY': '$schema', 'STARTSWITH': 'http://json-schema.org/'}
-{'KEY': '$schema', 'ENDSWITH': '/schema#'}
+{'KEY': '$schema', 'STARTSWITH': 'https://json-schema.org/'}
+{'KEY': '$schema', 'ENDSWITH': '/schema'}
 {'KEY': 'title', 'EXISTS': None}
 {'KEY': 'type', 'IS': 'object'}
 {'KEY': 'description', 'IS': 'The root object for a glTF asset.'}

@@ -1510,7 +1510,7 @@

 Contents

   • jrid:0017: Open Resume Document
   • jrid:0018: jacker song
   • jrid:0019: JSON Patch
-  • jrid:0020: GL Transmission Format: GLTF runtime 3D asset library (Generic)
+  • jrid:0020: GL Transmission Format: GLTF runtime 3D asset library schema (Generic)
   • jrid:0021: Tweet Object
   • jrid:0022: sandboxels save file
   • jrid:0023: dublin core metadata (archivematica)

diff --git a/docs/utils/index.html b/docs/utils/index.html
index ce33717..6236290 100644
--- a/docs/utils/index.html
+++ b/docs/utils/index.html
@@ -45,6 +45,11 @@

 Sub-modules

 json2json will convert JSON compatible objects from one encoding to UTF-8.

+src.utils.jsonid2pronom
+
+jsonid2pronom provides a helper script to enable export of generic
+JSONID compatible markers to a PRONOM compatible signature file.
+
@@ -67,6 +72,7 @@

 Sub-modules

   • src.utils.json2json
+  • src.utils.jsonid2pronom

diff --git a/docs/utils/json2json.html b/docs/utils/json2json.html
index d68fa96..62dce2d 100644
--- a/docs/utils/json2json.html
+++ b/docs/utils/json2json.html
@@ -78,7 +78,7 @@

 async def identify_json(paths: list[str]):
     """Identify objects."""
     for idx, path in enumerate(paths):
-        valid, data, _, _ = await identify_plaintext_bytestream(path)
+        valid, data = await identify_plaintext_bytestream(path)
         if not valid:
             continue
         print(json.dumps(data, indent=2))

@@ -143,7 +143,7 @@
     parser = argparse.ArgumentParser(
         prog="json2json",
         description="parse JSON UTF-16 (BE-LE) objects and output them as UTF-8 for the sake of developer ergonomics",
-        epilog="for more information visit https://github.com/ffdev-info/json-id",
+        epilog="for more information visit https://github.com/ffdev-info/jsonid",
     )
     parser.add_argument(
         "--debug",

diff --git a/docs/utils/jsonid2pronom.html b/docs/utils/jsonid2pronom.html
new file mode 100644
index 0000000..09d9b27
--- /dev/null
+++ b/docs/utils/jsonid2pronom.html
@@ -0,0 +1,211 @@

Module src.utils.jsonid2pronom

jsonid2pronom provides a helper script to enable export of generic
JSONID compatible markers to a PRONOM compatible signature file.

Functions
async def load_patterns(path: str) -> list:
    """Load patterns from a file for conversion to a signature file."""
    patterns = []
    with open(path, "r", encoding="utf-8") as patterns_file:
        patterns = json.loads(patterns_file.read())
    return patterns
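A patterns file is a JSON list of marker objects; a minimal, hypothetical round trip:

```python
import asyncio
import json

# Hypothetical patterns file; markers follow the JSONID registry syntax.
with open("patterns_example.json", "w", encoding="utf-8") as file:
    json.dump([{"KEY": "version", "EXISTS": None}], file)

print(asyncio.run(load_patterns("patterns_example.json")))
# -> [{'KEY': 'version', 'EXISTS': None}]
```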
def main() -> None:
    """Primary entry point for this script."""
    parser = argparse.ArgumentParser(
        prog="jsonid2pronom",
        description="convert JSONID compatible markers to PRONOM",
        epilog="for more information visit https://github.com/ffdev-info/jsonid",
    )
    parser.add_argument(
        "--debug",
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--path",
        "-p",
        help="file path to process",
        required=False,
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.debug("debug logging is configured")
    if not args.path:
        parser.print_help(sys.stderr)
        sys.exit()
    asyncio.run(
        output_signature(
            path=args.path,
        )
    )
async def output_signature(path: str):
    """Output JSONID compatible signatures to PRONOM."""

    formats = []

    encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
    priorities = []

    increment_id = 0

    markers = await load_patterns(path)

    if not markers:
        logger.error("no patterns provided via path arg")
        sys.exit(1)

    for encoding in encodings:
        increment_id += 1
        json_puid = "jsonid2pronom/1"
        name_ = f"JSONID2PRONOM Conversion ({encoding})"
        try:
            mime = "application/json"
        except IndexError:
            mime = ""
        try:
            sequences = pronom.process_markers(
                copy.deepcopy(markers),
                increment_id,
                encoding=encoding,
            )
        except pronom.UnprocessableEntity as err:
            logger.error(
                "%s %s: cannot handle: %s",
                json_puid,
                name_,
                err,
            )
            for err_marker in markers:
                logger.debug("--- START ---")
                logger.debug("marker: %s", err_marker)
                logger.debug("---  END  ---")
            continue
        fmt = pronom.Format(
            id=increment_id,
            name=name_,
            version="",
            puid=json_puid,
            mime=mime,
            classification="structured text",
            external_signatures=[
                pronom.ExternalSignature(
                    id=increment_id,
                    signature="json",
                    type=pronom.EXT,
                )
            ],
            internal_signatures=sequences,
            priorities=list(set(priorities)),
        )
        priorities.append(f"{increment_id}")
        formats.append(fmt)

    pronom.process_formats_to_stdout(formats)
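The CLI's `--path` flag wraps this coroutine; called programmatically from the package context it is simply (path hypothetical):

```python
import asyncio

asyncio.run(output_signature(path="patterns_example.json"))  # signature XML on stdout
```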

From 9bb5ef5be2eb8cfee24d90c6412b08afca95a0e6 Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Sun, 4 Jan 2026 17:34:22 +0100
Subject: [PATCH 5/7] Fix imports and provide PRONOM entry point

---
 pyproject.toml             | 1 +
 src/utils/jsonid2pronom.py | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b7fe1b5..991bcdf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = {file = ["requirements/requirements.txt"]}
 jsonid = "jsonid.jsonid:main"
 momoa = "jsonid.jsonid:main"
 json2json = "utils.json2json:main"
+json2pronom = "utils.jsonid2pronom:main"

 [build-system]
 requires = ["setuptools>=80.9.0", "wheel>=0.45.1", "setuptools_scm[toml]>=9.2.2"]

diff --git a/src/utils/jsonid2pronom.py b/src/utils/jsonid2pronom.py
index be3fae2..a515f28 100644
--- a/src/utils/jsonid2pronom.py
+++ b/src/utils/jsonid2pronom.py
@@ -9,7 +9,13 @@
 import logging
 import sys

-from src.jsonid import pronom
+try:
+    from src.jsonid import pronom
+except ModuleNotFoundError:
+    try:
+        from jsonid import pronom
+    except ModuleNotFoundError:
+        import pronom

 # Set up logging.
 logging.basicConfig(

From fa97890a4fce756ee16261c481166fc7183a2f6e Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Sun, 4 Jan 2026 17:43:35 +0100
Subject: [PATCH 6/7] Add PRONOM export to the README

---
 README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/README.md b/README.md
index 2a17e78..51a5669 100644
--- a/README.md
+++ b/README.md
@@ -413,6 +413,60 @@
 PRONOM IDs that can then be referenced in the JSONID output. Eventually,
 PRONOM or a PRONOM-like tool might host an authoritative version of the
 JSONID registry.

+### JSONID for PRONOM Signature Development
+
+JSONID provides a high-level language for output of PRONOM compatible
+signatures. The feature set is still in its BETA phase, but JSONID provides
+two distinct capabilities:
+
+#### 1. Registry output
+
+JSONID's registry can be output using the `--pronom` flag. A signature file
+will be created under `jsonid_pronom.xml` which can be imported into DROID
+for identification of document types registered with JSONID.
+
+JSONID's registry is output alongside a handful of baseline JSON signatures
+designed to capture "plain" JSON that is not yet encoded in the registry.
+
+#### 2. Signature development
+
+A standalone `json2pronom` utility is provided for creation of potentially
+robust DROID compatible signatures.
+
+As a high-level language, signatures can be defined in an easy to understand
+syntax and then output consistently via the `json2pronom` utility. Signatures
+include sensible defaults for whitespace and other aspects that are
+difficult for signature developers to consistently anticipate when writing
+JSON based signatures.
+
+Given a [sample pattern file](./pronom_example/patterns_example.json) a DROID
+compatible snippet can be output as follows (UTF-8 shown for brevity;
+sequence values elided):
+
+```xml
+<InternalSignature ID="1" Specificity="Specific">
+    <ByteSequence Reference="BOF" Sequence="..." MinOffset="0" MaxOffset="..."/>
+    <ByteSequence Reference="VAR" Sequence="..." MinOffset="" MaxOffset=""/>
+    <ByteSequence Reference="EOF" Sequence="..." MinOffset="0" MaxOffset="..."/>
+</InternalSignature>
+
+<FileFormat ID="1" Name="JSONID2PRONOM Conversion (UTF-8)" PUID="jsonid2pronom/1" Version="" MIMEType="application/json" FormatType="structured text" >
+    <InternalSignatureID>1</InternalSignatureID>
+    <Extension>json</Extension>
+</FileFormat>
+```
+
+Feedback on this utility is welcome.
+
 ## Output format

 Previously JSONID output YAML containing all result object metadata. It has
From 66b482d897837017cc2b7551357e2b6a65b7e82d Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Sun, 4 Jan 2026 17:57:05 +0100
Subject: [PATCH 7/7] Fix PUID numbering

---
 src/utils/jsonid2pronom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/jsonid2pronom.py b/src/utils/jsonid2pronom.py
index a515f28..444b641 100644
--- a/src/utils/jsonid2pronom.py
+++ b/src/utils/jsonid2pronom.py
@@ -56,7 +56,7 @@ async def output_signature(path: str):

     for encoding in encodings:
         increment_id += 1
-        json_puid = "jsonid2pronom/1"
+        json_puid = f"jsonid2pronom/{increment_id}"
         name_ = f"JSONID2PRONOM Conversion ({encoding})"
         try:
             mime = "application/json"