From 1894cb13b0ee19ddcff0ac7ce0e67c38ce883e41 Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Fri, 7 Nov 2025 10:47:38 +0100
Subject: [PATCH 1/5] Add a further alias for analysis

If I am typing analysis instead of analyse or analyze for the
analysis mode in this tool, then others might too. Provided as a
third alias for now. The others could be removed in future.
---
 src/jsonid/jsonid.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/jsonid/jsonid.py b/src/jsonid/jsonid.py
index 7475e0d..59edb4f 100644
--- a/src/jsonid/jsonid.py
+++ b/src/jsonid/jsonid.py
@@ -189,6 +189,7 @@ def main() -> None:
     parser.add_argument(
         "--analyse",
         "--analyze",
+        "--analysis",
         "-a",
         help="analyse a file in support of ruleset development and data preservation",
         required=False,

From 28861bd148373edf94896e6918810e9757ae0cbd Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Fri, 7 Nov 2025 10:52:10 +0100
Subject: [PATCH 2/5] Fix jsonid url

---
 src/jsonid/jsonid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/jsonid/jsonid.py b/src/jsonid/jsonid.py
index 59edb4f..ee6c232 100644
--- a/src/jsonid/jsonid.py
+++ b/src/jsonid/jsonid.py
@@ -100,7 +100,7 @@ def main() -> None:
     parser = argparse.ArgumentParser(
         prog="jsonid",
         description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values",
-        epilog="for more information visit https://github.com/ffdev-info/json-id",
+        epilog="for more information visit https://github.com/ffdev-info/jsonid",
     )
     parser.add_argument(
         "--debug",

From 32ee6c7cfa3a9d173b14d392ca486e5abb8dee2c Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Fri, 7 Nov 2025 11:09:00 +0100
Subject: [PATCH 3/5] Provide analysis only entry-point

---
NOTE(review): amended in review - the --debug help text typo
"loggng" is now "logging" and the docstring says "PyPI". Both are
in-line wording changes, so hunk sizes are unchanged, but the blob
ids on the index lines predate the amendment; regenerate with
git format-patch if they matter (e.g. for git am --3way).

 pyproject.toml       |  1 +
 src/jsonid/jsonid.py | 94 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 991bcdf..717cfe6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = {file = ["requirements/requirements.txt"]}
 
 [project.scripts]
 jsonid = "jsonid.jsonid:main"
+jsonida = "jsonid.jsonid:analysis"
 momoa = "jsonid.jsonid:main"
 json2json = "utils.json2json:main"
 json2pronom = "utils.jsonid2pronom:main"
diff --git a/src/jsonid/jsonid.py b/src/jsonid/jsonid.py
index ee6c232..826621b 100644
--- a/src/jsonid/jsonid.py
+++ b/src/jsonid/jsonid.py
@@ -92,6 +92,98 @@ def _get_strategy(args: argparse.Namespace):
     return strategy
 
 
+def analysis() -> None:
+    """Secondary entry point for analysis functionality.
+
+    Enables us to call analysis from the command line once installed
+    via PyPI.
+    """
+    parser = argparse.ArgumentParser(
+        prog="jsonida",
+        description="JSONID(A)nalysis",
+        epilog="for more information visit https://github.com/ffdev-info/jsonid",
+    )
+    parser.add_argument(
+        "--debug",
+        help="use debug logging",
+        required=False,
+        action="store_true",
+    )
+    parser.add_argument(
+        "--nojson",
+        "-nj",
+        action="store_true",
+        required=False,
+    )
+    parser.add_argument(
+        "--nojsonl",
+        "-njl",
+        action="store_true",
+        required=False,
+    )
+    parser.add_argument(
+        "--noyaml",
+        "-ny",
+        action="store_true",
+        required=False,
+    )
+    parser.add_argument(
+        "--notoml",
+        "-nt",
+        action="store_true",
+        required=False,
+    )
+    parser.add_argument(
+        "--language",
+        help="return results in different languages",
+        required=False,
+    )
+    parser.add_argument(
+        "--path",
+        "-p",
+        help="analyse a file in support of ruleset development and data preservation",
+        required=False,
+        type=str,
+        metavar="PATH",
+    )
+    args = parser.parse_args()
+
+    if not args.path:
+        parser.print_help(sys.stderr)
+        sys.exit()
+
+    # Initialize logging.
+    init_logging(args.debug)
+
+    # Attempt lookup in the registry. This should come first as it
+    # doesn't involve reading files.
+    _attempt_lookup(args)
+
+    # Determine which decode strategy to adopt.
+    strategy = _get_strategy(args)
+    if not strategy:
+        logger.error(
+            "please ensure there is one remaining decode strategy, e.g. %s",
+            ",".join(decode_strategies),
+        )
+        sys.exit(1)
+
+    # Enable graceful exit via signal handler...
+    def signal_handler(*args):  # pylint: disable=W0613
+        logger.info("exiting...")
+        sys.exit(0)
+
+    signal.signal(signal.SIGINT, signal_handler)
+
+    if args.path:
+        asyncio.run(
+            file_processing.analyse_data(
+                path=args.path,
+                strategy=strategy,
+            )
+        )
+
+
 def main() -> None:
     """Primary entry point for this script."""
 
@@ -99,7 +191,7 @@ def main() -> None:
 
     parser = argparse.ArgumentParser(
         prog="jsonid",
-        description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values",
+        description="JSON(ID)entification of objects on disk based on identifying valid objects and their key-values",
         epilog="for more information visit https://github.com/ffdev-info/jsonid",
     )
     parser.add_argument(

From 378a1c20249a48aff217c018843f64752fb8581d Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Fri, 7 Nov 2025 11:16:33 +0100
Subject: [PATCH 4/5] Add agent to analysis output

---
NOTE(review): `version` is not defined anywhere in this hunk -
confirm file_processing.py already imports it. The leading
whitespace of the context lines below was reconstructed from a
flattened copy of this patch; in particular, confirm whether
res.pop("content_length") sits inside the JSONL branch.

 src/jsonid/file_processing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/jsonid/file_processing.py b/src/jsonid/file_processing.py
index 1da44ee..fa5927e 100644
--- a/src/jsonid/file_processing.py
+++ b/src/jsonid/file_processing.py
@@ -180,6 +180,7 @@ async def analyse_json(paths: list[str], strategy: list):
         res = await analysis.analyse_input(base_obj.data, base_obj.content_for_analysis)
         res["doctype"] = base_obj.doctype
         res["encoding"] = base_obj.encoding
+        res["agent"] = version.get_agent()
         if base_obj.doctype == registry.DOCTYPE_JSONL:
             res["compression"] = base_obj.compression
             res.pop("content_length")

From ab77f2ec8c63cda665d3ed13e2b0081f042cc13b Mon Sep 17 00:00:00 2001
From: ross-spencer
Date: Mon, 5 Jan 2026 00:00:35 +0100
Subject: [PATCH 5/5] Update analysis options

Excludes and language don't make sense for the analysis
functionality of this tool. Language at least not yet. We will see
if it needs to be added back in if we receive translations.
Technical characteristics should largely be language agnostic.
---
NOTE(review): analysis() still calls _attempt_lookup(args) and
_get_strategy(args) after these options are removed. Their bodies
are not part of this series - confirm that neither reads
args.nojson, args.nojsonl, args.noyaml, args.notoml or
args.language, otherwise jsonida will fail with AttributeError.

 src/jsonid/jsonid.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/src/jsonid/jsonid.py b/src/jsonid/jsonid.py
index 826621b..9d3379e 100644
--- a/src/jsonid/jsonid.py
+++ b/src/jsonid/jsonid.py
@@ -109,35 +109,6 @@ def analysis() -> None:
         required=False,
         action="store_true",
     )
-    parser.add_argument(
-        "--nojson",
-        "-nj",
-        action="store_true",
-        required=False,
-    )
-    parser.add_argument(
-        "--nojsonl",
-        "-njl",
-        action="store_true",
-        required=False,
-    )
-    parser.add_argument(
-        "--noyaml",
-        "-ny",
-        action="store_true",
-        required=False,
-    )
-    parser.add_argument(
-        "--notoml",
-        "-nt",
-        action="store_true",
-        required=False,
-    )
-    parser.add_argument(
-        "--language",
-        help="return results in different languages",
-        required=False,
-    )
     parser.add_argument(
         "--path",
         "-p",