From 04e588b8b37d5eee5a8d524330fd76503689a55d Mon Sep 17 00:00:00 2001
From: abram axel booth
Date: Mon, 8 Dec 2025 14:32:34 -0500
Subject: [PATCH] fix: cap length for iri_values doc id

elasticsearch document ids may only be 512 bytes long, but the id used
for trovesearch_denorm's iri_values index uses a base64-encoded url that
may be longer -- use a hash instead
---
 share/search/index_strategy/trovesearch_denorm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py
index 4bea2a932..18b2a73a7 100644
--- a/share/search/index_strategy/trovesearch_denorm.py
+++ b/share/search/index_strategy/trovesearch_denorm.py
@@ -2,6 +2,7 @@
 from collections import abc, defaultdict
 import dataclasses
 import functools
+import hashlib
 import itertools
 import json
 import logging
@@ -374,7 +375,7 @@ def _doc_id(self, value_iri=None) -> str:
         return (
             _card_pk
             if value_iri is None
-            else f'{_card_pk}-{ts.b64(value_iri)}'
+            else f'{_card_pk}-{hashlib.sha256(value_iri.encode()).hexdigest()}'
         )
 
     @functools.cached_property