From 9bf02b74a9e071531074a145f9cf0cfbb1b1cda7 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sun, 25 Jan 2026 08:57:33 +0100 Subject: [PATCH 1/2] Improve and describe conceptual model --- README.md | 46 ++++++++++ src/euring/codes.py | 7 ++ src/euring/field_schema.py | 103 ++++++++++++++++------ src/euring/fields.py | 173 +++++++++++++++++++++++-------------- src/euring/parsing.py | 4 +- src/euring/record.py | 29 ++++--- tests/test_fields.py | 2 +- tests/test_lookup_data.py | 6 ++ tests/test_record.py | 27 ++++++ uv.lock | 6 +- 10 files changed, 294 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 69b075d..0d44835 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,52 @@ EURING vocabulary (as per the manuals): EURING uses a record-based format: each record contains a fixed sequence of fields. The manuals define official field names (with spaces/hyphens), which we preserve for display. +### Encoding vs data from an IT perspective + +A EURING record is UTF-8 text that follows an ASCII-era encoding structure +with punch-card origins. + +Many fields are described as "Integer" in the manual, but this often means +"digits-only in the encoding" rather than an actual numeric field. + +Two principles guide the model used here: + +1. Separate encoding type from data type. +The EURING encoding type (`euring_type`) expresses what can appear in the record +encoding. For example, `euring_type=Integer` means "digits-only in the encoding". +It does not automatically imply `value: int`. + +2. Make `value_type` explicit per field. +Each field can declare a `value_type` that reflects how the value should behave +in code. Typical values include `code_str`, `int`, `float`, and `date`. +Code-like fields can also add a field-specific parser or regex constraint when +needed. + +Default policy: + +- Code/lookup fields use `value_type=code_str` (always a string). +Examples include `accuracy_of_date`, `species`, `place_code`, +`ringing_scheme`, and `primary_identification_method`. +- Measurements and durations use typed numbers. +Examples include distances and other numeric measurements. +- True dates use typed dates. +The `date` field is treated as a date even though it is encoded as `ddmmyyyy`. + +Why this helps: + +- It preserves leading zeros (for example, `"00010"` stays `"00010"`). +- It removes ambiguity around `"1"` vs `1`. +- It makes lookups deterministic and consistent. + +Record field state: + +- `raw_value`: the exact string as received from a EURING record. +When you call `set(...)`, any prior `raw_value` is cleared. +- `value`: the decoded value after validation and parsing. +- `encoded_value`: how this library would encode the current `value` back into +the EURING format. +- Serialization always re-encodes from `value`. + This package introduces a signed numeric type (`NumericSigned`) for the EURING2020 fields Latitude and Longitude. `NumericSigned` behaves like `Numeric`, but allows a leading minus sign and explicitly disallows -0. `NumericSigned` is a small, intentional clarification of the generic numeric types. The manuals clearly permit negative Latitude and Longitude in EURING2020, but the generic `Numeric` definition does not describe signed numbers. Making this explicit in the code helps prevent invalid values while staying faithful to the manuals and real-world usage. If a future revision of the specification formally defines signed numeric fields, this implementation can align with it without breaking compatibility. ### Field keys diff --git a/src/euring/codes.py b/src/euring/codes.py index 6b1d877..5c394fd 100644 --- a/src/euring/codes.py +++ b/src/euring/codes.py @@ -313,6 +313,13 @@ def lookup_date(value: str | int) -> date: raise EuringConstraintException(f'Value "{value}" is not a valid EURING date.') +def parse_date(value: str) -> str: + """Validate that date placeholders are not used, then return the raw value.""" + if value and set(value) == {"-"}: + raise EuringConstraintException("Date cannot be all dashes; provide an estimated real date instead.") + return value + + def lookup_ringing_scheme(value: str | int) -> str: """ Ringing scheme lookup - uses packaged reference data when available. diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index dd1c76a..d584e22 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -2,6 +2,7 @@ from collections.abc import Iterator, Mapping from dataclasses import dataclass +from datetime import date as dt_date from typing import Any from euring.utils import euring_lat_to_dms, euring_lng_to_dms @@ -33,7 +34,8 @@ class EuringField(Mapping[str, Any]): key: str name: str - type_name: str = "" + euring_type: str = "" + value_type: str | None = None required: bool = True length: int | None = None variable_length: bool = False @@ -43,9 +45,11 @@ def _mapping(self) -> dict[str, Any]: mapping: dict[str, Any] = { "key": self.key, "name": self.name, - "type_name": self.type_name, + "euring_type": self.euring_type, "required": self.required, } + if self.value_type is not None: + mapping["value_type"] = self.value_type if self.length is not None: mapping["length"] = self.length if self.variable_length: @@ -83,29 +87,53 @@ def _validate_raw(self, raw: str) -> str | None: return None raise EuringConstraintException('Required field, empty value "" is not permitted.') self._validate_length(raw) - if self.type_name and not is_valid_type(raw, self.type_name): - raise EuringTypeException(f'Value "{raw}" is not valid for type {self.type_name}.') + if self.euring_type and not is_valid_type(raw, self.euring_type): + raise EuringTypeException(f'Value "{raw}" is not valid for type {self.euring_type}.') return raw def _coerce_type(self, raw: str) -> Any: - if self.type_name == TYPE_INTEGER: + if self.euring_type == TYPE_INTEGER: if set(raw) == {"-"}: return None return int(raw) - if self.type_name == TYPE_NUMERIC: + if self.euring_type == TYPE_NUMERIC: return float(raw) - if self.type_name == TYPE_NUMERIC_SIGNED: + if self.euring_type == TYPE_NUMERIC_SIGNED: return float(raw) - if self.type_name in {TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, TYPE_TEXT}: + if self.euring_type in {TYPE_ALPHABETIC, TYPE_ALPHANUMERIC, TYPE_TEXT}: return raw return raw + def _coerce_value_type(self, raw: str) -> Any: + """Coerce a validated raw value to the configured value type.""" + if self.value_type in {None, ""}: + return self._coerce_type(raw) + if self.value_type == "code_str": + return raw + if self.value_type == "int": + if set(raw) == {"-"}: + return None + return int(raw) + if self.value_type == "float": + return float(raw) + if self.value_type == "date": + if len(raw) != 8 or not raw.isdigit(): + raise EuringConstraintException(f'Value "{raw}" is not a valid ddmmyyyy date.') + day = int(raw[0:2]) + month = int(raw[2:4]) + year = int(raw[4:8]) + try: + return dt_date(year, month, day) + except ValueError: + raise EuringConstraintException(f'Value "{raw}" is not a valid ddmmyyyy date.') + raise ValueError(f'Unsupported value_type "{self.value_type}" for field "{self.key}".') + def parse(self, raw: str) -> Any | None: """Parse raw text into a Python value.""" validated = self._validate_raw(raw) if validated is None: return None - return self._coerce_type(validated) + return self._coerce_value_type(validated) def encode(self, value: Any | None) -> str: """Encode a Python value to raw text.""" @@ -119,18 +147,21 @@ def encode(self, value: Any | None) -> str: raise EuringConstraintException("Geographical coordinates require both lat and lng values.") return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" + if self.key == "date" and isinstance(value, dt_date): + return value.strftime("%d%m%Y") + str_value = f"{value}" - if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + if self.euring_type in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: str_value = str_value.rstrip("0").rstrip(".") if ( - self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} + self.euring_type in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length and not self.variable_length ): str_value = str_value.zfill(self.length) self._validate_length(str_value) - if self.type_name and not is_valid_type(str_value, self.type_name): - raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') + if self.euring_type and not is_valid_type(str_value, self.euring_type): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.euring_type}.') return str_value def encode_for_format(self, value: Any | None, *, format: str) -> str: @@ -140,7 +171,7 @@ def encode_for_format(self, value: Any | None, *, format: str) -> str: return self.empty_value if self.length and format == FORMAT_EURING2000: return "-" * self.length - if self.length and self.required and self.type_name == TYPE_INTEGER: + if self.length and self.required and self.euring_type == TYPE_INTEGER: return "-" * self.length return "" @@ -149,24 +180,31 @@ def encode_for_format(self, value: Any | None, *, format: str) -> str: raise EuringConstraintException("Geographical coordinates require both lat and lng values.") return f"{euring_lat_to_dms(float(value['lat']))}{euring_lng_to_dms(float(value['lng']))}" - if self.type_name == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: + if self.key == "date" and isinstance(value, dt_date): + str_value = value.strftime("%d%m%Y") + self._validate_length(str_value) + if self.euring_type and not is_valid_type(str_value, self.euring_type): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.euring_type}.') + return str_value + + if self.euring_type == TYPE_INTEGER and isinstance(value, str) and value and set(value) == {"-"}: return self.encode_for_format(None, format=format) str_value = f"{value}" - if self.type_name in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: + if self.euring_type in {TYPE_NUMERIC, TYPE_NUMERIC_SIGNED}: str_value = str_value.rstrip("0").rstrip(".") ignore_variable_length = format == FORMAT_EURING2000 - if self.type_name in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length: + if self.euring_type in {TYPE_INTEGER, TYPE_NUMERIC, TYPE_NUMERIC_SIGNED} and self.length: str_value = str_value.zfill(self.length) if self.variable_length and not ignore_variable_length: str_value = str_value.lstrip("0") or "0" self._validate_length(str_value, ignore_variable_length=ignore_variable_length) - if self.type_name and not is_valid_type(str_value, self.type_name): - raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.type_name}.') + if self.euring_type and not is_valid_type(str_value, self.euring_type): + raise EuringTypeException(f'Value "{str_value}" is not valid for type {self.euring_type}.') return str_value def describe(self, value: Any | None) -> Any | None: @@ -214,8 +252,13 @@ def parse(self, raw: str) -> Any | None: if validated is None: return None if self.parser is None: - return self._coerce_type(validated) - return self.parser(validated) + return self._coerce_value_type(validated) + parsed = self.parser(validated) + # Allow a parser to validate and pass through a raw string, while still + # applying the configured value_type coercion. + if isinstance(parsed, str) and self.value_type not in {None, ""}: + return self._coerce_value_type(parsed) + return parsed def describe(self, value: Any | None) -> Any | None: if self.lookup is None or value is None: @@ -231,9 +274,12 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return definition key = definition.get("key", "") name = definition.get("name", key) - if "type" in definition and "type_name" not in definition: - raise ValueError('Field definitions must use "type_name" instead of legacy "type".') - type_name = definition.get("type_name") or "" + if "type" in definition and "euring_type" not in definition: + raise ValueError('Field definitions must use "euring_type" instead of legacy "type".') + if "type_name" in definition: + raise ValueError('Field definitions must use "euring_type" instead of legacy "type_name".') + euring_type = definition.get("euring_type") or "" + value_type = definition.get("value_type") required = definition.get("required", True) length = definition.get("length") variable_length = bool(definition.get("variable_length", False)) @@ -244,7 +290,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return EuringFormattedField( key=key, name=name, - type_name=type_name, + euring_type=euring_type, + value_type=value_type, required=required, length=length, variable_length=variable_length, @@ -256,7 +303,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return EuringLookupField( key=key, name=name, - type_name=type_name, + euring_type=euring_type, + value_type=value_type, required=required, length=length, variable_length=variable_length, @@ -266,7 +314,8 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return EuringField( key=key, name=name, - type_name=type_name, + euring_type=euring_type, + value_type=value_type, required=required, length=length, variable_length=variable_length, diff --git a/src/euring/fields.py b/src/euring/fields.py index 5cf0d82..441ca4d 100644 --- a/src/euring/fields.py +++ b/src/euring/fields.py @@ -37,6 +37,7 @@ lookup_ring_number, lookup_ringing_scheme, lookup_species, + parse_date, parse_direction, parse_geographical_coordinates, parse_latitude, @@ -62,122 +63,151 @@ EuringLookupField( name="Ringing Scheme", key="ringing_scheme", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=3, lookup=lookup_ringing_scheme, ), EuringLookupField( name="Primary Identification Method", key="primary_identification_method", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=2, lookup=LOOKUP_PRIMARY_IDENTIFICATION_METHOD, ), EuringLookupField( name="Identification Number (ring)", key="identification_number", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=10, lookup=lookup_ring_number, ), EuringLookupField( name="Verification of the Metal Ring", key="verification_of_the_metal_ring", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_VERIFICATION_OF_THE_METAL_RING, ), EuringLookupField( name="Metal Ring Information", key="metal_ring_information", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_METAL_RING_INFORMATION, ), EuringLookupField( name="Other Marks Information", key="other_marks_information", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=2, lookup=lookup_other_marks, ), EuringLookupField( name="Species Mentioned", key="species_mentioned", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=5, lookup=lookup_species, ), EuringLookupField( name="Species Concluded", key="species_concluded", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=5, lookup=lookup_species, ), EuringLookupField( name="Manipulated", key="manipulated", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_MANIPULATED, ), EuringLookupField( name="Moved Before Encounter", key="moved_before_recovery", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_MOVED_BEFORE_ENCOUNTER, ), EuringLookupField( name="Catching Method", key="catching_method", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_CATCHING_METHOD, ), EuringLookupField( name="Catching Lures", key="catching_lures", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_CATCHING_LURES, ), EuringLookupField( - name="Sex Mentioned", key="sex_mentioned", type_name=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_SEX + name="Sex Mentioned", key="sex_mentioned", euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_SEX ), EuringLookupField( - name="Sex Concluded", key="sex_concluded", type_name=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_SEX + name="Sex Concluded", key="sex_concluded", euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_SEX ), EuringLookupField( - name="Age Mentioned", key="age_mentioned", type_name=TYPE_ALPHANUMERIC, length=1, lookup=lookup_age + name="Age Mentioned", key="age_mentioned", euring_type=TYPE_ALPHANUMERIC, length=1, lookup=lookup_age ), EuringLookupField( - name="Age Concluded", key="age_concluded", type_name=TYPE_ALPHANUMERIC, length=1, lookup=lookup_age + name="Age Concluded", key="age_concluded", euring_type=TYPE_ALPHANUMERIC, length=1, lookup=lookup_age + ), + EuringLookupField(name="Status", key="status", euring_type=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_STATUS), + EuringLookupField( + name="Brood Size", + key="brood_size", + euring_type=TYPE_INTEGER, + value_type="code_str", + length=2, + lookup=lookup_brood_size, + ), + EuringLookupField( + name="Pullus Age", + key="pullus_age", + euring_type=TYPE_INTEGER, + value_type="code_str", + length=2, + lookup=lookup_pullus_age, ), - EuringLookupField(name="Status", key="status", type_name=TYPE_ALPHABETIC, length=1, lookup=LOOKUP_STATUS), - EuringLookupField(name="Brood Size", key="brood_size", type_name=TYPE_INTEGER, length=2, lookup=lookup_brood_size), - EuringLookupField(name="Pullus Age", key="pullus_age", type_name=TYPE_INTEGER, length=2, lookup=lookup_pullus_age), EuringLookupField( name="Accuracy of Pullus Age", key="accuracy_of_pullus_age", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, + value_type="code_str", length=1, lookup=LOOKUP_ACCURACY_PULLUS_AGE, ), - EuringLookupField(name="Date", key="date", type_name=TYPE_INTEGER, length=8, lookup=lookup_date), + EuringFormattedField( + name="Date", + key="date", + euring_type=TYPE_INTEGER, + value_type="date", + length=8, + parser=parse_date, + lookup=lookup_date, + ), EuringLookupField( name="Accuracy of Date", key="accuracy_of_date", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_ACCURACY_OF_DATE, ), - EuringField(name="Time", key="time", type_name=TYPE_ALPHANUMERIC, length=4), + EuringField(name="Time", key="time", euring_type=TYPE_ALPHANUMERIC, length=4), EuringFormattedField( name="Place Code", key="place_code", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=4, parser=parse_place_code, lookup=lookup_place_code, @@ -185,7 +215,7 @@ EuringFormattedField( name="Geographical Co-ordinates", key="geographical_coordinates", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=15, parser=parse_geographical_coordinates, lookup=lookup_geographical_coordinates, @@ -193,32 +223,47 @@ EuringLookupField( name="Accuracy of Co-ordinates", key="accuracy_of_coordinates", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, + value_type="code_str", length=1, lookup=LOOKUP_ACCURACY_OF_COORDINATES, ), - EuringLookupField(name="Condition", key="condition", type_name=TYPE_INTEGER, length=1, lookup=LOOKUP_CONDITION), EuringLookupField( - name="Circumstances", key="circumstances", type_name=TYPE_INTEGER, length=2, lookup=LOOKUP_CIRCUMSTANCES + name="Condition", + key="condition", + euring_type=TYPE_INTEGER, + value_type="code_str", + length=1, + lookup=LOOKUP_CONDITION, + ), + EuringLookupField( + name="Circumstances", + key="circumstances", + euring_type=TYPE_INTEGER, + value_type="code_str", + length=2, + lookup=LOOKUP_CIRCUMSTANCES, ), EuringLookupField( name="Circumstances Presumed", key="circumstances_presumed", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_CIRCUMSTANCES_PRESUMED, ), EuringLookupField( name="EURING Code Identifier", key="euring_code_identifier", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, + value_type="code_str", length=1, lookup=LOOKUP_EURING_CODE_IDENTIFIER, ), EuringField( name="Distance", key="distance", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=5, variable_length=True, empty_value="-----", @@ -226,7 +271,7 @@ EuringFormattedField( name="Direction", key="direction", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=3, empty_value="---", parser=parse_direction, @@ -234,27 +279,27 @@ EuringField( name="Elapsed Time", key="elapsed_time", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=5, variable_length=True, empty_value="-----", ), # Starting with Wing Length, fields are no longer required. Source: EURING Exchange Code 2020 v202 (13 Nov 2024). - EuringField(name="Wing Length", key="wing_length", type_name=TYPE_NUMERIC, required=False), - EuringField(name="Third Primary", key="third_primary", type_name=TYPE_NUMERIC, required=False), + EuringField(name="Wing Length", key="wing_length", euring_type=TYPE_NUMERIC, required=False), + EuringField(name="Third Primary", key="third_primary", euring_type=TYPE_NUMERIC, required=False), EuringLookupField( name="State of Wing Point", key="state_of_wing_point", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_STATE_OF_WING_POINT, ), - EuringField(name="Mass", key="mass", type_name=TYPE_NUMERIC, required=False), + EuringField(name="Mass", key="mass", euring_type=TYPE_NUMERIC, required=False), EuringLookupField( name="Moult", key="moult", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_MOULT, @@ -262,38 +307,38 @@ EuringLookupField( name="Plumage Code", key="plumage_code", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=1, required=False, lookup=LOOKUP_PLUMAGE_CODE, ), - EuringField(name="Hind Claw", key="hind_claw", type_name=TYPE_NUMERIC, required=False), - EuringField(name="Bill Length", key="bill_length", type_name=TYPE_NUMERIC, required=False), + EuringField(name="Hind Claw", key="hind_claw", euring_type=TYPE_NUMERIC, required=False), + EuringField(name="Bill Length", key="bill_length", euring_type=TYPE_NUMERIC, required=False), EuringLookupField( name="Bill Method", key="bill_method", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_BILL_METHOD, ), - EuringField(name="Total Head Length", key="total_head_length", type_name=TYPE_NUMERIC, required=False), - EuringField(name="Tarsus", key="tarsus", type_name=TYPE_NUMERIC, required=False), + EuringField(name="Total Head Length", key="total_head_length", euring_type=TYPE_NUMERIC, required=False), + EuringField(name="Tarsus", key="tarsus", euring_type=TYPE_NUMERIC, required=False), EuringLookupField( name="Tarsus Method", key="tarsus_method", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_TARSUS_METHOD, ), - EuringField(name="Tail Length", key="tail_length", type_name=TYPE_NUMERIC, required=False), - EuringField(name="Tail Difference", key="tail_difference", type_name=TYPE_NUMERIC, required=False), - EuringField(name="Fat Score", key="fat_score", type_name=TYPE_INTEGER, length=1, required=False), + EuringField(name="Tail Length", key="tail_length", euring_type=TYPE_NUMERIC, required=False), + EuringField(name="Tail Difference", key="tail_difference", euring_type=TYPE_NUMERIC, required=False), + EuringField(name="Fat Score", key="fat_score", euring_type=TYPE_INTEGER, length=1, required=False), EuringLookupField( name="Fat Score Method", key="fat_score_method", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_FAT_SCORE_METHOD, @@ -301,7 +346,7 @@ EuringLookupField( name="Pectoral Muscle Score", key="pectoral_muscle", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=1, required=False, lookup=LOOKUP_PECTORAL_MUSCLE_SCORE, @@ -309,7 +354,7 @@ EuringLookupField( name="Brood Patch", key="brood_patch", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=1, required=False, lookup=LOOKUP_BROOD_PATCH, @@ -317,25 +362,25 @@ EuringField( name="Primary Score", key="primary_score", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=2, variable_length=True, required=False, ), - EuringField(name="Primary Moult", key="primary_moult", type_name=TYPE_ALPHANUMERIC, length=10, required=False), + EuringField(name="Primary Moult", key="primary_moult", euring_type=TYPE_ALPHANUMERIC, length=10, required=False), EuringFormattedField( name="Old Greater Coverts", key="old_greater_coverts", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=1, required=False, parser=parse_old_greater_coverts, ), - EuringField(name="Alula", key="alula", type_name=TYPE_INTEGER, length=1, required=False), + EuringField(name="Alula", key="alula", euring_type=TYPE_INTEGER, length=1, required=False), EuringLookupField( name="Carpal Covert", key="carpal_covert", - type_name=TYPE_INTEGER, + euring_type=TYPE_INTEGER, length=1, required=False, lookup=LOOKUP_CARPAL_COVERT, @@ -343,34 +388,34 @@ EuringLookupField( name="Sexing Method", key="sexing_method", - type_name=TYPE_ALPHABETIC, + euring_type=TYPE_ALPHABETIC, length=1, required=False, lookup=LOOKUP_SEXING_METHOD, ), - EuringField(name="Place Name", key="place_name", type_name=TYPE_TEXT, required=False), - EuringField(name="Remarks", key="remarks", type_name=TYPE_TEXT, required=False), - EuringField(name="Reference", key="reference", type_name=TYPE_TEXT, required=False), + EuringField(name="Place Name", key="place_name", euring_type=TYPE_TEXT, required=False), + EuringField(name="Remarks", key="remarks", euring_type=TYPE_TEXT, required=False), + EuringField(name="Reference", key="reference", euring_type=TYPE_TEXT, required=False), EuringFormattedField( - name="Latitude", key="latitude", type_name=TYPE_NUMERIC_SIGNED, required=False, parser=parse_latitude + name="Latitude", key="latitude", euring_type=TYPE_NUMERIC_SIGNED, required=False, parser=parse_latitude ), EuringFormattedField( name="Longitude", key="longitude", - type_name=TYPE_NUMERIC_SIGNED, + euring_type=TYPE_NUMERIC_SIGNED, required=False, parser=parse_longitude, ), EuringFormattedField( name="Current Place Code", key="current_place_code", - type_name=TYPE_ALPHANUMERIC, + euring_type=TYPE_ALPHANUMERIC, length=4, required=False, parser=parse_place_code, lookup=lookup_place_code, ), - EuringField(name="More Other Marks", key="more_other_marks", type_name=TYPE_ALPHABETIC, required=False), + EuringField(name="More Other Marks", key="more_other_marks", euring_type=TYPE_ALPHABETIC, required=False), ] # These are the field definitions per format as per the EURING Code Manual diff --git a/src/euring/parsing.py b/src/euring/parsing.py index 6444e0e..8a4c55a 100644 --- a/src/euring/parsing.py +++ b/src/euring/parsing.py @@ -10,6 +10,7 @@ def euring_decode_value( required: bool = True, length: int | None = None, variable_length: bool = False, + value_type: str | None = None, parser: Callable[[str], Any] | None = None, lookup: Mapping[str, str] | Callable[[str], str] | None = None, ) -> dict[str, Any] | None: @@ -17,7 +18,8 @@ def euring_decode_value( definition = { "name": "Value", "key": "value", - "type_name": type, + "euring_type": type, + "value_type": value_type, "required": required, "length": length, "variable_length": variable_length, diff --git a/src/euring/record.py b/src/euring/record.py index 13bfa3a..fa36e11 100644 --- a/src/euring/record.py +++ b/src/euring/record.py @@ -52,11 +52,8 @@ def set(self, key: str, value: object) -> EuringRecord: field = _FIELD_MAP.get(key) if field is None: raise ValueError(f'Unknown field key "{key}".') - self._fields[key] = { - "name": field["name"], - "value": value, - "order": field["order"], - } + # Setting a typed value should clear any previously captured raw EURING text. + self._fields[key] = {"name": field["name"], "value": value, "order": field["order"]} return self def _set_raw_value(self, key: str, value: object) -> None: @@ -148,23 +145,29 @@ def _validate_fields(self) -> list[dict[str, object]]: if self.format == FORMAT_EURING2000 and field.get("variable_length"): field_def = {**field, "variable_length": False} field_obj = coerce_field(field_def) - raw_value = _serialize_field_value(field, value, self.format) + encoded_value = _serialize_field_value(field, value, self.format) + raw_value = encoded_value + if key == "date" and had_empty_value and raw_value and set(raw_value) == {"-"}: + # Treat placeholder dashes for missing required dates as empty so + # non-strict mode only reports a missing-required-field error. + raw_value = "" if key == "geographical_coordinates" and had_empty_value and needs_geo_dots: raw_value = "." * 15 + encoded_value = raw_value parsed_value = field_obj.parse(raw_value) if had_empty_value and raw_value: parsed_value = None description_value = parsed_value - if ( - field_obj.get("lookup") is not None - and field_obj.get("parser") is None - and raw_value != "" - and parsed_value is not None - ): - description_value = raw_value + if field_obj.get("lookup") is not None and raw_value != "" and parsed_value is not None: + if field_obj.get("parser") is None: + description_value = raw_value + elif field_obj.get("value_type") == "date": + # Date lookups operate on the encoded ddmmyyyy string. + description_value = raw_value description = field_obj.describe(description_value) if key in self._fields: self._fields[key]["value"] = parsed_value + self._fields[key]["encoded_value"] = encoded_value if field_obj.get("parser") is not None: self._fields[key]["parsed_value"] = parsed_value if description is not None: diff --git a/tests/test_fields.py b/tests/test_fields.py index c556579..345d520 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -55,7 +55,7 @@ def test_field_shape_and_types(): for field in EURING_FIELDS: assert field["name"] assert field["key"] - assert field["type_name"] in allowed_types + assert field["euring_type"] in allowed_types assert re.match(r"^[a-z0-9_]+$", field["key"]) is not None if "length" in field: assert isinstance(field["length"], int) diff --git a/tests/test_lookup_data.py b/tests/test_lookup_data.py index a560fd7..50a7921 100644 --- a/tests/test_lookup_data.py +++ b/tests/test_lookup_data.py @@ -14,6 +14,7 @@ lookup_ringing_scheme_details, lookup_species, lookup_species_details, + parse_date, parse_geographical_coordinates, parse_latitude, parse_longitude, @@ -99,6 +100,11 @@ def test_lookup_date_invalid(): lookup_date("32132024") +def test_parse_date_rejects_all_dashes(): + with pytest.raises(EuringConstraintException): + parse_date("--------") + + def test_lookup_other_marks_invalid(): with pytest.raises(EuringLookupException): lookup_other_marks("$$") diff --git a/tests/test_record.py b/tests/test_record.py index e0b2139..a78df7e 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -1,6 +1,7 @@ """Tests for building EURING records.""" import json +from datetime import date from importlib.util import module_from_spec, spec_from_file_location from pathlib import Path @@ -134,6 +135,32 @@ def test_record_export_same_format(): assert record.export("euring2000plus") == record.serialize() +def test_record_set_date_accepts_python_date(): + record = EuringRecord("euring2000plus", strict=False) + record.set("ringing_scheme", "GBB") + record.set("date", date(2024, 1, 1)) + serialized = record.serialize() + fields = _fields_for_format("euring2000plus") + date_index = next(index for index, field in enumerate(fields) if field["key"] == "date") + assert serialized.split("|")[date_index] == "01012024" + assert record.fields["date"]["value"] == date(2024, 1, 1) + assert "raw_value" not in record.fields["date"] + + +def test_record_set_clears_raw_value_from_decoded_input(): + fixture_path = Path(__file__).parent / "fixtures" / "euring2000plus_examples.py" + spec = spec_from_file_location("euring2000plus_examples", fixture_path) + assert spec and spec.loader + module = module_from_spec(spec) + spec.loader.exec_module(module) + record = EuringRecord.decode(module.EURING2000PLUS_EXAMPLES[0]) + assert record.fields["date"]["raw_value"] == "11082006" + record.set("date", date(2024, 1, 2)) + record.validate() + assert "raw_value" not in record.fields["date"] + assert record.fields["date"]["encoded_value"] == "02012024" + + def test_record_export_requires_force_for_loss(): fixture_path = Path(__file__).parent / "fixtures" / "euring2020_examples.py" spec = spec_from_file_location("euring2020_examples", fixture_path) diff --git a/uv.lock b/uv.lock index 9d2d495..b968f26 100644 --- a/uv.lock +++ b/uv.lock @@ -744,16 +744,16 @@ wheels = [ [[package]] name = "rich" -version = "14.2.0" +version = "14.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "markdown-it-py", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/84/4831f881aa6ff3c976f6d6809b58cdfa350593ffc0dc3c58f5f6586780fb/rich-14.3.1.tar.gz", hash = "sha256:b8c5f568a3a749f9290ec6bddedf835cec33696bfc1e48bcfecb276c7386e4b8", size = 230125, upload-time = "2026-01-24T21:40:44.847Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, + { url = "https://files.pythonhosted.org/packages/87/2a/a1810c8627b9ec8c57ec5ec325d306701ae7be50235e8fd81266e002a3cc/rich-14.3.1-py3-none-any.whl", hash = "sha256:da750b1aebbff0b372557426fb3f35ba56de8ef954b3190315eb64076d6fb54e", size = 309952, upload-time = "2026-01-24T21:40:42.969Z" }, ] [[package]] From 2cf99508f298d2ff5700ed112ab8dc59a4b7dd56 Mon Sep 17 00:00:00 2001 From: Dylan Verheul Date: Sun, 25 Jan 2026 09:00:20 +0100 Subject: [PATCH 2/2] Remove legacy support --- src/euring/field_schema.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/euring/field_schema.py b/src/euring/field_schema.py index d584e22..e229628 100644 --- a/src/euring/field_schema.py +++ b/src/euring/field_schema.py @@ -274,10 +274,6 @@ def coerce_field(definition: Mapping[str, Any]) -> EuringField: return definition key = definition.get("key", "") name = definition.get("name", key) - if "type" in definition and "euring_type" not in definition: - raise ValueError('Field definitions must use "euring_type" instead of legacy "type".') - if "type_name" in definition: - raise ValueError('Field definitions must use "euring_type" instead of legacy "type_name".') euring_type = definition.get("euring_type") or "" value_type = definition.get("value_type") required = definition.get("required", True)