diff --git a/.gitignore b/.gitignore index b6e4761..0a10c95 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,4 @@ dmypy.json # Pyre type checker .pyre/ +pyrightconfig.json diff --git a/Generate ecoinvent transitive mapping.ipynb b/Generate ecoinvent transitive mapping.ipynb new file mode 100644 index 0000000..bb14887 --- /dev/null +++ b/Generate ecoinvent transitive mapping.ipynb @@ -0,0 +1,618 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d8e9c0b3-dfa9-46cc-b973-5fe953f38521", + "metadata": {}, + "outputs": [], + "source": [ + "import randonneur_data as rd\n", + "import randonneur as rn\n", + "from flowmapper.extraction.ecospold2 import remove_conflicting_synonyms, reformat\n", + "from pathlib import Path\n", + "import xmltodict\n", + "import structlog\n", + "import logging\n", + "import logging.config\n", + "from tqdm import tqdm\n", + "from copy import deepcopy\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "303ee863-eabf-42ff-bef7-cb09e654bc7e", + "metadata": {}, + "outputs": [], + "source": [ + "logging.config.dictConfig({\n", + " \"version\": 1,\n", + " \"disable_existing_loggers\": False,\n", + " \"handlers\": {\n", + " \"file\": {\n", + " \"level\": \"DEBUG\",\n", + " \"class\": \"logging.handlers.WatchedFileHandler\",\n", + " \"filename\": \"test.log\",\n", + " },\n", + " },\n", + " \"loggers\": {\n", + " \"\": {\n", + " \"handlers\": [\"file\"],\n", + " \"level\": \"DEBUG\",\n", + " \"propagate\": True,\n", + " },\n", + " }\n", + "})\n", + "structlog.configure(\n", + " processors=[\n", + " structlog.stdlib.filter_by_level,\n", + " structlog.stdlib.add_logger_name,\n", + " structlog.stdlib.add_log_level,\n", + " structlog.stdlib.PositionalArgumentsFormatter(),\n", + " structlog.processors.TimeStamper(fmt=\"iso\"),\n", + " structlog.processors.StackInfoRenderer(),\n", + " structlog.processors.format_exc_info,\n", + " structlog.processors.UnicodeDecoder(),\n", + " structlog.processors.JSONRenderer(),\n", + " structlog.stdlib.ProcessorFormatter.wrap_for_formatter,\n", + " ],\n", + " logger_factory=structlog.stdlib.LoggerFactory(),\n", + " wrapper_class=structlog.stdlib.BoundLogger,\n", + " cache_logger_on_first_use=True,\n", + ")\n", + "logger = structlog.get_logger(\"ecoinvent-migrate\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e80afdee-7e95-4cb4-8c9b-eb85aed4312e", + "metadata": {}, + "outputs": [], + "source": [ + "registry = rd.Registry()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6890a48b-9a26-4365-8e3d-1d9bcfaa69f5", + "metadata": {}, + "outputs": [], + "source": [ + "def get_elem_flow_data(version: str) -> list[dict]:\n", + " path = Path(f\"/Users/cmutel/Library/Application Support/EcoinventInterface/cache/ecoinvent {version}_cutoff_ecoSpold02/MasterData/ElementaryExchanges.xml\")\n", + "\n", + " if not path.is_file():\n", + " path = Path(f\"/Users/cmutel/Library/Application Support/EcoinventInterface/cache/ecoinvent {version}_cut-off_ecoSpold02/MasterData/ElementaryExchanges.xml\")\n", + "\n", + " with open(path) as fs:\n", + " ei_xml = xmltodict.parse(fs.read(), strip_whitespace=False)[\n", + " \"validElementaryExchanges\"\n", + " ][\"elementaryExchange\"]\n", + "\n", + " data = remove_conflicting_synonyms([reformat(obj) for obj in ei_xml])\n", + "\n", + " for obj in data:\n", + " if \"formula\" in obj:\n", + " del obj[\"formula\"]\n", + " if \"cas_number\" in obj and not obj[\"cas_number\"]:\n", + " del obj[\"cas_number\"]\n", + "\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c6617ef6-54d4-4c9d-bb98-c4a7dfed81a3", + "metadata": {}, + "outputs": [], + "source": [ + "def get_elem_flow_dict(version: str) -> dict:\n", + " return {row['identifier']: row for row in get_elem_flow_data(version)}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "35fd877d-ebe2-45be-9381-d49fdcfa067a", + "metadata": {}, + "outputs": [], + "source": [ + "def add_comment(comment: str | None, addition: str, deletions: list[str] = [\"replaced\"]) -> str:\n", + " if comment is None:\n", + " comment = \"\"\n", + "\n", + " for deletion in deletions:\n", + " if comment == deletion:\n", + " comment = \"\"\n", + "\n", + " if comment and not comment.endswith(\".\"):\n", + " comment += \".\"\n", + "\n", + " if comment:\n", + " return comment + \" \" + addition\n", + " else:\n", + " return addition" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "49971a58-c14f-4480-86c5-698416308380", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_changes(\n", + " source_version: str,\n", + " target_version: str\n", + ") -> list[dict]:\n", + " \"\"\"Build a change list by comparing elementary flow master data of two ecoinvent versions.\"\"\"\n", + " source_flow_dict = get_elem_flow_dict(source_version)\n", + " target_flow_dict = get_elem_flow_dict(target_version)\n", + "\n", + " changes = []\n", + "\n", + " for s_key, s_data in source_flow_dict.items():\n", + " if s_key not in target_flow_dict:\n", + " logger.debug(\"Elementary flow deleted: %s\", s_data)\n", + " continue\n", + "\n", + " t_data = target_flow_dict[s_key]\n", + "\n", + " if t_data == s_data:\n", + " continue\n", + "\n", + " attributes = \", \".join([\n", + " key\n", + " for key, value in t_data.items()\n", + " if key in s_data\n", + " and s_data[key] != value\n", + " ])\n", + " change = {\n", + " \"source\": s_data,\n", + " \"target\": t_data,\n", + " \"comment\": f\"Changed {attributes} from {source_version} to {target_version}.\",\n", + " \"source_version\": f\"ecoinvent-{source_version}-biosphere\",\n", + " \"target_version\": f\"ecoinvent-{target_version}-biosphere\"\n", + " }\n", + " changes.append(change)\n", + "\n", + " return changes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fe2a20c7-88ea-4019-a657-67110684532c", + "metadata": {}, + "outputs": [], + "source": [ + "def supplement_changes(\n", + " changes: list[dict],\n", + " source_version: str,\n", + " target_version: str\n", + ") -> list[dict]:\n", + " \"\"\"Use the `uuid` in the change list to add in other attributes not given in change list.\"\"\"\n", + " source_flow_dict = get_elem_flow_dict(source_version)\n", + " target_flow_dict = get_elem_flow_dict(target_version)\n", + "\n", + " for change in changes:\n", + " if \"formula\" in change[\"source\"]:\n", + " del change[\"source\"][\"formula\"]\n", + " if \"formula\" in change[\"target\"]:\n", + " del change[\"target\"][\"formula\"]\n", + "\n", + " change['source'].update(source_flow_dict[change['source']['uuid']])\n", + " del change['source']['uuid']\n", + " change['target'].update(target_flow_dict[change['target']['uuid']])\n", + " del change['target']['uuid']\n", + "\n", + " attributes = \", \".join([\n", + " key\n", + " for key, value in change['target'].items()\n", + " if key in change['source']\n", + " and change['source'][key] != value\n", + " ])\n", + " comment = add_comment(\n", + " change.get(\"comment\"),\n",
+ " f\"Changed {attributes} from {source_version} to {target_version}.\"\n", + " )\n", + " change['comment'] = comment\n", + " change[\"source_version\"] = f\"ecoinvent-{source_version}-biosphere\"\n", + " change[\"target_version\"] = f\"ecoinvent-{target_version}-biosphere\"\n", + "\n", + " return changes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a7ba6cba-eedb-4502-befc-c4282e9b71a0", + "metadata": {}, + "outputs": [], + "source": [ + "def get_filtered_rd_changes(from_v: str, to_v: str) -> list[dict]:\n", + " \"\"\"Return a filtered list of biosphere changes where the name or uuid changed\"\"\"\n", + " raw = registry.get_file(f'ecoinvent-{from_v}-biosphere-ecoinvent-{to_v}-biosphere')\n", + " if 'replace' in raw:\n", + " data = raw['replace']\n", + " elif 'update' in raw:\n", + " data = raw['update']\n", + " else:\n", + " print(\"No update changes found\")\n", + " return []\n", + " data = [\n", + " obj\n", + " for obj in data\n", + " if 'name' in obj['target']\n", + " or obj['target']['uuid'] != obj['source']['uuid']\n", + " ]\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "29185ef0-1fea-4b21-a4bd-3c8e18d2bc42", + "metadata": {}, + "outputs": [], + "source": [ + "def remove_only_synonyms_change(data: list[dict]) -> list[dict]:\n", + " good = []\n", + "\n", + " for line in data:\n", + " source = {k: v for k, v in line['source'].items() if k != \"synonyms\"}\n", + " target = {k: v for k, v in line['target'].items() if k != \"synonyms\"}\n", + " if source != target:\n", + " good.append(line)\n", + "\n", + " return good" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b900b65d-6af7-4e86-8efd-91675942e248", + "metadata": {}, + "outputs": [], + "source": [ + "def apply_forward_change(data: list[dict], other: list[dict]) -> list[dict]:\n", + " \"\"\"Apply additional changes to get transitive change set.\"\"\"\n", + " other_mapping = {obj['source']['identifier']: obj for obj in other}\n", + " \n", + " for obj in data:\n", + " try:\n", + " transitive = other_mapping[obj['target']['identifier']]\n", + " obj['target'] = transitive['target']\n", + " if transitive.get(\"comment\"):\n", + " obj['comment'] = add_comment(obj.get(\"comment\"), addition=transitive[\"comment\"])\n", + " obj[\"target_version\"] = transitive[\"target_version\"]\n", + "\n", + " if \"conversion_factor\" in transitive:\n", + " obj[\"conversion_factor\"] = obj.get(\"conversion_factor\", 1.) 
* transitive[\"conversion_factor\"]\n", + " \n", + " logger.debug(\"Mapping change: %s\", obj)\n", + " except KeyError:\n", + " continue\n", + "\n", + " input_uuids = {obj['source'].get('identifier', None) for obj in data}\n", + " extra = [obj for obj in other if obj['source']['identifier'] not in input_uuids]\n", + " \n", + " return data + remove_only_synonyms_change(extra)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "34d4eb99-3dcf-4f0f-9121-c93b68ce0c85", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_transitive_datapackage(data: list[dict], source_id: str, end_target: str) -> rn.Datapackage:\n", + " dp = rn.Datapackage(\n", + " name=f\"ecoinvent-{source_id}-biosphere-ecoinvent-{end_target}-biosphere-transitive\",\n", + " source_id=f\"ecoinvent-{source_id}-biosphere\",\n", + " target_id=f\"ecoinvent-{end_target}-biosphere\",\n", + " description=f\"Transitive ecoinvent elementary flow correspondence from {source_id} to {end_target}\",\n", + " contributors=[{\"title\": \"Chris Mutel\", \"roles\": [\"author\"], \"path\": \"https://chris.mutel.org\"}],\n", + " mapping_source=rn.MappingConstants.ECOSPOLD2_BIO_FLOWMAPPER,\n", + " mapping_target=rn.MappingConstants.ECOSPOLD2_BIO_FLOWMAPPER,\n", + " version=\"1.0\",\n", + " )\n", + " dp.add_data(verb=\"update\", data=data)\n", + " filename = f\"ecoinvent-{source_id}-biosphere-ecoinvent-{end_target}-biosphere-transitive.json\"\n", + " dp.to_json(filename)\n", + " registry.add_file(filename, replace=True)\n", + " return dp" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9b5d3432-dc49-4089-a416-94a40f070de7", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_datapackage(data: list[dict], source_id: str, target_id: str) -> rn.Datapackage:\n", + " dp = rn.Datapackage(\n", + " name=f\"ecoinvent-{source_id}-biosphere-ecoinvent-{target_id}-biosphere\",\n", + " source_id=f\"ecoinvent-{source_id}-biosphere\",\n", + " target_id=f\"ecoinvent-{target_id}-biosphere\",\n", + " description=f\"ecoinvent elementary flow correspondence from {source_id} to {target_id}\",\n", + " contributors=[{\"title\": \"Chris Mutel\", \"roles\": [\"author\"], \"path\": \"https://chris.mutel.org\"}],\n", + " mapping_source=rn.MappingConstants.ECOSPOLD2_BIO_FLOWMAPPER,\n", + " mapping_target=rn.MappingConstants.ECOSPOLD2_BIO_FLOWMAPPER,\n", + " version=\"1.0\",\n", + " )\n", + " dp.add_data(verb=\"update\", data=data)\n", + " filename = f\"ecoinvent-{source_id}-biosphere-ecoinvent-{target_id}-biosphere.json\"\n", + " dp.to_json(filename)\n", + " registry.add_file(filename, replace=True)\n", + " return dp" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "46265815-4d3b-442a-ad19-37c98d94d04d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Flowmapper-standard-units-harmonization',\n", + " 'SimaPro-2025-ecoinvent-3.12-context',\n", + " 'SimaPro-9-ecoinvent-3.8-biosphere',\n", + " 'SimaPro-9-ecoinvent-3.8-biosphere-manual-matches',\n", + " 'SimaPro-9-ecoinvent-3.9-biosphere',\n", + " 'SimaPro-9-ecoinvent-3.9-biosphere-manual-matches',\n", + " 'agribalyse-3.1.1-biosphere-ecoinvent-3.8-biosphere',\n", + " 'agribalyse-3.1.1-delete-aggregated-ecoinvent',\n", + " 'agribalyse-3.1.1-ecoinvent-3.10-biosphere-manual-matches',\n", + " 'agribalyse-3.1.1-restore-simapro-ecoinvent-names',\n", + " 'agrifootprint-2022-delete-aggregated-ecoinvent',\n", + " 'agrifootprint-2022-ecoinvent-3.10-biosphere',\n", + " 'agrifootprint-2022-ecoinvent-3.8-biosphere',\n", + " 
'agrifootprint-2022-restore-simapro-ecoinvent-names',\n", + " 'ecoinvent-2.2-biosphere-context-ecoinvent-3.0-biosphere-context',\n", + " 'ecoinvent-2.2-biosphere-ecoinvent-3.0-biosphere',\n", + " 'ecoinvent-2.2-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.01-biosphere-ecoinvent-3.1-biosphere',\n", + " 'ecoinvent-3.01-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.1-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.10.1-biosphere-EF-3.1-biosphere',\n", + " 'ecoinvent-3.10.1-biosphere-ecoinvent-3.11-biosphere',\n", + " 'ecoinvent-3.10.1-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.10.1-cutoff-ecoinvent-3.11-cutoff',\n", + " 'ecoinvent-3.11-biosphere-EF-3.1-biosphere',\n", + " 'ecoinvent-3.11-cutoff-ecoinvent-3.12-cutoff',\n", + " 'ecoinvent-3.2-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.3-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.3-biosphere-ecoinvent-3.4-biosphere',\n", + " 'ecoinvent-3.4-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.4-biosphere-ecoinvent-3.5-biosphere',\n", + " 'ecoinvent-3.5-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.5-biosphere-ecoinvent-3.6-biosphere',\n", + " 'ecoinvent-3.6-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.6-biosphere-ecoinvent-3.7-biosphere',\n", + " 'ecoinvent-3.7-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.7-biosphere-ecoinvent-3.8-biosphere',\n", + " 'ecoinvent-3.7.1-cutoff-ecoinvent-3.8-cutoff',\n", + " 'ecoinvent-3.8-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.8-biosphere-ecoinvent-3.9-biosphere',\n", + " 'ecoinvent-3.8-cutoff-ecoinvent-3.9-cutoff',\n", + " 'ecoinvent-3.9.1-biosphere-EF-3.1-biosphere',\n", + " 'ecoinvent-3.9.1-biosphere-ecoinvent-3.10-biosphere',\n", + " 'ecoinvent-3.9.1-biosphere-ecoinvent-3.12-biosphere-transitive',\n", + " 'ecoinvent-3.9.1-cutoff-ecoinvent-3.10-cutoff',\n", + " 'generic-brightway-unit-conversions',\n", + " 'generic-brightway-units-normalization',\n", + " 'simapro-9-ecoinvent-3-context',\n", + " 'simapro-9-ecoinvent-3-water-slash-m3',\n", + " 'simapro-ecoinvent-3.10-cutoff',\n", + " 'simapro-ecoinvent-3.5-apos',\n", + " 'simapro-ecoinvent-3.5-consequential',\n", + " 'simapro-ecoinvent-3.5-cutoff',\n", + " 'simapro-ecoinvent-3.8-cutoff',\n", + " 'simapro-ecoinvent-3.9.1-cutoff']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted(list(registry))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4656eba5-4957-478d-a4c8-b853e0da8677", + "metadata": {}, + "outputs": [], + "source": [ + "previous = None\n", + "config = [\n", + " {\n", + " \"rd_source\": \"3.10.1\",\n", + " \"ei_source\": \"3.10.1\",\n", + " \"rd_target\": \"3.11\",\n", + " \"ei_target\": \"3.11\",\n", + " \"supplement\": True,\n", + " },\n", + " {\n", + " \"rd_source\": \"3.9.1\",\n", + " \"ei_source\": \"3.9.1\",\n", + " \"rd_target\": \"3.10\",\n", + " \"ei_target\": \"3.10.1\",\n", + " \"supplement\": True,\n", + " },\n", + " {\n", + " \"rd_source\": \"3.8\",\n", + " \"ei_source\": \"3.8\",\n", + " \"rd_target\": \"3.9\",\n", + " \"ei_target\": \"3.9.1\",\n", + " \"supplement\": True,\n", + " },\n", + " {\n", + " \"ei_source\": \"3.7\",\n", + " \"ei_target\": \"3.8\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.6\",\n", + " \"ei_target\": \"3.7\",\n", + " },\n", + " {\n", + " \"ei_source\": 
\"3.5\",\n", + " \"ei_target\": \"3.6\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.4\",\n", + " \"ei_target\": \"3.5\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.3\",\n", + " \"ei_target\": \"3.4\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.2\",\n", + " \"ei_target\": \"3.3\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.1\",\n", + " \"ei_target\": \"3.2\",\n", + " },\n", + " {\n", + " \"ei_source\": \"3.01\",\n", + " \"ei_target\": \"3.1\",\n", + " },\n", + "]\n", + "end_target = \"3.12\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "af525429-ba7a-42db-8509-ee279cfd70c8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████| 11/11 [00:06<00:00, 1.78it/s]\n" + ] + } + ], + "source": [ + "for line in tqdm(config):\n", + " if line.get(\"supplement\"):\n", + " data = supplement_changes(\n", + " get_filtered_rd_changes(line[\"rd_source\"], line[\"rd_target\"]), \n", + " line[\"ei_source\"], \n", + " line[\"ei_target\"],\n", + " )\n", + " else:\n", + " data = generate_changes(line[\"ei_source\"], line[\"ei_target\"])\n", + "\n", + " if not line.get(\"supplement\") and data:\n", + " generate_datapackage(deepcopy(data), line[\"ei_source\"], line[\"ei_target\"])\n", + " \n", + " if previous is not None:\n", + " data = apply_forward_change(data, previous)\n", + "\n", + " generate_transitive_datapackage(deepcopy(data), line[\"ei_source\"], end_target)\n", + " previous = data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c1d13a46-ddc8-4420-8cc9-0dd42dbb5742", + "metadata": {}, + "outputs": [], + "source": [ + "data_22 = registry.get_file('ecoinvent-2.2-biosphere-ecoinvent-3.0-biosphere')['replace']\n", + "\n", + "data_301 = defaultdict(list)\n", + "\n", + "for obj in get_elem_flow_data(\"3.01\"):\n", + " data_301[obj['name']].append(obj)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8d94bc46-dc25-41d3-aef2-caf3b7ad1fb9", + "metadata": {}, + "outputs": [], + "source": [ + "changes = []\n", + "\n", + "for line in data_22:\n", + " s_name, t_name = line['source']['name'], line['target']['name']\n", + " if s_name == t_name:\n", + " continue\n", + " for obj in data_301.get(t_name, []):\n", + " source = {\n", + " \"name\": s_name,\n", + " \"context\": obj[\"context\"],\n", + " }\n", + " changes.append({\n", + " \"source\": source,\n", + " \"target\": obj,\n", + " \"comment\": \"Name change from ecoinvent 2.2 to 3.01\",\n", + " \"source_version\": \"ecoinvent-2.2-biosphere\",\n", + " \"target_version\": \"ecoinvent-3.01-biosphere\",\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e7adaffb-84ba-4359-a1c9-3af052772445", + "metadata": {}, + "outputs": [], + "source": [ + "data = apply_forward_change(changes, previous)\n", + "\n", + "dp = generate_transitive_datapackage(data, \"2.2\", end_target)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d325e762-f400-45fb-b432-754321e44b9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/README.md b/README.md index eb724b4..d6c5bab 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ ontology that `flowmapper` uses: * context: tuple[str], a hierarchical organization into environmental compartments, e.g. `("air", "urban air close to ground")` * unit: str, or complex type with a string representation, e.g. "kg" * sector-specific labels: str, or complex type with a string representation, a set of additional fields which can help identify or further specify a flow, e.g. CAS number 000110-63-4 -* synonyms: list[str], a list of alternative unique names for a substance, e.g. `["Butylene glycol", "butane-1,4-diol"]` +* synonyms: list[str], a list of alternative unique names for a substance, e.g. `["Butylene glycol", "butane-1,4-diol"]`. Synonyms should not overlap. Flowmapper **assumes that the source and target lists are given in this format**; it comes with or plays well with conversion software for data formats like ecospold, FEDEFL, and SimaPro CSV. diff --git a/ecoinvent-3.10-biosphere-simapro-2024-biosphere.json b/ecoinvent-3.10-biosphere-simapro-2024-biosphere.json new file mode 100644 index 0000000..ad24edc --- /dev/null +++ b/ecoinvent-3.10-biosphere-simapro-2024-biosphere.json @@ -0,0 +1,1134 @@ +{ + "name": "ecoinvent-3.10-biosphere-simapro-2024-biosphere", + "description": "Manual SimaPro to ecoinvent name matches without conversion factors", + "contributors": [ + { + "title": "Chris Mutel", + "roles": [ + "author" + ], + "path": "https://chris.mutel.org" + } + ], + "created": "2025-11-16T22:04:33.433871+00:00", + "version": "1.0", + "licenses": [ + { + "name": "CC-BY-4.0", + "path": "https://creativecommons.org/licenses/by/4.0/legalcode", + "title": "Creative Commons Attribution 4.0 International" + } + ], + "graph_context": [ + "edges" + ], + "mapping": { + "source": { + "expression language": "XPath", + "labels": { + "name": "//*:elementaryExchange/*:name/text()", + "cas_number": "//*:elementaryExchange/@casNumber", + "unit": "//*:elementaryExchange/*:unitName/text()", + "identifier": "//*:elementaryExchange/@elementaryExchangeId", + "context": [ + "//*:elementaryExchange/*:compartment/*:compartment/text()", + "//*:elementaryExchange/*:compartment/*:subcompartment/text()" + ], + "synonyms": "//*:elementaryExchange/*:synonym/text()" + } + }, + "target": { + "expression language": "like JSONPath", + "labels": { + "identifier": "Process[*].\"Process identifier\".text", + "name": "Process[*].Products[*].text[0]", + "platform_id": "Process[*].\"Platform Identifier\"", + "unit": [ + "[\"Emissions to air/\", Process[*].\"Emissions to air\".[2]]", + "[\"Emissions to soil/\", Process[*].\"Emissions to soil\".[2]]", + "[\"Emissions to water/\", Process[*].\"Emissions to water\".[2]]", + "[\"Resources/\", Process[*].\"Resources\".[2]]" + ], + "context": [ + "[\"Emissions to air/\", Process[*].\"Emissions to air\".[1]]", + "[\"Emissions to soil/\", Process[*].\"Emissions to soil\".[1]]", + "[\"Emissions to water/\", Process[*].\"Emissions to water\".[1]]", + "[\"Resources/\", Process[*].\"Resources\".[1]]" + ] + } + } + }, + "source_id": "simapro-2024-biosphere", + "target_id": "ecoinvent-3.10-biosphere", + "update": [ + { + "source": { + "name": "Parathion, methyl" + }, + "target": { + "name": "Methyl parathion" + } + }, + { + "source": { + "name": "Thiocyanic acid (-1 ion)" + }, + "target": { + "name": "Thiocyanate" + } + }, + { + "source": { + "name": "Quizalofop ethyl ester" + }, + "target": { + "name": "Quizalofop-ethyl" + } + }, + { 
+ "source": { + "name": "Prothioconazol" + }, + "target": { + "name": "Prothioconazole" + } + }, + { + "source": { + "name": "Pyraclostrobin (prop)" + }, + "target": { + "name": "Pyraclostrobin" + } + }, + { + "source": { + "name": "Monosodium acid methanearsonate" + }, + "target": { + "name": "MSMA" + } + }, + { + "source": { + "name": "Carbamic acid, [(dibutylamino)thio]methyl-, 2,3-dihydro-2,2-dimethyl-7-benzofuranyl ester" + }, + "target": { + "name": "Carbosulfan" + } + }, + { + "source": { + "name": "Benzene, 1-methyl-2-nitro-" + }, + "target": { + "name": "o-Nitrotoluene" + } + }, + { + "source": { + "name": "Alkane (unspecified)" + }, + "target": { + "name": "Hydrocarbons, aliphatic, alkanes, unspecified" + } + }, + { + "source": { + "name": "AOX (Adsorbable Organic Halogens)" + }, + "target": { + "name": "AOX, Adsorbable Organic Halides" + } + }, + { + "source": { + "name": "AOX, Adsorbable Organic Halogen as Cl" + }, + "target": { + "name": "AOX, Adsorbable Organic Halides" + } + }, + { + "source": { + "name": "BOD5 (Biological Oxygen Demand)" + }, + "target": { + "name": "BOD5, Biological Oxygen Demand" + } + }, + { + "source": { + "name": "COD (Chemical Oxygen Demand)" + }, + "target": { + "name": "COD, Chemical Oxygen Demand" + } + }, + { + "source": { + "name": "Wood, unspecified, standing/m3" + }, + "target": { + "name": "Wood, unspecified, standing" + } + }, + { + "source": { + "name": "Particulates, < 2.5 um" + }, + "target": { + "name": "Particulate Matter, < 2.5 um" + } + }, + { + "source": { + "name": "Particulates, > 10 um" + }, + "target": { + "name": "Particulate Matter, > 10 um" + } + }, + { + "source": { + "name": "Particulates, > 2.5 um, and < 10um" + }, + "target": { + "name": "Particulate Matter, > 2.5 um and < 10um" + } + }, + { + "source": { + "name": "Sand" + }, + "target": { + "name": "Sand, unspecified" + } + }, + { + "source": { + "name": "Potassium chloride" + }, + "target": { + "name": "Sylvite" + } + }, + { + "source": { + "name": "Sodium tetrahydroborate" + }, + "target": { + "name": "Sodium tetrahydridoborate" + } + }, + { + "source": { + "name": "Toluene, 2-chloro-" + }, + "target": { + "name": "o-Chlorotoluene" + } + }, + { + "source": { + "name": "Pentane, 2,2,4-trimethyl-" + }, + "target": { + "name": "2,2,4-Trimethylpentane" + } + }, + { + "source": { + "name": "Dioxin, 2,3,7,8 Tetrachlorodibenzo-p-" + }, + "target": { + "name": "Dioxins, measured as 2,3,7,8-tetrachlorodibenzo-p-dioxin" + } + }, + { + "source": { + "name": "Discarded fish, demersal" + }, + "target": { + "name": "Discarded fish, demersal, to ocean" + } + }, + { + "source": { + "name": "Methane, tetrachloro-, CFC-10" + }, + "target": { + "name": "Carbon tetrachloride" + } + }, + { + "source": { + "name": "Methane, tetrafluoro-, CFC-14" + }, + "target": { + "name": "Tetrafluoromethane" + } + }, + { + "source": { + "name": "Metolachlor, (S)" + }, + "target": { + "name": "Metolachlor" + } + }, + { + "source": { + "name": "Methane, chlorofluoro-, HCFC-31" + }, + "target": { + "name": "Chloro-fluoromethane" + } + }, + { + "source": { + "name": "Metam-sodium dihydrate" + }, + "target": { + "name": "Metam-sodium" + } + }, + { + "source": { + "name": "Gas, natural, 36 MJ per m3" + }, + "target": { + "name": "Gas, natural, in ground" + } + }, + { + "source": { + "name": "Gas, mine, off-gas, process, coal mining, 36 MJ per m3" + }, + "target": { + "name": "Gas, mine, off-gas, process, coal mining" + } + }, + { + "source": { + "name": "Discarded fish, pelagic" + }, + "target": { + "name": 
"Discarded fish, pelagic, to ocean" + } + }, + { + "source": { + "name": "Dipropylthiocarbamic acid S-ethyl ester" + }, + "target": { + "name": "EPTC" + } + }, + { + "source": { + "name": "Oxydemeton methyl" + }, + "target": { + "name": "Oxydemeton-methyl" + } + }, + { + "source": { + "name": "Thiazole, 2-(thiocyanatemethylthio)benzo-" + }, + "target": { + "name": "TCMTB" + } + }, + { + "source": { + "name": "Tri-allate" + }, + "target": { + "name": "Triallate" + } + }, + { + "source": { + "name": "Cesium (I)" + }, + "target": { + "name": "Caesium I" + } + }, + { + "source": { + "name": "Cesium" + }, + "target": { + "name": "Caesium" + } + }, + { + "source": { + "name": "Dimethyl formamide" + }, + "target": { + "name": "N,N-Dimethylformamide" + } + }, + { + "source": { + "name": "Methane, land transformation" + }, + "target": { + "name": "Methane, from soil or biomass stock" + } + }, + { + "source": { + "name": "Carbon dioxide, land transformation" + }, + "target": { + "name": "Carbon dioxide, from soil or biomass stock" + } + }, + { + "source": { + "name": "Carbon monoxide, land transformation" + }, + "target": { + "name": "Carbon monoxide, from soil or biomass stock" + } + }, + { + "source": { + "name": "Nitrogen, atmospheric" + }, + "target": { + "name": "Nitrogen" + } + }, + { + "source": { + "name": "Butyric acid, 4-(2,4-dichlorophenoxy)-" + }, + "target": { + "name": "2,4-DB" + } + }, + { + "source": { + "name": "Benzo(a)anthracene" + }, + "target": { + "name": "Benz(a)anthracene" + } + }, + { + "source": { + "name": "Oil, crude, 43.4 MJ per kg" + }, + "target": { + "name": "Oil, crude" + } + }, + { + "source": { + "name": "Argon-40/kg" + }, + "target": { + "name": "Argon" + } + }, + { + "source": { + "name": "1-Butanol" + }, + "target": { + "name": "Butanol" + } + }, + { + "source": { + "name": "Metaldehyde (tetramer)" + }, + "target": { + "name": "Metaldehyde" + } + }, + { + "source": { + "name": "Roundup" + }, + "target": { + "name": "Glyphosate" + } + }, + { + "source": { + "name": "Transformation, from pasture and meadow, organic" + }, + "target": { + "name": "Transformation, from pasture, man made, extensive" + } + }, + { + "source": { + "name": "Transformation, to pasture and meadow, organic" + }, + "target": { + "name": "Transformation, to pasture, man made, extensive" + } + }, + { + "source": { + "name": "Transformation, from arable, organic" + }, + "target": { + "name": "Transformation, from arable land, unspecified use" + } + }, + { + "source": { + "name": "Transformation, to arable, organic" + }, + "target": { + "name": "Transformation, to arable land, unspecified use" + } + }, + { + "source": { + "name": "Transformation, to industrial area, built up" + }, + "target": { + "name": "Transformation, to industrial area", + "context": [ + "natural resource", + "land" + ] + } + }, + { + "source": { + "name": "Transformation, from agriculture" + }, + "target": { + "name": "Transformation, from annual crop" + } + }, + { + "source": { + "name": "Transformation, from annual crop, non-irrigated, fallow" + }, + "target": { + "name": "Transformation, from pasture, man made" + } + }, + { + "source": { + "name": "Transformation, from forest, intensive, clear-cutting" + }, + "target": { + "name": "Transformation, from forest, intensive" + } + }, + { + "source": { + "name": "Transformation, from forest, used" + }, + "target": { + "name": "Transformation, from forest, intensive" + } + }, + { + "source": { + "name": "Transformation, from grassland" + }, + "target": { + "name": 
"Transformation, from grassland, natural (non-use)" + } + }, + { + "source": { + "name": "Transformation, from grassland/pasture/meadow" + }, + "target": { + "name": "Transformation, from pasture, man made" + } + }, + { + "source": { + "name": "Transformation, from industrial area, benthos" + }, + "target": { + "name": "Transformation, from seabed, unspecified" + } + }, + { + "source": { + "name": "Transformation, from industrial area, built up" + }, + "target": { + "name": "Transformation, from industrial area" + } + }, + { + "source": { + "name": "Transformation, from industrial area, vegetation" + }, + "target": { + "name": "Transformation, from industrial area" + } + }, + { + "source": { + "name": "Transformation, from permanent crop, fruit" + }, + "target": { + "name": "Transformation, from permanent crop, irrigated" + } + }, + { + "source": { + "name": "Transformation, from tropical rain forest" + }, + "target": { + "name": "Transformation, from forest, extensive" + } + }, + { + "source": { + "name": "Transformation, from unspecified, used" + }, + "target": { + "name": "Transformation, from unspecified" + } + }, + { + "source": { + "name": "Transformation, to agriculture" + }, + "target": { + "name": "Transformation, to annual crop" + } + }, + { + "source": { + "name": "Transformation, to annual crop, fallow" + }, + "target": { + "name": "Transformation, to arable land, unspecified use" + } + }, + { + "source": { + "name": "Transformation, to annual crop, non-irrigated, fallow" + }, + "target": { + "name": "Transformation, to annual crop, non-irrigated, extensive" + } + }, + { + "source": { + "name": "Transformation, to dump site, benthos" + }, + "target": { + "name": "Transformation, to seabed, unspecified" + } + }, + { + "source": { + "name": "Transformation, to forest, intensive, clear-cutting" + }, + "target": { + "name": "Transformation, to forest, intensive" + } + }, + { + "source": { + "name": "Transformation, to forest, intensive, normal" + }, + "target": { + "name": "Transformation, to forest, intensive" + } + }, + { + "source": { + "name": "Transformation, to forest, intensive, short-cycle" + }, + "target": { + "name": "Transformation, to forest, intensive" + } + }, + { + "source": { + "name": "Transformation, to forest, used" + }, + "target": { + "name": "Transformation, to forest, intensive" + } + }, + { + "source": { + "name": "Transformation, to grassland/pasture/meadow" + }, + "target": { + "name": "Transformation, to pasture, man made" + } + }, + { + "source": { + "name": "Transformation, to industrial area, benthos" + }, + "target": { + "name": "Transformation, to seabed, unspecified" + } + }, + { + "source": { + "name": "Transformation, to industrial area, vegetation" + }, + "target": { + "name": "Transformation, to industrial area" + } + }, + { + "source": { + "name": "Transformation, to permanent crop, fruit, intensive" + }, + "target": { + "name": "Transformation, to permanent crop, irrigated" + } + }, + { + "source": { + "name": "Transformation, to sea and ocean" + }, + "target": { + "name": "Transformation, to seabed, unspecified" + } + }, + { + "source": { + "name": "Transformation, to traffic area, road embankment" + }, + "target": { + "name": "Transformation, to traffic area, road network" + } + }, + { + "source": { + "name": "Transformation, to unspecified, used" + }, + "target": { + "name": "Transformation, to unspecified" + } + }, + { + "source": { + "name": "Transformation, to urban/industrial fallow" + }, + "target": { + "name": "Transformation, to 
industrial area" + } + }, + { + "source": { + "name": "Transformation, to water bodies, artificial" + }, + "target": { + "name": "Transformation, to river, artificial" + } + }, + { + "source": { + "name": "Transformation, to water courses, artificial" + }, + "target": { + "name": "Transformation, to river, artificial" + } + }, + { + "source": { + "name": "Transformation, to lakes, artificial" + }, + "target": { + "name": "Transformation, to lake, artificial" + } + }, + { + "source": { + "name": "Transformation, to rivers, artificial" + }, + "target": { + "name": "Transformation, to river, artificial" + } + }, + { + "source": { + "name": "Occupation, lakes, artificial" + }, + "target": { + "name": "Occupation, lake, artificial" + } + }, + { + "source": { + "name": "Occupation, rivers, artificial" + }, + "target": { + "name": "Occupation, river, artificial" + } + }, + { + "source": { + "name": "Occupation, water bodies, artificial" + }, + "target": { + "name": "Occupation, river, artificial" + } + }, + { + "source": { + "name": "Occupation, agriculture" + }, + "target": { + "name": "Occupation, annual crop" + } + }, + { + "source": { + "name": "Occupation, dump site, benthos" + }, + "target": { + "name": "Occupation, seabed, unspecified" + } + }, + { + "source": { + "name": "Occupation, forest, intensive, normal" + }, + "target": { + "name": "Occupation, forest, intensive" + } + }, + { + "source": { + "name": "Occupation, forest, intensive, short-cycle" + }, + "target": { + "name": "Occupation, forest, intensive" + } + }, + { + "source": { + "name": "Occupation, forest, used" + }, + "target": { + "name": "Occupation, forest, intensive" + } + }, + { + "source": { + "name": "Occupation, grassland/pasture/meadow" + }, + "target": { + "name": "Occupation, pasture, man made" + } + }, + { + "source": { + "name": "Occupation, industrial area, benthos" + }, + "target": { + "name": "Occupation, seabed, unspecified" + } + }, + { + "source": { + "name": "Occupation, industrial area, built up" + }, + "target": { + "name": "Occupation, industrial area" + } + }, + { + "source": { + "name": "Occupation, industrial area, vegetation" + }, + "target": { + "name": "Occupation, industrial area" + } + }, + { + "source": { + "name": "Occupation, permanent crop, fruit, intensive" + }, + "target": { + "name": "Occupation, permanent crop, irrigated" + } + }, + { + "source": { + "name": "Occupation, sea and ocean" + }, + "target": { + "name": "Occupation, seabed, unspecified" + } + }, + { + "source": { + "name": "Occupation, sea and ocean" + }, + "target": { + "name": "Occupation, seabed, unspecified" + } + }, + { + "source": { + "name": "Occupation, traffic area" + }, + "target": { + "name": "Occupation, traffic area, road network" + } + }, + { + "source": { + "name": "Occupation, traffic area, road embankment" + }, + "target": { + "name": "Occupation, traffic area, road network" + } + }, + { + "source": { + "name": "Occupation, unspecified, used" + }, + "target": { + "name": "Occupation, unspecified" + } + }, + { + "source": { + "name": "Occupation, water bodies, artificial" + }, + "target": { + "name": "Occupation, river, artificial" + } + }, + { + "source": { + "name": "Occupation, water courses, artificial" + }, + "target": { + "name": "Occupation, river, artificial" + } + }, + { + "source": { + "name": "Occupation, wetland" + }, + "target": { + "name": "Occupation, inland waterbody, unspecified" + } + }, + { + "source": { + "name": "Bauxite" + }, + "target": { + "name": "Gangue" + } + }, + { + "source": { + 
"name": "Copper ore" + }, + "target": { + "name": "Copper" + } + }, + { + "source": { + "name": "Copper, Cu 0.38%, Au 9.7E-4%, Ag 9.7E-4%, Zn 0.63%, Pb 0.014%, in ore" + }, + "target": { + "name": "Copper" + } + }, + { + "source": { + "name": "Copper, Cu 3.2E+0%, Pt 2.5E-4%, Pd 7.3E-4%, Rh 2.0E-5%, Ni 2.3E+0% in ore" + }, + "target": { + "name": "Copper" + } + }, + { + "source": { + "name": "Copper, Cu 5.2E-2%, Pt 4.8E-4%, Pd 2.0E-4%, Rh 2.4E-5%, Ni 3.7E-2% in ore" + }, + "target": { + "name": "Copper" + } + }, + { + "source": { + "name": "Coal, 18 MJ per kg" + }, + "target": { + "name": "Coal, hard" + } + }, + { + "source": { + "name": "Coal, brown, 10 MJ per kg" + }, + "target": { + "name": "Coal, brown" + } + }, + { + "source": { + "name": "Coal, brown, 8 MJ per kg" + }, + "target": { + "name": "Coal, brown" + } + }, + { + "source": { + "name": "Crude oil" + }, + "target": { + "name": "Oil, crude" + } + }, + { + "source": { + "name": "Energy, from biomass" + }, + "target": { + "name": "Energy, gross calorific value, in biomass" + } + }, + { + "source": { + "name": "Energy, from coal" + }, + "target": { + "name": "Energy, gross calorific value, in biomass" + } + }, + { + "source": { + "name": "Gas, mine, off-gas, process, coal mining/m3" + }, + "target": { + "name": "Gas, mine, off-gas, process, coal mining", + "unit": "Sm3" + } + }, + { + "source": { + "name": "Silver, Ag 9.7E-4%, Au 9.7E-4%, Zn 0.63%, Cu 0.38%, Pb 0.014%, in ore" + }, + "target": { + "name": "Silver" + } + }, + { + "source": { + "name": "Zinc, Zn 0.63%, Au 9.7E-4%, Ag 9.7E-4%, Cu 0.38%, Pb 0.014%, in ore" + }, + "target": { + "name": "Zinc" + } + }, + { + "source": { + "name": "Lead, Pb 0.014%, Au 9.7E-4%, Ag 9.7E-4%, Zn 0.63%, Cu 0.38%, in ore" + }, + "target": { + "name": "Lead" + } + }, + { + "source": { + "name": "Nickel, Ni 2.3E+0%, Pt 2.5E-4%, Pd 7.3E-4%, Rh 2.0E-5%, Cu 3.2E+0% in ore" + }, + "target": { + "name": "Nickel" + } + }, + { + "source": { + "name": "Nickel, Ni 3.7E-2%, Pt 4.8E-4%, Pd 2.0E-4%, Rh 2.4E-5%, Cu 5.2E-2% in ore" + }, + "target": { + "name": "Nickel" + } + }, + { + "source": { + "name": "Platinum, Pt 4.8E-4%, Pd 2.0E-4%, Rh 2.4E-5%, Ni 3.7E-2%, Cu 5.2E-2% in ore" + }, + "target": { + "name": "Platinum" + } + }, + { + "source": { + "name": "Gold, Au 9.7E-4%, Ag 9.7E-4%, Zn 0.63%, Cu 0.38%, Pb 0.014%, in ore" + }, + "target": { + "name": "Gold" + } + }, + { + "source": { + "name": "Platinum, Pt 2.5E-4%, Pd 7.3E-4%, Rh 2.0E-5%, Ni 2.3E+0%, Cu 3.2E+0% in ore" + }, + "target": { + "name": "Platinum" + } + }, + { + "source": { + "name": "Palladium, Pd 2.0E-4%, Pt 4.8E-4%, Rh 2.4E-5%, Ni 3.7E-2%, Cu 5.2E-2% in ore" + }, + "target": { + "name": "Palladium" + } + }, + { + "source": { + "name": "Palladium, Pd 7.3E-4%, Pt 2.5E-4%, Rh 2.0E-5%, Ni 2.3E+0%, Cu 3.2E+0% in ore" + }, + "target": { + "name": "Palladium" + } + }, + { + "source": { + "name": "Clay" + }, + "target": { + "name": "Clay, bentonite" + } + }, + { + "source": { + "name": "Rhodium, Rh 2.0E-5%, Pt 2.5E-4%, Pd 7.3E-4%, Ni 2.3E+0%, Cu 3.2E+0% in ore" + }, + "target": { + "name": "Rhodium" + } + }, + { + "source": { + "name": "Rhodium, Rh 2.4E-5%, Pt 4.8E-4%, Pd 2.0E-4%, Ni 3.7E-2%, Cu 5.2E-2% in ore" + }, + "target": { + "name": "Rhodium" + } + } + ] +} \ No newline at end of file diff --git a/file_logger.py b/file_logger.py new file mode 100644 index 0000000..1025eaa --- /dev/null +++ b/file_logger.py @@ -0,0 +1,337 @@ +import logging +import structlog +from pathlib import Path +from typing import Optional + + +def configure_file_logger( 
+ logger_name: str, + log_file_path: str | Path, + log_level: int = logging.INFO, + log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + encoding: str = "utf-8", +) -> structlog.BoundLogger: + """ + Configure a structlog logger to log messages only to a file. Note that file output requires structlog to be configured with structlog.stdlib.LoggerFactory (as in the accompanying notebook); under structlog's default configuration the returned logger prints to stdout and bypasses these handlers. + + Args: + logger_name: Name of the logger + log_file_path: Path to the log file + log_level: Logging level (default: INFO) + log_format: Format string for log messages + encoding: File encoding (default: utf-8) + + Returns: + structlog.BoundLogger: Configured structlog logger + + Example: + >>> logger = configure_file_logger("my_app", "logs/app.log") + >>> logger.info("Application started") + >>> logger.error("An error occurred", error_code=500) + """ + # Convert path to Path object if it's a string + log_file_path = Path(log_file_path) + + # Create log directory if it doesn't exist + log_file_path.parent.mkdir(parents=True, exist_ok=True) + + # Get the standard library logger + std_logger = logging.getLogger(logger_name) + std_logger.setLevel(log_level) + + # Remove only FileHandler handlers to avoid duplicates while preserving other handlers + for handler in std_logger.handlers[:]: + if isinstance(handler, logging.FileHandler): + std_logger.removeHandler(handler) + + # Create a simple file handler + file_handler = logging.FileHandler( + filename=log_file_path, + encoding=encoding, + ) + + # Create formatter + formatter = logging.Formatter(log_format) + file_handler.setFormatter(formatter) + + # Add handler to logger + std_logger.addHandler(file_handler) + + # Prevent propagation to root logger to avoid console output + std_logger.propagate = False + + # Get the structlog logger + logger = structlog.get_logger(logger_name) + + return logger + + +def configure_structured_file_logger( + logger_name: str, + log_file_path: str | Path, + log_level: int = logging.INFO, + encoding: str = "utf-8", + include_timestamp: bool = True, + include_logger_name: bool = True, + include_level: bool = True, +) -> structlog.BoundLogger: + """ + Configure a structlog logger with structured logging to a file.
+ + Args: + logger_name: Name of the logger + log_file_path: Path to the log file + log_level: Logging level (default: INFO) + encoding: File encoding (default: utf-8) + include_timestamp: Whether to include timestamp in logs (default: True) + include_logger_name: Whether to include logger name in logs (default: True) + include_level: Whether to include log level in logs (default: True) + + Returns: + structlog.BoundLogger: Configured structlog logger with structured logging + + Example: + >>> logger = configure_structured_file_logger("my_app", "logs/app.json") + >>> logger.info("User logged in", user_id=123, ip="192.168.1.1") + """ + import json + + # Convert path to Path object if it's a string + log_file_path = Path(log_file_path) + + # Create log directory if it doesn't exist + log_file_path.parent.mkdir(parents=True, exist_ok=True) + + # Get the standard library logger + std_logger = logging.getLogger(logger_name) + std_logger.setLevel(log_level) + + # Remove only FileHandler handlers to avoid duplicates while preserving other handlers + for handler in std_logger.handlers[:]: + if isinstance(handler, logging.FileHandler): + std_logger.removeHandler(handler) + + # Create a simple file handler + file_handler = logging.FileHandler( + filename=log_file_path, + encoding=encoding, + ) + + # Create JSON formatter for structured logging + class JSONFormatter(logging.Formatter): + def format(self, record): + log_entry = { + "message": record.getMessage(), + } + + if include_timestamp: + log_entry["timestamp"] = self.formatTime(record) + + if include_logger_name: + log_entry["logger"] = record.name + + if include_level: + log_entry["level"] = record.levelname + + # Add any extra fields from structlog + if hasattr(record, "structlog"): + log_entry.update(record.structlog) + + return json.dumps(log_entry) + + formatter = JSONFormatter() + file_handler.setFormatter(formatter) + + # Add handler to logger + std_logger.addHandler(file_handler) + + # Prevent propagation to root logger to avoid console output + std_logger.propagate = False + + # Get the structlog logger + logger = structlog.get_logger(logger_name) + + return logger + + +def get_file_logger( + logger_name: str, + log_file_path: str | Path, + structured: bool = False, + **kwargs +) -> structlog.BoundLogger: + """ + Convenience function to get a file logger with either standard or structured logging. + + Args: + logger_name: Name of the logger + log_file_path: Path to the log file + structured: Whether to use structured (JSON) logging (default: False) + **kwargs: Additional arguments passed to the configuration function + + Returns: + structlog.BoundLogger: Configured structlog logger + + Example: + >>> # Standard logging + >>> logger = get_file_logger("app", "logs/app.log") + >>> + >>> # Structured logging + >>> logger = get_file_logger("app", "logs/app.json", structured=True) + """ + if structured: + return configure_structured_file_logger(logger_name, log_file_path, **kwargs) + else: + return configure_file_logger(logger_name, log_file_path, **kwargs) + + +def reset_logger_to_defaults(logger_name: str) -> structlog.BoundLogger: + """ + Reset a named structlog logger to its default configuration. + + This function removes all custom handlers and resets the logger to use + the default structlog configuration, which typically outputs to console. 
+ + Args: + logger_name: Name of the logger to reset + + Returns: + structlog.BoundLogger: Reset structlog logger + + Example: + >>> # Configure a file logger + >>> logger = configure_file_logger("my_app", "logs/app.log") + >>> logger.info("This goes to file") + >>> + >>> # Reset to defaults (console output) + >>> logger = reset_logger_to_defaults("my_app") + >>> logger.info("This goes to console") + """ + # Get the standard library logger + std_logger = logging.getLogger(logger_name) + + # Remove all existing handlers + for handler in std_logger.handlers[:]: + std_logger.removeHandler(handler) + + # Reset logger level to default (NOTSET) + std_logger.setLevel(logging.NOTSET) + + # Re-enable propagation to parent loggers + std_logger.propagate = True + + # Get the structlog logger (this will use default structlog configuration) + logger = structlog.get_logger(logger_name) + + return logger + + +def reset_all_loggers_to_defaults() -> None: + """ + Reset all loggers to their default configuration. + + This function removes all custom handlers from all loggers and resets + them to use the default structlog configuration. + + Example: + >>> # Configure multiple file loggers + >>> logger1 = configure_file_logger("app1", "logs/app1.log") + >>> logger2 = configure_file_logger("app2", "logs/app2.log") + >>> + >>> # Reset all loggers to defaults + >>> reset_all_loggers_to_defaults() + >>> + >>> # Now all loggers will output to console by default + >>> logger1.info("This goes to console") + >>> logger2.info("This also goes to console") + """ + # Get all existing loggers + loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] + + # Also include the root logger + loggers.append(logging.getLogger()) + + for logger in loggers: + # Remove all existing handlers + for handler in logger.handlers[:]: + logger.removeHandler(handler) + + # Reset logger level to default + logger.setLevel(logging.NOTSET) + + # Re-enable propagation + logger.propagate = True + + +def get_logger_info(logger_name: str) -> dict: + """ + Get information about a logger's current configuration. 
+ + Args: + logger_name: Name of the logger to inspect + + Returns: + dict: Information about the logger's configuration + + Example: + >>> logger = configure_file_logger("my_app", "logs/app.log") + >>> info = get_logger_info("my_app") + >>> print(info) + >>> # Output: {'name': 'my_app', 'level': 20, 'handlers': 1, 'propagate': False} + """ + std_logger = logging.getLogger(logger_name) + + return { + "name": logger_name, + "level": std_logger.level, + "handlers": len(std_logger.handlers), + "propagate": std_logger.propagate, + "handler_types": [type(handler).__name__ for handler in std_logger.handlers], + } + + +# Example usage and testing +if __name__ == "__main__": + # Example 1: Standard file logging + logger1 = configure_file_logger("test_app", "logs/test.log") + logger1.info("This is a test message") + logger1.error("This is an error message", error_code=500) + + # Example 2: Structured file logging + logger2 = configure_structured_file_logger("test_app_structured", "logs/test.json") + logger2.info("User action", user_id=123, action="login", ip="192.168.1.1") + logger2.error("Database error", error_code=500, table="users", query="SELECT *") + + # Example 3: Using convenience function + logger3 = get_file_logger("convenience_app", "logs/convenience.log") + logger3.info("Using convenience function") + + logger4 = get_file_logger("convenience_structured", "logs/convenience.json", structured=True) + logger4.info("Structured logging with convenience", event="test", data={"key": "value"}) + + # Example 4: Demonstrating reset functionality + print("\n=== Testing Reset Functionality ===") + + # Show logger info before reset + print("Before reset:") + print(f"test_app logger info: {get_logger_info('test_app')}") + + # Reset specific logger + reset_logger = reset_logger_to_defaults("test_app") + reset_logger.info("This message goes to console (after reset)") + + # Show logger info after reset + print("After reset:") + print(f"test_app logger info: {get_logger_info('test_app')}") + + # Example 5: Reset all loggers + print("\n=== Resetting All Loggers ===") + reset_all_loggers_to_defaults() + + # All loggers now use default configuration + logger1.info("This also goes to console now") + logger2.info("This also goes to console now") + + print("\nLog files created successfully!") + print("Check the 'logs' directory for the generated log files.") + print("After reset, all loggers output to console by default.") \ No newline at end of file diff --git a/flowmapper/__init__.py b/flowmapper/__init__.py deleted file mode 100644 index c8c0b81..0000000 --- a/flowmapper/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -__all__ = ( - "__version__", - "CASField", - "ContextField", - "Flow", - "Flowmap", - "flowmapper", - "OutputFormat", - "UnitField", -) - -__version__ = "0.4.2" - -from flowmapper.cas import CASField -from flowmapper.context import ContextField -from flowmapper.flow import Flow -from flowmapper.flowmap import Flowmap -from flowmapper.main import OutputFormat, flowmapper -from flowmapper.unit import UnitField diff --git a/flowmapper/cas.py b/flowmapper/cas.py deleted file mode 100644 index b243365..0000000 --- a/flowmapper/cas.py +++ /dev/null @@ -1,67 +0,0 @@ -from functools import cached_property - - -class CASField: - """ - Class for CAS Registry Numbers that accepts padded or non-padded strings - """ - - def __init__(self, cas: str | None): - if not isinstance(cas, str) and cas is not None: - raise TypeError(f"cas should be a str, not {type(cas).__name__}") - else: - self.original = cas - 
self.transformed = ("" if cas is None else cas).strip().lstrip("0").strip() - self.digits = tuple(int(d) for d in self.transformed.replace("-", "")) - - @property - def export(self): - if self.original: - return "{}-{}-{}".format( - "".join([str(x) for x in self.digits[:-3]]), - "".join([str(x) for x in self.digits[-3:-1]]), - self.digits[-1], - ) - else: - return "" - - def __repr__(self): - if not self.original: - return "CASField with missing original value" - else: - return "{} CASField: '{}' -> '{}'".format( - "Valid" if self.valid else "Invalid", self.original, self.export - ) - - def __eq__(self, other): - if isinstance(other, CASField): - return self.original and self.digits == other.digits - if isinstance(other, str): - try: - return self.digits == CASField(other).digits - except (TypeError, ValueError): - return False - return False - - @cached_property - def check_digit_expected(self): - """ - Expected digit acording to https://www.cas.org/support/documentation/chemical-substances/checkdig algorithm - """ - result = ( - sum( - [ - index * value - for index, value in enumerate(self.digits[::-1], start=1) - ] - ) - % 10 - ) - return result - - @property - def valid(self): - """ - True if check if CAS number is valid acording to https://www.cas.org/support/documentation/chemical-substances/checkdig algorithm - """ - return self.digits[-1] == self.check_digit_expected diff --git a/flowmapper/context.py b/flowmapper/context.py deleted file mode 100644 index 941c44f..0000000 --- a/flowmapper/context.py +++ /dev/null @@ -1,81 +0,0 @@ -from collections.abc import Iterable -from typing import Any - -MISSING_VALUES = { - "", - "(unknown)", - "(unspecified)", - "null", - "unknown", - "unspecified", -} - - -class ContextField(Iterable): - def __init__(self, original: Any, transformed: Any = None): - self.original = original - self.transformed = transformed or original - self.normalized = self.normalize(self.transformed) - - def normalize(self, value: Any) -> tuple[str, ...]: - if isinstance(value, (tuple, list)): - intermediate = list(value) - elif isinstance(value, str) and "/" in value: - intermediate = list(value.split("/")) - elif isinstance(value, str): - intermediate = [value] - else: - raise ValueError(f"Can't understand input context {value}") - - intermediate = [elem.lower().strip() for elem in intermediate] - - if intermediate[-1] in MISSING_VALUES: - intermediate = intermediate[:-1] - - return tuple(intermediate) - - def export_as_string(self): - if isinstance(self.original, str): - return self.original - elif isinstance(self.original, (list, tuple)): - return "✂️".join(self.original) - else: - # Only reachable by manually changing `self.original` - raise ValueError("Invalid context data") - - def __iter__(self): - return iter(self.normalized) - - def __eq__(self, other): - if self and other and isinstance(other, ContextField): - return self.original and self.normalized == other.normalized - else: - try: - normalized_other = self.normalize(other) - return (self.normalized == normalized_other) or ( - self.original == normalized_other - ) - except ValueError: - return False - - def __repr__(self): - return f"ContextField: '{self.original}' -> '{self.normalized}'" - - def __bool__(self): - return bool(self.normalized) - - def __hash__(self): - return hash(self.normalized) - - def __contains__(self, other): - """This context is more generic than the `other` context. 
- - ```python - Context("a/b/c") in Context("a/b") - >>> True - ``` - - """ - if not isinstance(other, ContextField): - return False - return self.normalized == other.normalized[: len(self.normalized)] diff --git a/flowmapper/extraction/__init__.py b/flowmapper/extraction/__init__.py deleted file mode 100644 index 05a1d14..0000000 --- a/flowmapper/extraction/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# from .ecoinvent import ecoinvent_biosphere_extractor -from .ecospold2 import ecospold2_biosphere_extractor -from .simapro_csv import simapro_csv_biosphere_extractor -from .simapro_ecospold1 import simapro_ecospold1_biosphere_extractor diff --git a/flowmapper/flow.py b/flowmapper/flow.py deleted file mode 100644 index db40b6f..0000000 --- a/flowmapper/flow.py +++ /dev/null @@ -1,89 +0,0 @@ -from typing import List - -from flowmapper.cas import CASField -from flowmapper.context import ContextField -from flowmapper.string_field import StringField -from flowmapper.string_list import StringList -from flowmapper.unit import UnitField -from flowmapper.utils import apply_transformations, generate_flow_id - - -class Flow: - def __init__( - self, - data: dict, - transformations: List[dict] | None = None, - ): - # Hash of sorted dict keys and values - self.id = generate_flow_id(data) - self.data = data - self.transformed = apply_transformations(data, transformations) - self.conversion_factor = self.transformed.get("conversion_factor") - self.identifier = StringField( - original=self.data.get("identifier"), - transformed=self.transformed.get("identifier"), - use_lowercase=False, - ) - self.name = StringField( - original=self.data.get("name"), - transformed=self.transformed.get("name"), - ) - self.unit = UnitField( - original=self.data.get("unit"), - transformed=self.transformed.get("unit"), - ) - self.context = ContextField( - original=self.data.get("context"), - transformed=self.transformed.get("context"), - ) - self.cas = CASField(data.get("CAS number")) - self.synonyms = StringList( - original=self.data.get("synonyms", []), - transformed=self.transformed.get("synonyms", []), - ) - - @property - def uniqueness_id(self): - tupleize = lambda x: tuple(x) if isinstance(x, list) else x - return ( - self.name.original, - tupleize(self.context.original), - self.unit.original, - self.identifier.original, - ) - - @property - def missing(self): - """This flow has been marked as missing in target list""" - return self.transformed.get("__missing__") - - @property - def export(self) -> dict: - return { - k: v - for k, v in [ - ("name", self.name.original), - ("unit", self.unit.original), - ("identifier", self.identifier.original), - ("context", self.context.original), - ("CAS number", self.cas.export), - ] - if v - } - - def __repr__(self) -> str: - return f"""Flow object: - Identifier: {self.identifier} - Name: {self.name} - Context: {self.context} - Unit: {self.unit}""" - - def __eq__(self, other): - return self.id == other.id - - def __hash__(self): - return hash(self.id) - - # Used in sorting - def __lt__(self, other): - return self.name.normalized < other.name.normalized diff --git a/flowmapper/flowmap.py b/flowmapper/flowmap.py deleted file mode 100644 index 37b7fb2..0000000 --- a/flowmapper/flowmap.py +++ /dev/null @@ -1,554 +0,0 @@ -import math -import warnings -from collections import Counter -from functools import cached_property -from numbers import Number -from pathlib import Path -from typing import Callable, Optional, Union - -import pandas as pd -import pint -import randonneur -from tqdm import tqdm 
- -from flowmapper import __version__ -from flowmapper.errors import DifferingConversions, DifferingMatches -from flowmapper.flow import Flow -from flowmapper.match import format_match_result, match_rules -from flowmapper.utils import match_sort_order - - -def source_flow_id(obj: Flow, ensure_id: bool = False) -> str: - return ( - str(obj.identifier.original or "") - if (obj.identifier.original or not ensure_id) - else str(obj.id or "") - ) - - -class Flowmap: - """ - Crosswalk of flows from a source flow list to a target flow list. - - This class provides functionalities to map flows between different flow lists using a series of predefined match rules. - - Attributes - ---------- - source_flows : list[Flow] - The list of (unique) source flows to be mapped. - source_flows_nomatch : list[Flow] - The list of (unique) source flows that do not match any rule. - target_flows : list[Flow] - The list of target flows for mapping. - target_flows_nomatch : list[Flow] - The list of target flows that do not match any rule. - - """ - - def __init__( - self, - source_flows: list[Flow], - target_flows: list[Flow], - rules: list[Callable[..., bool]] = None, - nomatch_rules: list[Callable[..., bool]] = None, - disable_progress: bool = False, - ): - """ - Initializes the Flowmap with source and target flows, along with optional matching rules. - - Duplicated flows are removed from both source and targets lists. - - Parameters - ---------- - source_flows : list[Flow] - The list of source flows to be mapped. - target_flows : list[Flow] - The list of target flows for mapping. - rules : list[Callable[..., bool]], optional - Custom rules for matching source flows to target flows. Default is the set of rules defined in `match_rules`. - nomatch_rules : list[Callable[..., bool]], optional - Rules to identify flows that should not be matched. - disable_progress : bool, optional - If True, progress bar display during the mapping process is disabled. - - """ - self.disable_progress = disable_progress - self.rules = rules if rules else match_rules() - if nomatch_rules: - self.source_flows = [] - self.source_flows_nomatch = [] - - for flow in source_flows: - matched = False - for rule in nomatch_rules: - if rule(flow): - self.source_flows_nomatch.append(flow) - matched = True - break - if not matched: - self.source_flows.append(flow) - self.source_flows = list(dict.fromkeys(self.source_flows)) - self.source_flows_nomatch = list(dict.fromkeys(self.source_flows_nomatch)) - - self.target_flows = [] - self.target_flows_nomatch = [] - - for flow in target_flows: - matched = False - for rule in nomatch_rules: - if rule(flow): - self.target_flows_nomatch.append(flow) - matched = True - break - if not matched: - self.target_flows.append(flow) - self.target_flows = list(dict.fromkeys(self.target_flows)) - self.target_flows_nomatch = list(dict.fromkeys(self.target_flows_nomatch)) - else: - self.source_flows = list(dict.fromkeys(source_flows)) - self.source_flows_nomatch = [] - self.target_flows = list(dict.fromkeys(target_flows)) - self.target_flows_nomatch = [] - - def get_single_match( - self, source: Flow, target_flows: list, rules: list - ) -> Union[dict, None]: - """ - Try to find a single match for `source` in `target_flows` using `rules`. - - Adds to `all_mappings` if found. 
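The `Flowmap` constructor above deduplicates flows with `list(dict.fromkeys(...))`, which keeps the first occurrence of each flow while preserving order (dicts preserve insertion order since Python 3.7, and `Flow` is hashable via its id). A quick illustration with plain strings:

```python
# Order-preserving de-duplication, as used for the source/target flow lists.
flows = ["co2", "ch4", "co2", "n2o"]
assert list(dict.fromkeys(flows)) == ["co2", "ch4", "n2o"]
```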
- """ - - def get_conversion_factor(s: Flow, t: Flow, data: dict) -> float | None: - cf_data = data.get("conversion_factor") - cf_s = s.conversion_factor - if cf_data and cf_s: - return cf_data * cf_s - elif cf_data or cf_s: - return cf_data or cf_s - else: - return s.unit.conversion_factor(t.unit) - - for target in target_flows: - for rule in rules: - is_match = rule(source, target) - if is_match: - try: - return { - "from": source, - "to": target, - "conversion_factor": get_conversion_factor( - source, target, is_match - ), - "match_rule": rule.__name__, - "match_rule_priority": self.rules.index(rule), - "info": is_match, - } - except pint.errors.UndefinedUnitError: - warnings.warng( - f"Pint Units error converting source {source.export} to target {target.export}" - ) - raise - - @cached_property - def mappings(self): - """ - Generates and returns a list of mappings from source flows to target flows based on the defined rules. - - Each mapping includes the source flow, target flow, conversion factor, the rule that determined the match, and additional information. - - A single match using the match rule with highest priority is returned for each source flow. - - Returns - ------- - list[dict] - A list of dictionaries containing the mapping details. - - """ - results = [ - self.get_single_match( - source=source, target_flows=self.target_flows, rules=self.rules - ) - for source in tqdm(self.source_flows, disable=self.disable_progress) - ] - - result, seen_sources, seen_combos = [], set(), {} - for mapping in sorted([elem for elem in results if elem], key=match_sort_order): - from_id = mapping["from"].uniqueness_id - combo_key = (from_id, mapping["to"].uniqueness_id) - if combo_key in seen_combos: - other = seen_combos[combo_key] - if ( - isinstance(other["conversion_factor"], Number) - and isinstance(mapping["conversion_factor"], Number) - and not math.isclose( - other["conversion_factor"], - mapping["conversion_factor"], - 1e-5, - 1e-5, - ) - ): - raise DifferingConversions( - f""" -Found two different conversion factors for the same match from - -{mapping['from']} - -to - -{mapping['to']} - -Conversion factors: - {other['match_rule']}: {other['conversion_factor']} - {mapping['match_rule']}: {mapping['conversion_factor']} -""" - ) - elif not isinstance(other["conversion_factor"], Number) and isinstance( - mapping["conversion_factor"], Number - ): - seen_combos[combo_key] = mapping - elif from_id in seen_sources: - other = next( - value for key, value in seen_combos.items() if key[0] == from_id - ) - raise DifferingMatches( - f""" -{mapping['from']} - -Matched to multiple targets, including: - -Match rule: {mapping['match_rule']}: -{mapping['to']} - -Match rule: {other['match_rule']} -{other['to']} -""" - ) - else: - seen_sources.add(from_id) - seen_combos[combo_key] = mapping - result.append(mapping) - - return result - - @cached_property - def _matched_source_flows_ids(self): - return {map_entry["from"].id for map_entry in self.mappings} - - @cached_property - def _matched_target_flows_ids(self): - return {map_entry["to"].id for map_entry in self.mappings} - - @cached_property - def matched_source(self): - """ - Provides a list of source flows that have been successfully matched to target flows. - - Returns - ------- - list[Flow] - A list of matched source flow objects. 
- - """ - result = [ - flow - for flow in self.source_flows - if flow.id in self._matched_source_flows_ids - ] - return result - - @cached_property - def unmatched_source(self): - """ - Provides a list of source flows that have not been matched to any target flows. - - Returns - ------- - list[Flow] - A list of unmatched source flow objects. - - """ - result = [ - flow - for flow in self.source_flows - if flow.id not in self._matched_source_flows_ids - ] - return result - - @cached_property - def matched_source_statistics(self): - """ - Calculates statistics for matched source flows, including the number of matches and the matching percentage for each context. - - Returns - ------- - pandas.DataFrame - A DataFrame containing matching statistics for source flows. - - """ - matched = Counter([flow.context.value for flow in self.matched_source]) - matched = pd.Series(matched).reset_index() - matched.columns = ["context", "matched"] - - total = Counter([flow.context.value for flow in self.source_flows]) - total = pd.Series(total).reset_index() - total.columns = ["context", "total"] - - df = pd.merge(matched, total, on="context", how="outer") - df = df.fillna(0).astype({"matched": "int", "total": "int"}) - - df["percent"] = df.matched / df.total - result = df.sort_values("percent") - return result - - @cached_property - def matched_target(self): - """ - Provides a list of target flows that have been successfully matched to source flows. - - Returns - ------- - list[Flow] - A list of matched target flow objects. - - """ - result = [ - flow - for flow in self.target_flows - if flow.id in self._matched_target_flows_ids - ] - return result - - @cached_property - def unmatched_target(self): - """ - Provides a list of target flows that have not been matched to any source flows. - - Returns - ------- - list[Flow] - A list of unmatched target flow objects. - - """ - result = [ - flow - for flow in self.target_flows - if flow.id not in self._matched_target_flows_ids - ] - return result - - @cached_property - def matched_target_statistics(self): - """ - Calculates statistics for matched target flows, including the number of matches and the matching percentage for each context. - - Returns - ------- - pandas.DataFrame - A DataFrame containing matching statistics for target flows. - - """ - matched = Counter([flow.context.value for flow in self.matched_target]) - matched = pd.Series(matched).reset_index() - matched.columns = ["context", "matched"] - - total = Counter([flow.context.value for flow in self.target_flows]) - total = pd.Series(total).reset_index() - total.columns = ["context", "total"] - - df = pd.merge(matched, total, on="context", how="outer") - df = df.fillna(0).astype({"matched": "int", "total": "int"}) - - df["percent"] = df.matched / df.total - result = df.sort_values("percent") - return result - - def statistics(self): - """ - Prints out summary statistics for the flow mapping process. - - """ - source_msg = ( - f"{len(self.source_flows)} source flows ({len(self.source_flows_nomatch)} excluded)..." - if self.source_flows_nomatch - else f"{len(self.source_flows)} source flows..." - ) - print(source_msg) - target_msg = ( - f"{len(self.target_flows)} target flows ({len(self.target_flows_nomatch)} excluded)..." - if self.target_flows_nomatch - else f"{len(self.target_flows)} target flows..." - ) - print(target_msg) - print( - f"{len(self.mappings)} mappings ({len(self.matched_source) / len(self.source_flows):.2%} of total)." 
- ) - cardinalities = dict(Counter([x["cardinality"] for x in self._cardinalities])) - print(f"Mappings cardinalities: {str(cardinalities)}") - - @cached_property - def _cardinalities(self): - """ - Calculates and returns the cardinalities of mappings between source and target flows. - - Returns - ------- - list[dict] - A sorted list of dictionaries, each indicating the cardinality relationship between a pair of source and target flows. - - """ - mappings = [ - (mapentry["from"].id, mapentry["to"].id) for mapentry in self.mappings - ] - lhs_counts = Counter([pair[0] for pair in mappings]) - rhs_counts = Counter([pair[1] for pair in mappings]) - - result = [] - - for lhs, rhs in mappings: - lhs_count = lhs_counts[lhs] - rhs_count = rhs_counts[rhs] - if lhs_count == 1 and rhs_count == 1: - result.append({"from": lhs, "to": rhs, "cardinality": "1:1"}) - elif lhs_count == 1 and rhs_count > 1: - result.append({"from": lhs, "to": rhs, "cardinality": "N:1"}) - elif lhs_count > 1 and rhs_count == 1: - result.append({"from": lhs, "to": rhs, "cardinality": "1:N"}) - elif lhs_count > 1 and rhs_count > 1: - result.append({"from": lhs, "to": rhs, "cardinality": "N:M"}) - - return sorted(result, key=lambda x: x["from"]) - - def to_randonneur( - self, - source_id: str, - target_id: str, - contributors: list, - mapping_source: dict, - mapping_target: dict, - version: str = "1.0.0", - licenses: Optional[list] = None, - homepage: Optional[str] = None, - name: Optional[str] = None, - path: Optional[Path] = None, - ) -> randonneur.Datapackage: - """ - Export mappings using randonneur data migration file format. - - Parameters - ---------- - path : Path, optional - If provided export the output file to disk. - - Returns - ------- - randonneur.Datapackage object. - - """ - dp = randonneur.Datapackage( - name=name or f"{source_id}-{target_id}", - source_id=source_id, - target_id=target_id, - description=f"Flowmapper {__version__} elementary flow correspondence from {source_id} to {target_id}", - contributors=contributors, - mapping_source=mapping_source, - mapping_target=mapping_target, - homepage=homepage, - version=version, - licenses=licenses, - ) - - result = [ - format_match_result( - map_entry["from"], - map_entry["to"], - map_entry["conversion_factor"], - map_entry["info"], - ) - for map_entry in self.mappings - ] - - dp.add_data(verb="update", data=result) - - if path is not None: - dp.to_json(path) - return dp - - def to_glad( - self, - path: Optional[Path] = None, - ensure_id: bool = False, - missing_source: bool = False, - ): - """ - Export mappings using GLAD flow mapping format, optionally ensuring each flow has an identifier. - - Formats the mapping results according to Global LCA Data Access (GLAD) network initiative flow mapping format. - - Parameters - ---------- - path : Path, optional - If provided export the output file to disk. - ensure_id : bool, optional - If True, ensures each flow has an identifier, default is False. - - Returns - ------- - pandas.DataFrame - A DataFrame containing the formatted mapping results in GLAD format. 
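The `_cardinalities` property above classifies each mapping by how many times its source and target ids appear across all mappings. A compact standalone sketch of the same labelling rule, using plain string ids:

```python
from collections import Counter

pairs = [("s1", "t1"), ("s2", "t2"), ("s3", "t2")]  # s2 and s3 share target t2
lhs = Counter(s for s, _ in pairs)
rhs = Counter(t for _, t in pairs)

def label(source: str, target: str) -> str:
    # Same rule as the removed _cardinalities property.
    return {(True, True): "1:1", (True, False): "N:1",
            (False, True): "1:N", (False, False): "N:M"}[
        (lhs[source] == 1, rhs[target] == 1)
    ]

assert [label(s, t) for s, t in pairs] == ["1:1", "N:1", "N:1"]
```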
- - """ - data = [] - for map_entry in self.mappings: - data.append( - { - "SourceFlowName": map_entry["from"].name.original, - "SourceFlowUUID": source_flow_id( - map_entry["from"], ensure_id=ensure_id - ), - "SourceFlowContext": map_entry["from"].context.export_as_string(), - "SourceUnit": map_entry["from"].unit.original, - "MatchCondition": "=", - "ConversionFactor": map_entry["conversion_factor"], - "TargetFlowName": map_entry["to"].name.original, - "TargetFlowUUID": map_entry["to"].identifier.original, - "TargetFlowContext": map_entry["to"].context.export_as_string(), - "TargetUnit": map_entry["to"].unit.original, - "MemoMapper": map_entry["info"].get("comment"), - } - ) - - if missing_source: - for flow_obj in self.unmatched_source: - data.append( - { - "SourceFlowName": flow_obj.name.original, - "SourceFlowUUID": source_flow_id(flow_obj, ensure_id=ensure_id), - "SourceFlowContext": flow_obj.context.export_as_string(), - "SourceUnit": flow_obj.unit.original, - } - ) - - result = pd.DataFrame(data) - - if not path: - return result - else: - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - writer = pd.ExcelWriter( - path, - engine="xlsxwriter", - engine_kwargs={"options": {"strings_to_formulas": False}}, - ) - result.to_excel(writer, sheet_name="Mapping", index=False, na_rep="NaN") - - for column in result: - column_length = max( - result[column].astype(str).map(len).max(), len(column) - ) - col_idx = result.columns.get_loc(column) - writer.sheets["Mapping"].set_column(col_idx, col_idx, column_length) - - writer.close() diff --git a/flowmapper/main.py b/flowmapper/main.py deleted file mode 100644 index e3a4330..0000000 --- a/flowmapper/main.py +++ /dev/null @@ -1,150 +0,0 @@ -import json -import logging -from enum import Enum -from pathlib import Path -from typing import Optional - -from flowmapper.flow import Flow -from flowmapper.flowmap import Flowmap -from flowmapper.transformation_mapping import prepare_transformations -from flowmapper.utils import load_standard_transformations, read_migration_files - -logger = logging.getLogger(__name__) - - -def sorting_function(obj: dict) -> tuple: - return ( - obj.get("name", "ZZZ"), - str(obj.get("context", "ZZZ")), - obj.get("unit", "ZZZ"), - ) - - -class OutputFormat(str, Enum): - all = "all" - glad = "glad" - randonneur = "randonneur" - - -def flowmapper( - source: Path, - target: Path, - mapping_source: dict, - mapping_target: dict, - source_id: str, - target_id: str, - contributors: list, - output_dir: Path, - format: OutputFormat, - version: str = "1.0.0", - default_transformations: bool = True, - transformations: Optional[list[Path | str]] = None, - unmatched_source: bool = True, - unmatched_target: bool = True, - matched_source: bool = False, - matched_target: bool = False, - licenses: Optional[list] = None, - homepage: Optional[str] = None, - name: Optional[str] = None, -) -> Flowmap: - """ - Generate mappings between elementary flows lists - """ - output_dir.mkdir(parents=True, exist_ok=True) - - loaded_transformations = [] - if default_transformations: - loaded_transformations.extend(load_standard_transformations()) - if transformations: - loaded_transformations.extend(read_migration_files(*transformations)) - - prepared_transformations = prepare_transformations(loaded_transformations) - - source_flows = [ - Flow(flow, prepared_transformations) for flow in json.load(open(source)) - ] - source_flows = [flow for flow in source_flows if not flow.missing] - target_flows = [ - Flow(flow, prepared_transformations) 
for flow in json.load(open(target)) - ] - - flowmap = Flowmap(source_flows, target_flows) - flowmap.statistics() - - stem = f"{source.stem}-{target.stem}" - - if matched_source: - with open(output_dir / f"{stem}-matched-source.json", "w") as fs: - json.dump( - sorted( - [flow.export for flow in flowmap.matched_source], - key=sorting_function, - ), - fs, - indent=True, - ) - - if unmatched_source: - with open(output_dir / f"{stem}-unmatched-source.json", "w") as fs: - json.dump( - sorted( - [flow.export for flow in flowmap.unmatched_source], - key=sorting_function, - ), - fs, - indent=True, - ) - - if matched_target: - with open(output_dir / f"{stem}-matched-target.json", "w") as fs: - json.dump( - sorted( - [flow.export for flow in flowmap.matched_target], - key=sorting_function, - ), - fs, - indent=True, - ) - - if unmatched_target: - with open(output_dir / f"{stem}-unmatched-target.json", "w") as fs: - json.dump( - sorted( - [flow.export for flow in flowmap.unmatched_target], - key=sorting_function, - ), - fs, - indent=True, - ) - - if format.value == "randonneur": - flowmap.to_randonneur( - source_id=source_id, - target_id=target_id, - contributors=contributors, - mapping_source=mapping_source, - mapping_target=mapping_target, - version=version, - licenses=licenses, - homepage=homepage, - name=name, - path=output_dir / f"{stem}.json", - ) - elif format.value == "glad": - flowmap.to_glad(output_dir / f"{stem}.xlsx", missing_source=True) - else: - flowmap.to_randonneur( - source_id=source_id, - target_id=target_id, - contributors=contributors, - mapping_source=mapping_source, - mapping_target=mapping_target, - version=version, - licenses=licenses, - homepage=homepage, - name=name, - path=output_dir / f"{stem}.json", - ) - flowmap.to_glad(output_dir / f"{stem}.xlsx", missing_source=True) - - return flowmap diff --git a/flowmapper/match.py b/flowmapper/match.py deleted file mode 100644 index 822b7d4..0000000 --- a/flowmapper/match.py +++ /dev/null @@ -1,245 +0,0 @@ -import logging - -from flowmapper.constants import RESOURCE_PARENT_CATEGORY -from flowmapper.flow import Flow -from flowmapper.utils import ( - ends_with_location, - location_reverser, - names_and_locations, - rm_parentheses_roman_numerals, - rm_roman_numerals_ionic_state, -) -from flowmapper.preferred_synonyms import ( - match_identical_names_in_preferred_synonyms, - match_identical_names_in_synonyms, -) - -logger = logging.getLogger(__name__) - - -def format_match_result(s: Flow, t: Flow, conversion_factor: float, match_info: dict): - return match_info | { - "source": s.export, - "target": t.export, - "conversion_factor": conversion_factor, - } - - -def match_identical_identifier(s: Flow, t: Flow, comment: str = "Identical identifier"): - if s.identifier and (s.identifier == t.identifier): - return {"comment": comment} - - -def match_identical_cas_numbers( - s: Flow, t: Flow, comment: str = "Identical CAS numbers" -): - if (s.cas == t.cas) and (s.context == t.context): - return {"comment": comment} - - -def match_identical_names(s: Flow, t: Flow, comment="Identical names"): - if (s.name == t.name) and (s.context == t.context): - return {"comment": comment} - - -def match_identical_names_without_commas( - s: Flow, t: Flow, comment="Identical names when commas removed" -): - if (s.name.normalized.replace(",", "") == t.name.normalized.replace(",", "")) and ( - s.context == t.context - ): - return {"comment": comment} - - -def match_resources_with_wrong_subcontext(s: Flow, t: Flow): - if ( - s.context.normalized[0].lower() in 
RESOURCE_PARENT_CATEGORY - and t.context.normalized[0].lower() in RESOURCE_PARENT_CATEGORY - and s.name == t.name - ): - return {"comment": "Resources with identical name but wrong subcontext"} - - -def match_identical_names_except_missing_suffix( - s: Flow, t: Flow, suffix: str, comment: str = "Identical names except missing suffix" -) -> dict: - if ( - (f"{s.name.normalized}, {suffix}" == t.name) - or (f"{t.name.normalized}, {suffix}" == s.name) - or (f"{s.name.normalized} {suffix}" == t.name) - or (f"{t.name.normalized} {suffix}" == s.name) - ) and s.context == t.context: - return {"comment": comment} - - -def match_names_with_roman_numerals_in_parentheses( - s: Flow, t: Flow, comment="With/without roman numerals in parentheses" -): - if ( - rm_parentheses_roman_numerals(s.name.normalized) - == rm_parentheses_roman_numerals(t.name.normalized) - and s.context == t.context - ): - return {"comment": comment} - - -def match_custom_names_with_location_codes( - s: Flow, t: Flow, comment="Custom names with location code" -): - """Matching which pulls out location codes but also allows for custom name transformations.""" - match = ends_with_location.search(s.name.normalized) - if match: - location = location_reverser[match.group("code")] - # Don't use replace, it will find e.g. ", fr" in "transformation, from" - name = s.name.normalized[: -len(match.group())] - try: - mapped_name = names_and_locations[name]["target"] - except KeyError: - return - if mapped_name == t.name.normalized and s.context == t.context: - result = {"comment": comment, "location": location} | names_and_locations[ - name - ].get("extra", {}) - if ( - s.name.normalized.startswith("water") - and s.unit.normalized == "cubic_meter" - and t.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 1000 - elif ( - s.name.normalized.startswith("water") - and t.unit.normalized == "cubic_meter" - and s.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 0.001 - return result - - -def match_names_with_location_codes( - s: Flow, t: Flow, comment="Name matching with location code" -): - match = ends_with_location.search(s.name.normalized) - if match: - location = location_reverser[match.group("code")] - name = s.name.normalized.replace(match.group(), "") - if name == t.name.normalized and s.context == t.context: - result = {"comment": comment, "location": location} - if ( - s.name.normalized.startswith("water") - and s.unit.normalized == "cubic_meter" - and t.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 1000.0 - elif ( - s.name.normalized.startswith("water") - and t.unit.normalized == "cubic_meter" - and s.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 0.001 - return result - - -def match_resource_names_with_location_codes_and_parent_context( - s: Flow, t: Flow, comment="Name matching with location code and parent context" -): - """Sometimes we have flows in a parent context,""" - match = ends_with_location.search(s.name.normalized) - if match: - location = location_reverser[match.group("code")] - name = s.name.normalized.replace(match.group(), "") - if ( - name == t.name.normalized - and s.context.normalized[0].lower() in RESOURCE_PARENT_CATEGORY - and t.context.normalized[0].lower() in RESOURCE_PARENT_CATEGORY - ): - result = {"comment": comment, "location": location} - if ( - s.name.normalized.startswith("water") - and s.unit.normalized == "cubic_meter" - and t.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 1000.0 - elif ( - 
s.name.normalized.startswith("water") - and t.unit.normalized == "cubic_meter" - and s.unit.normalized == "kilogram" - ): - result["conversion_factor"] = 0.001 - return result - - -def match_non_ionic_state( - s: Flow, t: Flow, comment="Non-ionic state if no better match" -): - if ( - (rm_roman_numerals_ionic_state(s.name.normalized) == t.name) - or (rm_roman_numerals_ionic_state(s.name.normalized) + ", ion" == t.name) - ) and s.context == t.context: - return {"comment": comment} - - -def match_biogenic_to_non_fossil( - s: Flow, t: Flow, comment="Biogenic to non-fossil if no better match" -): - if ( - s.name.normalized.removesuffix(", biogenic") - == t.name.normalized.removesuffix(", non-fossil") - and s.context == t.context - ): - return {"comment": comment} - - -def match_resources_with_suffix_in_ground(s: Flow, t: Flow): - return match_identical_names_except_missing_suffix( - s, t, suffix="in ground", comment="Resources with suffix in ground" - ) - - -def match_flows_with_suffix_unspecified_origin(s: Flow, t: Flow): - return match_identical_names_except_missing_suffix( - s, - t, - suffix="unspecified origin", - comment="Flows with suffix unspecified origin", - ) - - -def match_resources_with_suffix_in_water(s: Flow, t: Flow): - return match_identical_names_except_missing_suffix( - s, t, suffix="in water", comment="Resources with suffix in water" - ) - - -def match_resources_with_suffix_in_air(s: Flow, t: Flow): - return match_identical_names_except_missing_suffix( - s, t, suffix="in air", comment="Resources with suffix in air" - ) - - -def match_emissions_with_suffix_ion(s: Flow, t: Flow): - return match_identical_names_except_missing_suffix( - s, t, suffix="ion", comment="Match emissions with suffix ion" - ) - - -def match_rules(): - return [ - match_identical_identifier, - match_identical_names, - match_identical_names_without_commas, - match_resources_with_suffix_in_ground, - match_resources_with_suffix_in_water, - match_resources_with_suffix_in_air, - match_flows_with_suffix_unspecified_origin, - match_resources_with_wrong_subcontext, - match_emissions_with_suffix_ion, - match_names_with_roman_numerals_in_parentheses, - match_names_with_location_codes, - match_resource_names_with_location_codes_and_parent_context, - match_custom_names_with_location_codes, - match_identical_cas_numbers, - match_non_ionic_state, - match_biogenic_to_non_fossil, - match_identical_names_in_preferred_synonyms, - match_identical_names_in_synonyms, - ] diff --git a/flowmapper/string_field.py b/flowmapper/string_field.py deleted file mode 100644 index c607eea..0000000 --- a/flowmapper/string_field.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any, Generic, TypeVar - -from flowmapper.utils import normalize_str - -SF = TypeVar("SF") - - -class StringField(Generic[SF]): - def __init__( - self, - original: str | None, - transformed: str | None = None, - use_lowercase: bool = True, - ): - self.original = original - self.normalized = normalize_str(transformed or original) - self.use_lowercase = use_lowercase - if self.use_lowercase: - self.normalized = self.normalized.lower() - - def __eq__(self, other: Any) -> bool: - if self.normalized == "": - return False - elif isinstance(other, StringField): - return ( - self.normalized == other.normalized or self.original == other.original - ) - elif isinstance(other, str): - if self.use_lowercase: - return self.normalized == other.lower() - else: - return self.normalized == other - else: - return False - - def __bool__(self) -> bool: - return bool(self.original) - 
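`StringField` above normalizes before comparing: NFC Unicode normalization, whitespace stripping, and (by default) lowercasing. The effect in isolation, as a minimal sketch:

```python
import unicodedata

def normalize(s: str) -> str:
    # NFC-normalize, strip, lowercase: the default StringField pipeline.
    return unicodedata.normalize("NFC", s).strip().lower()

assert normalize("  Carbon dioxide ") == normalize("carbon DIOXIDE")
# Note: an empty normalized value never compares equal in StringField.__eq__.
```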
- def __repr__(self) -> str: - if not self.original: - return "StringField with missing original value" - else: - return f"StringField: '{self.original}' -> '{self.normalized}'" diff --git a/flowmapper/string_list.py b/flowmapper/string_list.py deleted file mode 100644 index e76c021..0000000 --- a/flowmapper/string_list.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections.abc import Collection, Iterable -from typing import Any, List - -from flowmapper.string_field import StringField - - -class StringList(Collection): - def __init__(self, original: List[str], transformed: List[str] | None = None): - transformed = transformed or original - if original is None: - self.data = [] - else: - self.data = [ - StringField(original=a, transformed=b) - for a, b in zip(original, transformed) - ] - - def __contains__(self, obj: Any) -> bool: - return any(obj == elem for elem in self.data) - - def __iter__(self) -> Iterable: - yield from self.data - - def __len__(self) -> int: - return len(self.data) - - def __bool__(self) -> bool: - return bool(self.data) - - def __repr__(self): - if self: - return "StringList: {}".format([repr(o) for o in self.data]) - else: - return "StringList: Empty" diff --git a/flowmapper/transformation_mapping.py b/flowmapper/transformation_mapping.py deleted file mode 100644 index 9b0c1a9..0000000 --- a/flowmapper/transformation_mapping.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections import UserDict -from functools import partial -from typing import Any, List - -from flowmapper.context import ContextField -from flowmapper.string_field import StringField -from flowmapper.unit import UnitField - -ATTRIBUTE_MAPPING = { - "unit": partial(UnitField, use_lowercase=True), - "context": ContextField, - "identifier": partial(StringField, use_lowercase=True), -} - - -class ComparableFlowMapping(UserDict): - def __init__(self, initialdata: dict): - self.data = { - key: ATTRIBUTE_MAPPING.get(key, StringField)(value) - for key, value in initialdata.items() - } - - def __setitem__(self, key: Any, value: Any) -> None: - self.data[key] = ATTRIBUTE_MAPPING.get(key, StringField)(value) - - def __eq__(self, other: Any) -> bool: - return all(value == other.get(key) for key, value in self.data.items() if value) - - -def prepare_transformations(transformations: List[dict] | None) -> List[dict]: - if not transformations: - return [] - - prepared_transformations = [] - - for transformation_dataset in transformations: - for transformation_mapping in transformation_dataset.get("update", []): - transformation_mapping["source"] = ComparableFlowMapping( - transformation_mapping["source"] - ) - for other_dataset in prepared_transformations: - for other_mapping in other_dataset.get("update", []): - if other_mapping["source"] == transformation_mapping["source"]: - for key, value in other_mapping["target"].items(): - transformation_mapping["source"][key] = value - break - - prepared_transformations.append(transformation_dataset) - - return prepared_transformations diff --git a/flowmapper/unit.py b/flowmapper/unit.py deleted file mode 100644 index ca9ddf9..0000000 --- a/flowmapper/unit.py +++ /dev/null @@ -1,82 +0,0 @@ -import importlib.resources as resource -import math -from typing import Any, Generic, TypeVar - -from pint import UnitRegistry, errors - -from flowmapper.constants import PINT_MAPPING -from flowmapper.utils import normalize_str - -ureg = UnitRegistry() - -with resource.as_file(resource.files("flowmapper") / "data" / "units.txt") as filepath: - ureg.load_definitions(filepath) - -U = TypeVar("U") 
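`ComparableFlowMapping.__eq__` above implements partial matching: a transformation's `source` matches a flow when every field the source sets agrees, while fields it omits are ignored. The same idea with plain dicts (the deleted `matcher` in `utils.py` below is essentially this, minus the falsy-value filter):

```python
# Plain-dict sketch of the partial-match rule used for transformation sources.
def source_matches(source: dict, flow: dict) -> bool:
    return all(flow.get(key) == value for key, value in source.items() if value)

flow = {"name": "carbon dioxide", "context": "air", "unit": "kg"}
assert source_matches({"name": "carbon dioxide"}, flow)  # subset of fields
assert not source_matches({"name": "methane"}, flow)     # field disagrees
```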
-
-
-class UnitField(Generic[U]):
-    def __init__(
-        self, original: str, transformed: str | None = None, use_lowercase: bool = False
-    ):
-        if transformed is None:
-            transformed = original
-        self.original = original
-        if self.is_uri(transformed):
-            # Private attribute, could change in future
-            self._glossary_entry = self.resolve_uri(transformed)
-            self.normalized = normalize_str(self._glossary_entry["label"])
-        else:
-            self.normalized = normalize_str(transformed)
-
-        self.use_lowercase = use_lowercase
-        if self.use_lowercase:
-            self.normalized = self.normalized.lower()
-
-        # Private attribute, could change in future
-        self._pint_compatible = PINT_MAPPING.get(self.normalized, self.normalized)
-
-    def is_uri(self, value: str) -> bool:
-        # Placeholder for when we support glossary entries
-        return False
-
-    def resolve_uri(self, uri: str) -> None:
-        # Placeholder
-        pass
-
-    def __repr__(self) -> str:
-        return f"UnitField: '{self.original}' -> '{self.normalized}'"
-
-    def __bool__(self) -> bool:
-        return bool(self.original)
-
-    def __eq__(self, other: Any):
-        if isinstance(other, UnitField):
-            return (
-                self.normalized == other.normalized
-                or self.conversion_factor(other) == 1
-            )
-        elif isinstance(other, str) and self.use_lowercase:
-            return self.normalized == other.lower()
-        elif isinstance(other, str):
-            return self.normalized == other
-        else:
-            return False
-
-    def compatible(self, other: Any):
-        if not isinstance(other, UnitField):
-            return False
-        else:
-            return math.isfinite(self.conversion_factor(other))
-
-    def conversion_factor(self, to: U | Any) -> float:
-        if self.normalized == to.normalized:
-            result = 1.0
-        else:
-            try:
-                result = (
-                    ureg(self._pint_compatible).to(ureg(to._pint_compatible)).magnitude
-                )
-            except (errors.DimensionalityError, errors.UndefinedUnitError):
-                result = float("nan")
-        return result
diff --git a/flowmapper/utils.py b/flowmapper/utils.py
deleted file mode 100644
index 9432041..0000000
--- a/flowmapper/utils.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import copy
-import hashlib
-import importlib.resources as resource
-import json
-import re
-import unicodedata
-from collections.abc import Collection, Mapping
-from pathlib import Path
-from typing import Any, List, Union
-
-RESULTS_DIR = Path(__file__).parent / "manual_matching" / "results"
-
-with resource.as_file(
-    resource.files("flowmapper") / "data" / "places.json"
-) as filepath:
-    places = json.load(open(filepath))
-
-ends_with_location = re.compile(
-    ",[ \t\r\f]+(?P<code>{})$".format(
-        "|".join([re.escape(string) for string in places])
-    ),
-    re.IGNORECASE,
-)
-# All solutions I found for returning original string instead of
-# lower case one were very ugly
-location_reverser = {obj.lower(): obj for obj in places}
-if len(location_reverser) != len(places):
-    raise ValueError("Multiple possible locations after lower case conversion")
-
-us_lci_ends_with_location = re.compile(
-    "/(?P<code>{})$".format(
-        "|".join(
-            [
-                re.escape(string)
-                for string in places
-                if 2 <= len(string) <= 3 and string.upper() == string
-            ]
-        )
-    ),
-)
-
-with resource.as_file(
-    resource.files("flowmapper") / "data" / "names_and_locations.json"
-) as filepath:
-    names_and_locations = {o["source"]: o for o in json.load(open(filepath))}
-
-
-def load_standard_transformations() -> List:
-    # with resource.as_file(
-    #     resource.files("flowmapper") / "data" / "standard-units-harmonization.json"
-    # ) as filepath:
-    #     units = json.load(open(filepath))
-    with resource.as_file(
-        resource.files("flowmapper") / "data" / "simapro-2023-ecoinvent-3-contexts.json"
-    ) as filepath:
-        contexts = json.load(open(filepath))
-    # return [units, contexts]
-    return [contexts]
-
-
-def generate_flow_id(flow: dict):
-    flow_str = json.dumps(flow, sort_keys=True)
-    result = hashlib.md5(flow_str.encode("utf-8")).hexdigest()
-    return result
-
-
-def read_migration_files(*filepaths: Union[str, Path]) -> List[dict]:
-    """
-    Read and aggregate migration data from multiple JSON files.
-
-    This function opens and reads a series of JSON files, each containing a
-    migration dataset. Files found in `RESULTS_DIR` can be referenced by
-    filename alone.
-
-    Parameters
-    ----------
-    *filepaths : Union[str, Path]
-        Variable length argument list of file paths.
-
-    Returns
-    -------
-    List[dict]
-        A list with the parsed contents of each JSON file read.
-    """
-    migration_data = []
-
-    for filepath in filepaths:
-        if (RESULTS_DIR / filepath).is_file():
-            filepath = RESULTS_DIR / filepath
-        with open(Path(filepath), "r") as fs:
-            migration_data.append(json.load(fs))
-
-    return migration_data
-
-
-def rm_parentheses_roman_numerals(s: str):
-    pattern = r"\(\s*([ivxlcdm]+)\s*\)"
-    return re.sub(pattern, r"\1", s)
-
-
-def rm_roman_numerals_ionic_state(s: str):
-    pattern = r"\s*\(\s*[ivxlcdm]+\s*\)$"
-    return re.sub(pattern, "", s)
-
-
-def normalize_str(s):
-    if s is not None:
-        return unicodedata.normalize("NFC", s).strip()
-    else:
-        return ""
-
-
-def transform_flow(flow, transformation):
-    result = copy.copy(flow)
-    result.update(transformation["target"])
-    return result
-
-
-def matcher(source, target):
-    return all(target.get(key) == value for key, value in source.items())
-
-
-def rowercase(obj: Any) -> Any:
-    """Recursively transform everything to lower case"""
-    if isinstance(obj, str):
-        return obj.lower()
-    elif isinstance(obj, Mapping):
-        return type(obj)([(rowercase(k), rowercase(v)) for k, v in obj.items()])
-    elif isinstance(obj, Collection):
-        return type(obj)([rowercase(o) for o in obj])
-    else:
-        return obj
-
-
-def match_sort_order(obj: dict) -> tuple:
-    return (
-        not obj["from"].name,
-        obj["from"].name.normalized,
-        not obj["from"].context,
-        obj["from"].context.export_as_string(),
-    )
-
-
-def apply_transformations(obj: dict, transformations: List[dict] | None) -> dict:
-    if not transformations:
-        return obj
-    obj = copy.deepcopy(obj)
-    lower = rowercase(obj)
-
-    for dataset in transformations:
-        for transformation_obj in dataset.get("create", []):
-            if matcher(
-                transformation_obj,
-                lower if dataset.get("case-insensitive") else obj,
-            ):
-                # Marked as needing to be created; missing in target list
-                obj["__missing__"] = True
-                break
-        for transformation_obj in dataset.get("update", []):
-            if transformation_obj["source"] == obj:
-                obj.update(transformation_obj["target"])
-                if "conversion_factor" in transformation_obj:
-                    obj["conversion_factor"] = transformation_obj["conversion_factor"]
-                break
-
-    return obj
diff --git a/pyproject.toml b/pyproject.toml
index 2fb6a6d..7c64211 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,23 +20,26 @@ classifiers = [
     "Development Status :: 4 - Beta",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language ::
Python :: 3.13", + "Programming Language :: Python :: 3.14", "Natural Language :: English", "Operating System :: OS Independent", "Topic :: Scientific/Engineering" ] -requires-python = ">=3.10" +requires-python = ">=3.11" dependencies = [ "bw_simapro_csv", "pandas[excel]", "pint", "pydantic", "pyecospold", - "randonneur>=0.6", - "randonneur_data", + "randonneur>=0.7.1", + "randonneur_data>=0.7.2", + "RapidFuzz", + "roman", + "structlog", "tqdm", "typer", "xmltodict", @@ -60,6 +63,7 @@ testing = [ dev = [ "build", "pre-commit", + "pyinstrument", "pylint", "pytest", "pytest-cov", @@ -72,7 +76,9 @@ flowmapper = "flowmapper.cli:app" [tool.setuptools] license-files = ["LICENSE"] include-package-data = true -packages = ["flowmapper", "flowmapper.extraction", "flowmapper.manual_matching"] + +[tool.setuptools.packages.find] +where = ["src"] [tool.setuptools.dynamic] version = {attr = "flowmapper.__version__"} @@ -87,7 +93,7 @@ norecursedirs = [ "build", ".tox" ] -testpaths = ["tests/*.py"] +testpaths = ["tests/**/*.py"] [tool.flake8] # Some sane defaults for the code style checker flake8 diff --git a/src/flowmapper/__init__.py b/src/flowmapper/__init__.py new file mode 100644 index 0000000..70a878b --- /dev/null +++ b/src/flowmapper/__init__.py @@ -0,0 +1,23 @@ +__all__ = ( + "__version__", + "CASField", + "ContextField", + "Flow", + "Flowmap", + "flowmapper", + "Match", + "MatchCondition", + "NormalizedFlow", + "UnitField", +) + +__version__ = "0.4.2" + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match import Match +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.fields import CASField, ContextField +from flowmapper.flowmap import Flowmap +from flowmapper.main import flowmapper +from flowmapper.unit import UnitField diff --git a/flowmapper/cli.py b/src/flowmapper/cli.py similarity index 57% rename from flowmapper/cli.py rename to src/flowmapper/cli.py index 8130edf..9775170 100644 --- a/flowmapper/cli.py +++ b/src/flowmapper/cli.py @@ -1,15 +1,23 @@ import importlib.metadata -import logging from pathlib import Path -from typing import Optional +from typing import Annotated +import structlog import typer -from typing_extensions import Annotated -from .extraction import ecospold2_biosphere_extractor, simapro_csv_biosphere_extractor -from .main import OutputFormat, flowmapper +from flowmapper.extraction import ( + ecospold2_biosphere_extractor, + simapro_csv_biosphere_extractor, +) +from flowmapper.main import flowmapper -logger = logging.getLogger(__name__) +try: + from pyinstrument import Profiler +except ImportError: + Profiler = None + + +logger = structlog.get_logger("flowmapper") app = typer.Typer() @@ -23,7 +31,7 @@ def version_callback(value: bool): @app.callback() def main( version: Annotated[ - Optional[bool], + bool | None, typer.Option("--version", callback=version_callback, is_eager=True), ] = None, ): @@ -34,20 +42,16 @@ def main( @app.command() def map( - source: Annotated[Path, typer.Argument(help="Path to source flowlist")], - target: Annotated[Path, typer.Argument(help="Path to target flowlist")], + source: Annotated[Path, typer.Argument(help="Path to source flow list")], + target: Annotated[Path, typer.Argument(help="Path to target flow list")], output_dir: Annotated[ Path, typer.Option(help="Directory to save mapping and diagnostics files") ] = Path("."), - format: Annotated[ - OutputFormat, - typer.Option(help="Mapping file output format", case_sensitive=False), - ] = 
"all", default_transformations: Annotated[ bool, typer.Option(help="Include default context and unit transformations?") ] = True, transformations: Annotated[ - Optional[list[Path]], + list[Path] | None, typer.Option( "--transformations", "-t", @@ -70,12 +74,45 @@ def map( bool, typer.Option(help="Write original target matched flows into separate file?"), ] = False, + profile: Annotated[ + bool, + typer.Option(help="Profile matching code with pyinstrument"), + ] = False, ): - return flowmapper( + # Default generic mapping for JSON flow lists + generic_mapping = { + "expression language": "JSONPath", + "labels": { + "name": "name", + "context": "context", + "unit": "unit", + "identifier": "identifier", + "cas_number": "cas_number", + "location": "location", + }, + } + + if profile: + if Profiler is None: + raise ImportError("`pyinstrument` not installed") + profiler = Profiler(interval=0.01) + profiler.start() + + result = flowmapper( source=source, target=target, + mapping_source=generic_mapping, + mapping_target=generic_mapping, + source_id=source.stem, + target_id=target.stem, + contributors=[ + { + "title": "flowmapper", + "roles": ["author"], + "path": "https://github.com/cmutel/flowmapper", + } + ], output_dir=output_dir, - format=format, default_transformations=default_transformations, transformations=transformations, unmatched_source=unmatched_source, @@ -84,17 +121,24 @@ def map( matched_target=matched_target, ) + if profile: + profiler.stop() + with open(f"{source.stem}-{target.stem}.html", "w") as f: + f.write(profiler.output_html()) + + return result + @app.command() def extract_simapro_csv( simapro_csv_filepath: Annotated[ - Path, typer.Argument(help="Path to source SimaPro CSV file") + Path, typer.Argument(help="Path to SimaPro CSV input file") ], - output_dir: Annotated[ - Path, typer.Argument(help="Directory to save mapping and diagnostics files") + output_filepath: Annotated[ + Path, typer.Argument(help="File path for JSON results data") ], ) -> None: - simapro_csv_biosphere_extractor(simapro_csv_filepath, output_dir) + simapro_csv_biosphere_extractor(simapro_csv_filepath, output_filepath) @app.command() @@ -102,8 +146,8 @@ def extract_ecospold2( elementary_exchanges_filepath: Annotated[ Path, typer.Argument(help="Path to source `ElementaryExchanges.xml` file") ], - output_dir: Annotated[ - Path, typer.Argument(help="Directory to save mapping and diagnostics files") + output_filepath: Annotated[ + Path, typer.Argument(help="File path for JSON results data") ], ) -> None: - ecospold2_biosphere_extractor(elementary_exchanges_filepath, output_dir) + ecospold2_biosphere_extractor(elementary_exchanges_filepath, output_filepath) diff --git a/flowmapper/constants.py b/src/flowmapper/constants.py similarity index 53% rename from flowmapper/constants.py rename to src/flowmapper/constants.py index 4499e9a..591c5c8 100644 --- a/flowmapper/constants.py +++ b/src/flowmapper/constants.py @@ -1,11 +1,11 @@ -PINT_MAPPING = { - "livestock unit": "livestock_unit", - "kilowatt hour": "kilowatt_hour", -} - RESOURCE_PARENT_CATEGORY = { "natural resources", "natural resource", "resources", "resource", + "land use", + "economic", + "social", + "raw materials", + "raw", } diff --git a/flowmapper/data/manual_name_match_simapro_ecoinvent_3.8.json b/src/flowmapper/data/manual_name_match_simapro_ecoinvent_3.8.json similarity index 100% rename from flowmapper/data/manual_name_match_simapro_ecoinvent_3.8.json rename to src/flowmapper/data/manual_name_match_simapro_ecoinvent_3.8.json diff --git 
a/flowmapper/data/manual_name_match_simapro_ecoinvent_3.9.json b/src/flowmapper/data/manual_name_match_simapro_ecoinvent_3.9.json similarity index 100% rename from flowmapper/data/manual_name_match_simapro_ecoinvent_3.9.json rename to src/flowmapper/data/manual_name_match_simapro_ecoinvent_3.9.json diff --git a/flowmapper/data/names_and_locations.json b/src/flowmapper/data/names_and_locations.json similarity index 100% rename from flowmapper/data/names_and_locations.json rename to src/flowmapper/data/names_and_locations.json diff --git a/flowmapper/data/places.json b/src/flowmapper/data/places.json similarity index 100% rename from flowmapper/data/places.json rename to src/flowmapper/data/places.json diff --git a/flowmapper/data/simapro-2023-ecoinvent-3-contexts.json b/src/flowmapper/data/simapro-2025-ecoinvent-3-contexts.json similarity index 51% rename from flowmapper/data/simapro-2023-ecoinvent-3-contexts.json rename to src/flowmapper/data/simapro-2025-ecoinvent-3-contexts.json index cd2a7d7..738c131 100644 --- a/flowmapper/data/simapro-2023-ecoinvent-3-contexts.json +++ b/src/flowmapper/data/simapro-2025-ecoinvent-3-contexts.json @@ -1,5 +1,5 @@ { - "name": "SimaPro-ecoinvent-3-context", + "name": "SimaPro-2025-ecoinvent-3.12-context", "licenses": [ { "name": "CC BY 4.0", @@ -8,17 +8,45 @@ } ], "version": "1.0.0", - "description": "Context mapping from 2023 SimaPro to ecoinvent 3", - "created": "2024-04-12T09:29:02.823409", + "description": "Context mapping from 2025 SimaPro to ecoinvent 3.12", "case-insensitive": true, + "created": "2025-11-10T12:34:56Z", "contributors": [ { "title": "Chris Mutel", "path": "https://chris.mutel.org/", - "role": "author" + "roles": ["author"] } ], + "graph_context": [ + "nodes" + ], + "mapping": { + "source": { + "expression language": "JSONPath", + "labels": { + "context": "$.context" + } + }, + "target": { + "expression language": "JSONPath", + "labels": { + "context": "$.context" + } + } + }, "update": [ + { + "source": { + "context": "air/unspecified" + }, + "target": { + "context": [ + "air", + "unspecified" + ] + } + }, { "source": { "context": "air/high. pop." @@ -76,17 +104,62 @@ }, { "source": { - "context": "emissions to air" + "context": "Emissions to air" + }, + "target": { + "context": [ + "air", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to air/" + }, + "target": { + "context": [ + "air", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to air/unspecified" + }, + "target": { + "context": [ + "air", + "unspecified" + ] + } + }, + { + "source": { + "context": "Airborne emissions/(unspecified)" + }, + "target": { + "context": [ + "air", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to air/high. pop." }, "target": { "context": [ - "air" + "air", + "urban air close to ground" ] } }, { "source": { - "context": "emissions to air/high. pop." + "context": "Airborne emissions/high. pop." }, "target": { "context": [ @@ -97,7 +170,40 @@ }, { "source": { - "context": "emissions to air/low. pop." + "context": "Emissions to air/low. pop." + }, + "target": { + "context": [ + "air", + "non-urban air or from high stacks" + ] + } + }, + { + "source": { + "context": "Airborne emissions/low. pop." 
+ }, + "target": { + "context": [ + "air", + "non-urban air or from high stacks" + ] + } + }, + { + "source": { + "context": "Emissions to air/indoor" + }, + "target": { + "context": [ + "air", + "non-urban air or from high stacks" + ] + } + }, + { + "source": { + "context": "Airborne emissions/indoor" }, "target": { "context": [ @@ -108,7 +214,18 @@ }, { "source": { - "context": "emissions to air/low. pop., long-term" + "context": "Emissions to air/low. pop., long-term" + }, + "target": { + "context": [ + "air", + "low population density, long-term" + ] + } + }, + { + "source": { + "context": "Airborne emissions/low. pop., long-term" }, "target": { "context": [ @@ -119,7 +236,7 @@ }, { "source": { - "context": "emissions to air/stratosphere + troposphere" + "context": "Emissions to air/stratosphere + troposphere" }, "target": { "context": [ @@ -130,17 +247,62 @@ }, { "source": { - "context": "emissions to soil" + "context": "Airborne emissions/stratosphere + troposphere" + }, + "target": { + "context": [ + "air", + "lower stratosphere + upper troposphere" + ] + } + }, + { + "source": { + "context": "Emissions to soil/unspecified" + }, + "target": { + "context": [ + "soil", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to soil/(unspecified)" + }, + "target": { + "context": [ + "soil", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to soil" }, "target": { "context": [ - "soil" + "soil", + "unspecified" ] } }, { "source": { - "context": "emissions to soil/agricultural" + "context": "Emissions to soil/" + }, + "target": { + "context": [ + "soil", + "unspecified" + ] + } + }, + { + "source": { + "context": "Emissions to soil/agricultural" }, "target": { "context": [ @@ -151,7 +313,7 @@ }, { "source": { - "context": "emissions to soil/forestry" + "context": "Emissions to soil/forestry" }, "target": { "context": [ @@ -162,7 +324,7 @@ }, { "source": { - "context": "emissions to soil/industrial" + "context": "Emissions to soil/industrial" }, "target": { "context": [ @@ -171,6 +333,39 @@ ] } }, + { + "source": { + "context": "Final waste flows/(unspecified)" + }, + "target": { + "context": [ + "inventory indicator", + "waste" + ] + } + }, + { + "source": { + "context": "Final waste flows/" + }, + "target": { + "context": [ + "inventory indicator", + "waste" + ] + } + }, + { + "source": { + "context": "water/unspecified" + }, + "target": { + "context": [ + "water", + "unspecified" + ] + } + }, { "source": { "context": "water/groundwater" @@ -221,7 +416,41 @@ }, "target": { "context": [ - "water" + "water", + "unspecified" + ] + } + }, + { + "source": { + "context": "emissions to water/" + }, + "target": { + "context": [ + "water", + "unspecified" + ] + } + }, + { + "source": { + "context": "emissions to water/unspecified" + }, + "target": { + "context": [ + "water", + "unspecified" + ] + } + }, + { + "source": { + "context": "Waterborne emissions/(unspecified)" + }, + "target": { + "context": [ + "water", + "unspecified" ] } }, @@ -247,6 +476,17 @@ ] } }, + { + "source": { + "context": "Waterborne emissions/groundwater, long-term" + }, + "target": { + "context": [ + "water", + "ground-, long-term" + ] + } + }, { "source": { "context": "emissions to water/lake" @@ -269,6 +509,17 @@ ] } }, + { + "source": { + "context": "Waterborne emissions/ocean" + }, + "target": { + "context": [ + "water", + "ocean" + ] + } + }, { "source": { "context": "emissions to water/river" @@ -282,7 +533,7 @@ }, { "source": { - "context": "emissions to water/river, long-term" 
+ "context": "Waterborne emissions/river" }, "target": { "context": [ @@ -293,11 +544,12 @@ }, { "source": { - "context": "resources" + "context": "emissions to water/river, long-term" }, "target": { "context": [ - "natural resource" + "water", + "surface water" ] } }, @@ -358,17 +610,40 @@ }, { "source": { - "context": "raw" + "context": "Raw" }, "target": { "context": [ "natural resource" ] - } + }, + "comment": "Dummy value used for matching; not a real ecoinvent context" }, { "source": { - "context": "raw/biotic" + "context": "Raw/" + }, + "target": { + "context": [ + "natural resource" + ] + }, + "comment": "Dummy value used for matching; not a real ecoinvent context" + }, + { + "source": { + "context": "Raw materials/" + }, + "target": { + "context": [ + "natural resource" + ] + }, + "comment": "Dummy value used for matching; not a real ecoinvent context" + }, + { + "source": { + "context": "Raw/biotic" }, "target": { "context": [ @@ -379,7 +654,7 @@ }, { "source": { - "context": "raw/in air" + "context": "Raw/in air" }, "target": { "context": [ @@ -390,7 +665,7 @@ }, { "source": { - "context": "raw/in ground" + "context": "Raw/in ground" }, "target": { "context": [ @@ -401,7 +676,7 @@ }, { "source": { - "context": "raw/land" + "context": "Raw/land" }, "target": { "context": [ @@ -409,9 +684,21 @@ "land" ] } - }, { + }, + { + "source": { + "context": "Raw materials/land" + }, + "target": { + "context": [ + "natural resource", + "land" + ] + } + }, + { "source": { - "context": "raw/in water" + "context": "Raw/in water" }, "target": { "context": [ diff --git a/flowmapper/data/standard-units-harmonization.json b/src/flowmapper/data/standard-units-harmonization.json similarity index 73% rename from flowmapper/data/standard-units-harmonization.json rename to src/flowmapper/data/standard-units-harmonization.json index 2b5de70..caa8d44 100644 --- a/flowmapper/data/standard-units-harmonization.json +++ b/src/flowmapper/data/standard-units-harmonization.json @@ -1,5 +1,5 @@ { - "name": "Standard-units-harmonization", + "name": "Flowmapper-standard-units-harmonization", "licenses": [ { "name": "CC BY 4.0", @@ -8,16 +8,33 @@ } ], "version": "1.0.0", - "description": "Standard Brightway unit mapping", - "created": "2024-04-12T09:29:02.823409", + "description": "Standard flowmapper unit harmonization linked with Pint customization", + "created": "2025-11-10T12:34:56Z", "case-insensitive": true, "contributors": [ { "title": "Chris Mutel", "path": "https://chris.mutel.org/", - "role": "author" + "roles": ["author"] } ], + "graph_context": [ + "nodes" + ], + "mapping": { + "source": { + "expression language": "JSONPath", + "labels": { + "unit": "$.unit" + } + }, + "target": { + "expression language": "JSONPath", + "labels": { + "unit": "$.unit" + } + } + }, "update": [ { "source": { @@ -51,6 +68,14 @@ "unit": "gigajoule" } }, + { + "source": { + "unit": "GJ" + }, + "target": { + "unit": "gigajoule" + } + }, { "source": { "unit": "h" @@ -83,6 +108,30 @@ "unit": "kilobecquerel" } }, + { + "source": { + "unit": "livestock unit" + }, + "target": { + "unit": "livestock_unit" + } + }, + { + "source": { + "unit": "kilowatt hour" + }, + "target": { + "unit": "kilowatt_hour" + } + }, + { + "source": { + "unit": "kBq" + }, + "target": { + "unit": "kilobecquerel" + } + }, { "source": { "unit": "kilo becquerel" @@ -123,6 +172,14 @@ "unit": "kilojoule" } }, + { + "source": { + "unit": "kJ" + }, + "target": { + "unit": "kilojoule" + } + }, { "source": { "unit": "kwh" @@ -187,6 +244,14 @@ "unit": 
"square_meter_year" } }, + { + "source": { + "unit": "cubic meter-year" + }, + "target": { + "unit": "cubic_meter_year" + } + }, { "source": { "unit": "m2a" @@ -275,6 +340,14 @@ "unit": "megajoule" } }, + { + "source": { + "unit": "MJ" + }, + "target": { + "unit": "megajoule" + } + }, { "source": { "unit": "my" @@ -283,6 +356,14 @@ "unit": "meter_year" } }, + { + "source": { + "unit": "standard cubic meter" + }, + "target": { + "unit": "standard_cubic_meter" + } + }, { "source": { "unit": "sm3" @@ -291,6 +372,14 @@ "unit": "standard_cubic_meter" } }, + { + "source": { + "unit": "normal cubic meter" + }, + "target": { + "unit": "normal_cubic_meter" + } + }, { "source": { "unit": "nm3" @@ -299,6 +388,22 @@ "unit": "normal_cubic_meter" } }, + { + "source": { + "unit": "sM3" + }, + "target": { + "unit": "standard_cubic_meter" + } + }, + { + "source": { + "unit": "nM3" + }, + "target": { + "unit": "normal_cubic_meter" + } + }, { "source": { "unit": "p" @@ -370,6 +475,14 @@ "target": { "unit": "watt_hour" } + }, + { + "source": { + "unit": "eur2005" + }, + "target": { + "unit": "eur_2005" + } } ] } diff --git a/flowmapper/data/units.txt b/src/flowmapper/data/units.txt similarity index 54% rename from flowmapper/data/units.txt rename to src/flowmapper/data/units.txt index ff6e03a..d9b14e3 100644 --- a/flowmapper/data/units.txt +++ b/src/flowmapper/data/units.txt @@ -9,9 +9,15 @@ square_meter_year = m2 * year = m2y = m2a cubic_meter_year = m3 * year = m3y = m3a # Gas volume at given conditions -[gas_volume] = [pressure] * [volume] -standard_cubic_meter = atmosphere * (meter ** 3) = sm3 -normal_cubic_meter = 1.0732 * standard_cubic_meter = nm3 +# https://en.wikipedia.org/wiki/Standard_temperature_and_pressure +# 273.15 K (0 °C) and an absolute pressure of exactly 1 bar (100 kPa) +standard_cubic_meter = 44.095 * mole = sm3 +# https://www.sciencedirect.com/topics/engineering/cubic-metre +# There are multiple definitions for this but as we only care about natural gas, using the +# Gas Industry Standards Board seems reasonable. +# 288.15 K (15 °C) and an absolute pressure of exactly 1 atm (101.325 kPa) +# See also https://github.com/qudt/qudt-public-repo/issues/1227 +normal_cubic_meter = 41.739 * mole = nm3 # Livestock livestock_unit = [livestock] = LU @@ -31,3 +37,7 @@ vehicle_kilometer = vehicle * kilometer = vkm # Personal travel person = [personal_travel] person_kilometer = person * kilometer = pkm + +# Currency +eur = [currency] +eur_2005 = 1 * eur diff --git a/src/flowmapper/domain/__init__.py b/src/flowmapper/domain/__init__.py new file mode 100644 index 0000000..1cb2e68 --- /dev/null +++ b/src/flowmapper/domain/__init__.py @@ -0,0 +1,15 @@ +"""Domain entities for flowmapper. 
+ +This package contains the core domain model classes: +- Flow: Represents an elementary flow with all its attributes +- NormalizedFlow: Manages flow transformations and matching state +- Match: Represents a mapping between source and target flows +- MatchCondition: Enumeration of match quality levels +""" + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match import Match +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow + +__all__ = ["Flow", "NormalizedFlow", "Match", "MatchCondition"] diff --git a/src/flowmapper/domain/flow.py b/src/flowmapper/domain/flow.py new file mode 100644 index 0000000..4a65200 --- /dev/null +++ b/src/flowmapper/domain/flow.py @@ -0,0 +1,337 @@ +"""Flow class representing an elementary flow with all its attributes.""" + +import itertools +import uuid +from dataclasses import dataclass, field +from typing import Any, Self + +from flowmapper.errors import MissingLocation +from flowmapper.fields import ( + CASField, + ContextField, + OxidationState, + StringField, + replace_location_suffix, + split_location_suffix, +) +from flowmapper.unit import UnitField +from flowmapper.utils import remove_unit_slash + +global_counter = itertools.count(0) + + +@dataclass(frozen=True) +class Flow: + """ + Represents an elementary flow with all its attributes. + + A Flow is an immutable dataclass that represents an elementary flow (e.g., a substance + or material) with its name, unit, context, and optional attributes like location, + CAS number, and synonyms. Flows can be normalized to a standard form for matching + and comparison. + + Attributes + ---------- + name : StringField + The name of the flow (e.g., "Carbon dioxide"). + unit : UnitField + The unit of measurement (e.g., "kg", "m3"). + context : ContextField + The context or category of the flow (e.g., "air", "water"). + identifier : str | None, optional + An optional unique identifier for the flow. + location : str | None, optional + An optional location code (e.g., "NL", "DE", "US"). + oxidation_state : OxidationState | None, optional + The oxidation state of the flow if applicable. + cas_number : CASField | None, optional + The CAS (Chemical Abstracts Service) registry number. + synonyms : list[str], default=[] + A list of alternative names for the flow. + _id : int + Internal unique identifier (auto-generated). + + Examples + -------- + >>> flow = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> normalized = flow.normalize() + >>> print(normalized.name.data) + carbon dioxide + """ + + name: StringField + unit: UnitField + context: ContextField + identifier: str | None = None # Internal ID, not necessarily present or unique... + location: str | None = None + oxidation_state: OxidationState | None = None + cas_number: CASField | None = None + synonyms: list[str] = field(default_factory=lambda: []) + conversion_factor: float | None = None + _id: int = field(default_factory=lambda: next(global_counter)) + + @staticmethod + def randonneur_mapping() -> dict: + """ + Return the randonneur mapping configuration for Flow objects. + + Returns + ------- + dict + A dictionary containing JSONPath expressions for mapping Flow attributes + to randonneur transformation format. 
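+
+        Examples
+        --------
+        A small illustration of the returned structure (not exhaustive; see
+        the method body below for the full label set):
+
+        >>> Flow.randonneur_mapping()["expression language"]
+        'JSONPath'
+        >>> Flow.randonneur_mapping()["labels"]["name"]
+        '$.name'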
+ """ + return { + "expression language": "JSONPath", + "labels": { + "unit": "$.unit", + "name": "$.name", + "context": "$.context", + "identifier": "$.identifier", + "location": "$.location", + "cas_number": "$.cas_number", + "synonyms": "$.synonyms", + "conversion_factor": "$.conversion_factor", + }, + } + + @classmethod + def from_dict(cls, data: dict) -> Self: + """ + Create a Flow instance from a dictionary. + + Parameters + ---------- + data : dict + Dictionary containing flow data with keys: name, unit, context, and + optionally identifier, location, oxidation_state, cas_number, synonyms. + + Returns + ------- + Flow + A new Flow instance created from the dictionary data. + + Examples + -------- + >>> flow = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg", + ... "location": "NL" + ... }) + """ + return cls( + name=StringField(data["name"]), + unit=UnitField(data["unit"]), + context=ContextField(data["context"]), + identifier=data.get("identifier"), + location=data.get("location") or None, + oxidation_state=( + OxidationState(data["oxidation_state"]) + if data.get("oxidation_state") + else None + ), + cas_number=CASField.from_string(data.get("cas_number") or None), + synonyms=data.get("synonyms") or [], + conversion_factor=data.get("conversion_factor"), + ) + + def to_dict(self) -> dict: + """ + Convert the Flow to a dictionary representation. + + Returns + ------- + dict + Dictionary containing the flow's data. Only non-None optional fields + are included. + + Examples + -------- + >>> flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + >>> flow.to_dict() + {'name': 'CO2', 'unit': 'kg', 'context': ('air',), 'identifier': None} + """ + data = { + "name": self.name.data, + "unit": self.unit.data, + "context": self.context.as_tuple(), + "identifier": self.identifier, + } + for key in ( + "location", + "oxidation_state", + "cas_number", + "synonyms", + "conversion_factor", + ): + if getattr(self, key): + data[key] = getattr(self, key) + return data + + def normalize(self) -> Self: + """ + Normalize the flow to a standard form for matching. + + This method performs several normalization steps: + 1. Removes unit references from the name (e.g., "/kg") + 2. Extracts location from the name suffix (e.g., ", NL") + 3. Extracts oxidation state from the name (e.g., "Iron(II)") + 4. Normalizes the name, unit, and context fields + + Returns + ------- + Flow + A new Flow instance with normalized attributes. + + Examples + -------- + >>> flow = Flow.from_dict({ + ... "name": "Carbon dioxide, NL", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> normalized = flow.normalize() + >>> normalized.location + 'NL' + """ + location, oxidation_state = self.location, self.oxidation_state + name = remove_unit_slash(self) + name, other_location = split_location_suffix(name) + if other_location: + location = other_location + if OxidationState.has_oxidation_state(name): + oxidation_state, name = OxidationState.from_string(name) + + return type(self)( + identifier=self.identifier, + name=StringField(name).normalize(), + location=location, + oxidation_state=oxidation_state, + unit=self.unit.normalize(), + context=self.context.normalize(), + cas_number=self.cas_number, + synonyms=self.synonyms, + conversion_factor=self.conversion_factor, + ) + + def copy_with_new_location(self, location: str) -> Self: + """ + Create a copy of the flow with a new location in the name. 
+
+        This method replaces the location suffix in the flow's name with a new
+        location value. If no location suffix is found, it appends the location
+        to the name. The original flow is not modified.
+
+        The new flow will have a new UUID identifier, regardless of whether
+        the original flow had an identifier.
+
+        Parameters
+        ----------
+        location : str
+            The new location code to use (e.g., "DE", "FR").
+
+        Returns
+        -------
+        Flow
+            A new Flow instance with the updated location in the name and a new
+            UUID identifier.
+
+        Notes
+        -----
+        - If the flow's name contains a location suffix (matched by the
+          ends_with_location regex), it is replaced with the new location.
+        - If no location suffix is found, the location is appended to the name
+          in the format ", <location>".
+        - The new flow always gets a new UUID identifier via `uuid.uuid4()`.
+
+        Examples
+        --------
+        >>> flow = Flow.from_dict({
+        ...     "name": "Carbon dioxide, NL",
+        ...     "context": "air",
+        ...     "unit": "kg"
+        ... })
+        >>> new_flow = flow.copy_with_new_location("DE")
+        >>> new_flow.name.data
+        'Carbon dioxide, DE'
+        >>> new_flow.identifier != flow.identifier
+        True
+        >>> # If no location suffix exists, location is appended
+        >>> flow2 = Flow.from_dict({
+        ...     "name": "Carbon dioxide",
+        ...     "context": "air",
+        ...     "unit": "kg"
+        ... })
+        >>> new_flow2 = flow2.copy_with_new_location("DE")
+        >>> new_flow2.name.data
+        'Carbon dioxide, DE'
+        """
+        if not location:
+            raise ValueError("No location parameter given")
+
+        data = self.to_dict()
+        try:
+            data["name"] = replace_location_suffix(
+                string=data["name"], new_location=location
+            )
+        except MissingLocation:
+            data["name"] = (
+                data["name"].strip()
+                + (", " if not data["name"].endswith(",") else " ")
+                + location
+            )
+        data["identifier"] = str(uuid.uuid4())
+        return type(self).from_dict(data)
+
+    def __repr__(self) -> str:
+        """Return a string representation showing all non-None attributes."""
+        parts = [
+            f"name={self.name!r}",
+            f"unit={self.unit!r}",
+            f"context={self.context!r}",
+        ]
+        if self.identifier is not None:
+            parts.append(f"identifier={self.identifier!r}")
+        if self.location is not None:
+            parts.append(f"location={self.location!r}")
+        if self.oxidation_state is not None:
+            parts.append(f"oxidation_state={self.oxidation_state!r}")
+        if self.cas_number is not None:
+            parts.append(f"cas_number={self.cas_number!r}")
+        if self.synonyms:
+            parts.append(f"synonyms={self.synonyms!r}")
+        if self.conversion_factor:
+            parts.append(f"conversion_factor={self.conversion_factor!r}")
+        return f"Flow({', '.join(parts)})"
+
+    def __eq__(self, other: Any) -> bool:
+        """Check equality based on internal _id."""
+        if not isinstance(other, Flow):
+            return False
+        return self._id == other._id
+
+    def __lt__(self, other: Self) -> bool:
+        """
+        Compare flows for sorting.
+
+        Flows are compared by name, unit, context, and identifier in that order.
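+
+        Examples
+        --------
+        A minimal illustration with ad-hoc flows (ordering is decided here by
+        the name field alone):
+
+        >>> a = Flow.from_dict({"name": "Ammonia", "context": "air", "unit": "kg"})
+        >>> b = Flow.from_dict({"name": "Benzene", "context": "air", "unit": "kg"})
+        >>> a < b
+        True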
+ """ + if not isinstance(other, Flow): + return False + else: + return ( + self.name.data, + self.unit.data, + self.context.value, + self.identifier, + ) < ( + other.name.data, + other.unit.data, + other.context.value, + other.identifier, + ) diff --git a/src/flowmapper/domain/match.py b/src/flowmapper/domain/match.py new file mode 100644 index 0000000..1542cf5 --- /dev/null +++ b/src/flowmapper/domain/match.py @@ -0,0 +1,149 @@ +"""Match class representing a mapping between source and target flows.""" + +from __future__ import annotations + +from collections import UserString +from dataclasses import asdict, dataclass, field +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from flowmapper.domain.flow import Flow + from flowmapper.domain.match_condition import MatchCondition + + +@dataclass +class Match: + """ + Represents a match between a source flow and a target flow. + + A Match object contains information about how a source flow maps to a target + flow, including the match quality (condition), conversion factor, and metadata + about how the match was found. + + Attributes + ---------- + source : Flow + The source flow being matched. + target : Flow + The target flow that the source maps to. + function_name : str + The name of the matching function that found this match. + condition : MatchCondition + The quality/type of the match (exact, close, related, etc.). + conversion_factor : float, default=1.0 + The factor to convert from source unit to target unit. + comment : str, default="" + Optional comment describing the match. + new_target_flow : bool, default=False + Whether this match created a new target flow that didn't exist before. + + Examples + -------- + >>> from flowmapper.domain.flow import Flow + >>> from flowmapper.domain.match import Match + >>> from flowmapper.domain.match_condition import MatchCondition + >>> source = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> target = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> match = Match( + ... source=source, + ... target=target, + ... function_name="match_identical_names", + ... condition=MatchCondition.exact + ... ) + >>> match.export() + {'source': {...}, 'target': {...}, 'condition': '...', ...} + """ + + source: Flow + target: Flow + function_name: str + condition: MatchCondition + conversion_factor: float = 1.0 + comment: str = field(default_factory=lambda: "") + new_target_flow: bool = False + + def export(self, flowmapper_metadata: bool = False) -> dict: + """ + Export the match to a dictionary format. + + This method serializes the match to a dictionary suitable for JSON + export or storage. The source and target flows are converted to + dictionaries, and special objects are serialized appropriately. + + Parameters + ---------- + flowmapper_metadata : bool, default=False + If True, include flowmapper-specific metadata (version, function_name) + in the export. + + Returns + ------- + dict + Dictionary containing the match data, with source and target as + dictionaries and condition as a string URI. 
+
+        Examples
+        --------
+        >>> match.export()
+        {'source': {...}, 'target': {...}, 'condition': '...', ...}
+        >>> match.export(flowmapper_metadata=True)
+        {'source': {...}, 'target': {...}, 'condition': '...', 'flowmapper_metadata': {...}, ...}
+        """
+        from flowmapper import __version__
+        from flowmapper.fields import ContextField
+
+        def serializable(obj: Any) -> Any:
+            if isinstance(obj, UserString):
+                return str(obj)
+            elif isinstance(obj, ContextField):
+                return obj.value
+            return obj
+
+        data = asdict(self)
+        data["source"] = {
+            k: serializable(v)
+            for k, v in data["source"].items()
+            if v and not k.startswith("_")
+        }
+        data["target"] = {
+            k: serializable(v)
+            for k, v in data["target"].items()
+            if v and not k.startswith("_")
+        }
+        data["condition"] = str(data["condition"])
+
+        function_name = data.pop("function_name")
+        if flowmapper_metadata:
+            data["flowmapper_metadata"] = {
+                "version": __version__,
+                "function_name": function_name,
+            }
+
+        return data
+
+    def __lt__(self, other: Match) -> bool:
+        """
+        Compare matches for sorting.
+
+        Matches are sorted by the string representations of source name,
+        source context, target name, and target context, in that order.
+        """
+        return (
+            str(self.source.name),
+            str(self.source.context),
+            str(self.target.name),
+            str(self.target.context),
+        ) < (
+            str(other.source.name),
+            str(other.source.context),
+            str(other.target.name),
+            str(other.target.context),
+        )
diff --git a/src/flowmapper/domain/match_condition.py b/src/flowmapper/domain/match_condition.py
new file mode 100644
index 0000000..b24d2f0
--- /dev/null
+++ b/src/flowmapper/domain/match_condition.py
@@ -0,0 +1,83 @@
+"""MatchCondition enum for representing match quality levels."""
+
+from enum import StrEnum
+
+
+class MatchCondition(StrEnum):
+    """
+    Enumeration of match quality conditions based on SKOS vocabulary.
+
+    Match conditions represent the semantic relationship between source and target
+    flows in a mapping. They follow the SKOS (Simple Knowledge Organization System)
+    vocabulary for concept matching.
+
+    Attributes
+    ----------
+    exact : str
+        Exact match - the flows are semantically identical.
+        SKOS URI: http://www.w3.org/2004/02/skos/core#exactMatch
+    close : str
+        Close match - the flows are very similar but not identical.
+        SKOS URI: http://www.w3.org/2004/02/skos/core#closeMatch
+    related : str
+        Related match - the flows are related but not equivalent.
+        SKOS URI: http://www.w3.org/2004/02/skos/core#relatedMatch
+    narrow : str
+        Narrow match - the target is more specific than the source.
+        SKOS URI: http://www.w3.org/2004/02/skos/core#narrowMatch
+    broad : str
+        Broad match - the target is more general than the source.
+        SKOS URI: http://www.w3.org/2004/02/skos/core#broadMatch
+
+    Examples
+    --------
+    >>> condition = MatchCondition.exact
+    >>> condition.as_glad()
+    '='
+    >>> condition = MatchCondition.related
+    >>> condition.as_glad()
+    '~'
+    """
+
+    exact = "http://www.w3.org/2004/02/skos/core#exactMatch"
+    close = "http://www.w3.org/2004/02/skos/core#closeMatch"
+    related = "http://www.w3.org/2004/02/skos/core#relatedMatch"
+    # A triple <A> skos:broader <B> asserts that <B>, the object of the triple, is a broader concept
+    # than <A>, the subject of the triple.
+    narrow = "http://www.w3.org/2004/02/skos/core#narrowMatch"  # in SKOS the *target* is narrower than the *source*
+    broad = "http://www.w3.org/2004/02/skos/core#broadMatch"  # in SKOS the *target* is broader than the *source*
+
+    def as_glad(self) -> str:
+        """
+        Convert match condition to GLAD format symbol.
+ + GLAD (Global LCA Data Access) network uses single-character symbols + to represent match conditions in flow mappings. + + Returns + ------- + str + Single character symbol: + - "=" for exact match + - "~" for close or related match + - ">" for narrow match + - "<" for broad match + + Examples + -------- + >>> MatchCondition.exact.as_glad() + '=' + >>> MatchCondition.related.as_glad() + '~' + """ + if self.value == "http://www.w3.org/2004/02/skos/core#exactMatch": + return "=" + elif self.value == "http://www.w3.org/2004/02/skos/core#closeMatch": + return "~" + elif self.value == "http://www.w3.org/2004/02/skos/core#relatedMatch": + return "~" + elif self.value == "http://www.w3.org/2004/02/skos/core#narrowMatch": + return ">" + elif self.value == "http://www.w3.org/2004/02/skos/core#broadMatch": + return "<" + raise ValueError # Just for silly type checking diff --git a/src/flowmapper/domain/normalized_flow.py b/src/flowmapper/domain/normalized_flow.py new file mode 100644 index 0000000..6c925ca --- /dev/null +++ b/src/flowmapper/domain/normalized_flow.py @@ -0,0 +1,239 @@ +"""NormalizedFlow class for managing flow transformations and matching state.""" + +from __future__ import annotations + +from copy import copy +from dataclasses import dataclass +from typing import TYPE_CHECKING, Self + +if TYPE_CHECKING: + from flowmapper.domain.flow import Flow + + +@dataclass +class NormalizedFlow: + """ + Represents a flow with its original, normalized, and current states. + + NormalizedFlow tracks a flow through its lifecycle: + - `original`: The flow as it was initially created + - `normalized`: The flow after normalization (standard form for matching) + - `current`: The current state (can be modified for transformations) + + This class is used for matching flows where transformations may be temporarily + applied to the `current` state, then reset back to `normalized`. + + Attributes + ---------- + original : Flow + The original flow as created from source data. + normalized : Flow + The normalized version of the flow (standard form). + current : Flow + The current state of the flow (can be modified). + matched : bool, default=False + Whether this flow has been matched to a target flow. + + Examples + -------- + >>> from flowmapper.domain.flow import Flow + >>> from flowmapper.domain.normalized_flow import NormalizedFlow + >>> flow = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> normalized = flow.normalize() + >>> nf = NormalizedFlow( + ... original=flow, + ... normalized=normalized, + ... current=copy(normalized) + ... 
) + >>> nf.update_current(name="Modified") + >>> nf.reset_current() # Reset to normalized state + """ + + original: Flow + normalized: Flow + current: Flow + matched: bool = False + + @property + def name(self) -> str: + """Return the current flow's name.""" + return self.current.name.data + + @property + def unit(self) -> str: + """Return the current flow's unit.""" + return self.current.unit.data + + @property + def context(self) -> str | list[str] | tuple[str]: + """Return the current flow's context.""" + return self.current.context.value + + @property + def identifier(self) -> str | None: + """Return the current flow's identifier.""" + return self.current.identifier + + @property + def location(self) -> str | None: + """Return the current flow's location.""" + return self.current.location + + @property + def oxidation_state(self) -> int | None: + """Return the current flow's oxidation state value.""" + return ( + self.current.oxidation_state.value if self.current.oxidation_state else None + ) + + @property + def cas_number(self) -> str | None: + """Return the current flow's CAS number.""" + return self.current.cas_number.data if self.current.cas_number else None + + @property + def synonyms(self) -> list[str] | None: + """Return the current flow's synonyms.""" + return self.current.synonyms + + @property + def id(self) -> int: + """Return the original flow's internal ID.""" + return self.original._id + + def reset_current(self) -> None: + """ + Reset the current flow to the normalized state. + + This method creates a copy of the normalized flow and sets it as the + current flow. Useful after applying temporary transformations. + """ + self.current = copy(self.normalized) + + def update_current(self, **kwargs) -> None: + """ + Update the current flow with new attribute values. + + This method creates a new Flow based on the normalized flow's data, + updated with the provided keyword arguments. The normalized flow + remains unchanged. + + Parameters + ---------- + **kwargs + Keyword arguments corresponding to Flow attributes (name, unit, + context, location, etc.). + + Examples + -------- + >>> nf.update_current(name="Modified name", unit="g") + """ + from flowmapper.domain.flow import Flow + + data = self.normalized.to_dict() + data.update(kwargs) + self.current = Flow.from_dict(data) + + @staticmethod + def from_dict(data: dict) -> NormalizedFlow: + """ + Create a NormalizedFlow from a dictionary. + + This method creates the original flow, normalizes it, and sets up + the NormalizedFlow with all three states. + + Parameters + ---------- + data : dict + Dictionary containing flow data. + + Returns + ------- + NormalizedFlow + A new NormalizedFlow instance. + """ + from flowmapper.domain.flow import Flow + + original = Flow.from_dict(data) + # Do data preprocessing here + normalized = original.normalize() + return NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + def unit_compatible(self, other: Self) -> bool: + """ + Check if this flow's unit is compatible with another flow's unit. + + Parameters + ---------- + other : NormalizedFlow + Another NormalizedFlow to compare units with. + + Returns + ------- + bool + True if the units are compatible (can be converted), False otherwise. + """ + return self.current.unit.compatible(other.current.unit) + + def conversion_factor(self, other: Self) -> float: + """ + Calculate the conversion factor from this flow's unit to another flow's unit. 
+ + Parameters + ---------- + other : NormalizedFlow + Another NormalizedFlow to convert to. + + Returns + ------- + float + The conversion factor to multiply this flow's value by to get the + equivalent value in the other flow's unit. + """ + from_transformation = self.current.conversion_factor or 1.0 + return from_transformation * self.current.unit.conversion_factor( + other.current.unit + ) + + def export(self) -> dict: + """ + Export the flow data for serialization. + + Returns a dictionary containing the original flow's data, suitable + for JSON serialization or export to external formats. + + Returns + ------- + dict + Dictionary containing flow data with only non-None values. + """ + data = [ + ("name", self.original.name.data), + ("unit", self.original.unit.data), + ("context", self.original.context.value), + ("identifier", self.original.identifier), + ("location", self.original.location), + ( + "cas_number", + ( + self.normalized.cas_number.export() + if self.normalized.cas_number + else None + ), + ), + ] + return {k: v for k, v in data if v} + + def __repr__(self) -> str: + """Return a string representation showing non-None attributes of original and current.""" + return f"""NormalizedFlow( + original={self.original!r} + current={self.current!r} + matched={self.matched!r} +)""" diff --git a/flowmapper/errors.py b/src/flowmapper/errors.py similarity index 66% rename from flowmapper/errors.py rename to src/flowmapper/errors.py index 6f512ca..e0e7d13 100644 --- a/flowmapper/errors.py +++ b/src/flowmapper/errors.py @@ -4,3 +4,7 @@ class DifferingMatches(Exception): class DifferingConversions(Exception): """Multiple, different conversion factors provided for a given match""" + + +class MissingLocation(Exception): + """Expected a location element in a name, but didn't find any""" diff --git a/src/flowmapper/extraction/__init__.py b/src/flowmapper/extraction/__init__.py new file mode 100644 index 0000000..ca59247 --- /dev/null +++ b/src/flowmapper/extraction/__init__.py @@ -0,0 +1,6 @@ +# from flowmapper.extraction.ecoinvent import ecoinvent_biosphere_extractor +from flowmapper.extraction.ecospold2 import ecospold2_biosphere_extractor +from flowmapper.extraction.simapro_csv import simapro_csv_biosphere_extractor +from flowmapper.extraction.simapro_ecospold1 import ( + simapro_ecospold1_biosphere_extractor, +) diff --git a/flowmapper/extraction/ecospold2.py b/src/flowmapper/extraction/ecospold2.py similarity index 92% rename from flowmapper/extraction/ecospold2.py rename to src/flowmapper/extraction/ecospold2.py index 7a2ec9f..9489a84 100644 --- a/flowmapper/extraction/ecospold2.py +++ b/src/flowmapper/extraction/ecospold2.py @@ -20,7 +20,7 @@ def reformat(obj: dict) -> dict: elif obj.get("synonym") and "#text" in obj["synonym"]: data["synonyms"] = [obj["synonym"]["#text"]] if "@casNumber" in obj: - data["CAS number"] = obj["@casNumber"] + data["cas_number"] = obj["@casNumber"] return data @@ -43,7 +43,9 @@ def remove_conflicting_synonyms(data: list[dict]) -> list[dict]: if not (obj.get("synonyms") and obj.get("context")): continue obj["synonyms"] = [ - syn for syn in obj["synonyms"] if syn.lower() not in base_names[obj["context"][0]] + syn + for syn in obj["synonyms"] + if syn.lower() not in base_names[obj["context"][0]] ] return data diff --git a/flowmapper/extraction/simapro_csv.py b/src/flowmapper/extraction/simapro_csv.py similarity index 81% rename from flowmapper/extraction/simapro_csv.py rename to src/flowmapper/extraction/simapro_csv.py index 64a0313..d6bd383 100644 --- 
a/flowmapper/extraction/simapro_csv.py
+++ b/src/flowmapper/extraction/simapro_csv.py
@@ -2,7 +2,9 @@
 from pathlib import Path
 
 import bw_simapro_csv
-from loguru import logger
+import structlog
+
+logger = structlog.get_logger("flowmapper")
 
 
 def is_simapro_csv_file(fp: Path) -> bool:
@@ -14,7 +16,9 @@ def is_simapro_csv_file(fp: Path) -> bool:
         ].project
         return True
     except:
-        logger.critical("Skipping {a} as we can't read it as a SimaPro file", a=fp.name)
+        logger.critical(
+            "Skipping file %s as we can't read it as a SimaPro file", fp.name
+        )
         return False
 
@@ -46,7 +50,8 @@
         process.blocks.values(),
     ):
         for line in block.parsed:
-            flows.add((line["context"], line["name"], line["unit"]))
+            # Restore context to single string as this is expected in our mapping
+            flows.add(("/".join(line["context"]), line["name"], line["unit"]))
 
     with open(output_path, "w") as f:
         json.dump(
diff --git a/flowmapper/extraction/simapro_ecospold1.py b/src/flowmapper/extraction/simapro_ecospold1.py
similarity index 64%
rename from flowmapper/extraction/simapro_ecospold1.py
rename to src/flowmapper/extraction/simapro_ecospold1.py
index 148d99c..224e704 100644
--- a/flowmapper/extraction/simapro_ecospold1.py
+++ b/src/flowmapper/extraction/simapro_ecospold1.py
@@ -2,17 +2,6 @@
 from pathlib import Path
 
 import pyecospold
-from loguru import logger
-
-# def is_simapro_csv_file(fp: Path) -> bool:
-#     if not fp.is_file() or not fp.suffix.lower() == ".csv":
-#         return False
-#     try:
-#         bw_simapro_csv.header.parse_header(open(fp, encoding="sloppy-windows-1252"))[0].project
-#         return True
-#     except:
-#         logger.critical("Skipping {a} as we can't read it as a SimaPro file", a=fp.name)
-#         return False
 
 
 def simapro_ecospold1_biosphere_extractor(dirpath: Path, output_fp: Path) -> None:
diff --git a/src/flowmapper/fields/__init__.py b/src/flowmapper/fields/__init__.py
new file mode 100644
index 0000000..30192aa
--- /dev/null
+++ b/src/flowmapper/fields/__init__.py
@@ -0,0 +1,24 @@
+"""Field classes and utilities for Flow attributes.
+
+This package contains field classes and related utilities used by Flow objects:
+- CASField: Chemical Abstracts Service registry number field
+- ContextField: Context field for flow categorization
+- StringField: String field with normalization support
+- OxidationState: Oxidation state representation
+- Location utilities: Functions for extracting and manipulating location codes
+"""
+
+from flowmapper.fields.cas import CASField
+from flowmapper.fields.context import ContextField
+from flowmapper.fields.location import replace_location_suffix, split_location_suffix
+from flowmapper.fields.oxidation_state import OxidationState
+from flowmapper.fields.string_field import StringField
+
+__all__ = [
+    "CASField",
+    "ContextField",
+    "StringField",
+    "OxidationState",
+    "replace_location_suffix",
+    "split_location_suffix",
+]
diff --git a/src/flowmapper/fields/cas.py b/src/flowmapper/fields/cas.py
new file mode 100644
index 0000000..880bb3d
--- /dev/null
+++ b/src/flowmapper/fields/cas.py
@@ -0,0 +1,73 @@
+import re
+from collections import UserString
+from functools import cached_property
+from typing import Any
+
+valid_cas = re.compile(r"^\s*[0-9]{2,7}-[0-9]{2}-[0-9]{1}\s*$")
+
+
+class CASField(UserString):
+    def __init__(self, string: str):
+        if not isinstance(string, (str, UserString)):
+            raise TypeError(
+                f"CASField takes only `str`, but got {type(string)} for {string}"
+            )
+        if not valid_cas.search(str(string)):
+            raise ValueError(f"Given input is not valid CAS formatting: '{string}'")
+        super().__init__(str(string))
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, CASField):
+            return self.data == other.data
+        elif isinstance(other, (str, UserString)):
+            other_cas = CASField.from_string(str(other))
+            if other_cas is None:
+                return False
+            return self.data == other_cas.data
+        return False
+
+    @staticmethod
+    def from_string(string: str | None) -> "CASField | None":
+        """Returns `None` if the CAS number is missing or invalid"""
+        if string is None or not isinstance(string, (str, UserString)):
+            return None
+        try:
+            new_cas = CASField(string.strip().lstrip("0").strip())
+        except ValueError:
+            # Malformed CAS strings are treated as missing rather than raising
+            return None
+        if not new_cas.valid():
+            return None
+        return new_cas
+
+    @property
+    def digits(self) -> list[int]:
+        return [int(d) for d in self.data.replace("-", "")]
+
+    def export(self):
+        return "{}-{}-{}".format(
+            "".join([str(x) for x in self.digits[:-3]]),
+            "".join([str(x) for x in self.digits[-3:-1]]),
+            self.digits[-1],
+        )
+
+    @cached_property
+    def check_digit_expected(self):
+        """
+        Expected digit according to the https://www.cas.org/support/documentation/chemical-substances/checkdig algorithm
+        """
+        result = (
+            sum(
+                [
+                    index * value
+                    for index, value in enumerate(self.digits[-2::-1], start=1)
+                ]
+            )
+            % 10
+        )
+        return result
+
+    def valid(self):
+        return (self.digits[-1] == self.check_digit_expected) and bool(
+            valid_cas.search(self.data)
+        )
diff --git a/src/flowmapper/fields/context.py b/src/flowmapper/fields/context.py
new file mode 100644
index 0000000..b610296
--- /dev/null
+++ b/src/flowmapper/fields/context.py
@@ -0,0 +1,75 @@
+from collections.abc import Iterable
+from typing import Any, Self
+
+from flowmapper.utils import as_normalized_tuple
+
+RESOURCE_CATEGORY = {
+    "natural resources",
+    "natural resource",
+    "resources",
+    "resource",
+    "land use",
+    "economic",
+    "social",
+    "raw materials",
+    "raw",
+}
+
+
+class ContextField:
+    def __init__(self, value: str | list[str] | tuple[str]):
+        self.value = value
+
+    def normalize(self, obj: Any | None = None, mapping: dict | None = None) -> Self:
+        return type(self)(value=as_normalized_tuple(value=obj or self.value))
+
+    def is_resource(self) -> bool:
+        if isinstance(self.value, str):
+            return any(cat in self.value.lower() for cat in RESOURCE_CATEGORY)
+        else:
+            lowered = [elem.lower() for elem in self.value]
+            return any(cat in lowered for cat in RESOURCE_CATEGORY)
+
+    def as_tuple(self) -> tuple | str:
+        if isinstance(self.value, str):
+            return self.value
+        return tuple(self.value)
+
+    def export_as_string(self, join_character: str = "✂️"):
+        if isinstance(self.value, (list, tuple)):
+            return join_character.join(self.value)
+        return self.value
+
+    def __iter__(self) -> Iterable:
+        return iter(self.value)
+
+    def __eq__(self, other: Any) -> bool:
+        if self and other and isinstance(other, ContextField):
+            return self.value == other.value
+        else:
+            try:
+                return self.value == self.normalize(other).value
+            except ValueError:
+                return False
+
+    def __repr__(self) -> str:
+        return str(self.value)
+
+    def __bool__(self) -> bool:
+        return bool(self.value)
+
+    def __hash__(self) -> int:
+        return hash(self.value)
+
+    def __contains__(self, other: Any) -> bool:
+        """`self` context is more generic than the `other` context.
+
+        ```python
+        ContextField("a/b/c") in ContextField("a/b")
+        >>> True
+        ```
+
+        """
+        if not isinstance(other, ContextField):
+            return False
+        return self.value == other.value[: len(self.value)]
diff --git a/src/flowmapper/fields/location.py b/src/flowmapper/fields/location.py
new file mode 100644
index 0000000..d994e34
--- /dev/null
+++ b/src/flowmapper/fields/location.py
@@ -0,0 +1,153 @@
+"""
+Location code extraction and manipulation utilities.
+
+This module provides functions for working with location codes that appear as
+suffixes in flow names. Location codes are typically appended to flow names
+in the format ", <location>" where <location> is a recognized location code
+from the places.json data file.
+
+The module uses a compiled regex pattern (ends_with_location) to identify
+location codes at the end of strings, following the pattern of a comma,
+whitespace, and a recognized location code.
+"""
+
+import importlib.resources as resource
+import json
+import re
+from pathlib import Path
+
+import structlog
+
+from flowmapper.errors import MissingLocation
+
+logger = structlog.get_logger("flowmapper")
+
+RESULTS_DIR = Path(__file__).parent.parent / "manual_matching" / "results"
+
+with resource.as_file(
+    resource.files("flowmapper") / "data" / "places.json"
+) as filepath:
+    places = json.load(open(filepath))
+
+# Compiled regex pattern that matches location codes at the end of strings.
+# Pattern matches: comma (not preceded by whitespace), one or more spaces,
+# followed by a recognized location code from places.json, optionally followed
+# by whitespace, at the end of the string.
+# The location code is captured in a named group "location".
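+#
+# As an illustration (assuming "NL" is one of the recognized codes in
+# places.json):
+#
+#   ends_with_location.search("Ammonia, NL").group("location")  -> "NL"
+#   ends_with_location.search("Ammonia NL") is None  -> True (no comma)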
+ends_with_location = re.compile(
+    r"(?<!\s),\s+(?P<location>{})\s*$".format(
+        "|".join([re.escape(string) for string in places])
+    ),
+)
+# All solutions I found for returning original string instead of
+# lower case one were very ugly
+# location_reverser = {obj.lower(): obj for obj in places}
+# if len(location_reverser) != len(places):
+#     raise ValueError("Multiple possible locations after lower case conversion")
+
+
+# us_lci_ends_with_location = re.compile(
+#     "/(?P<location>{})$".format(
+#         "|".join(
+#             [
+#                 re.escape(string)
+#                 for string in places
+#                 if 2 <= len(string) <= 3 and string.upper() == string
+#             ]
+#         )
+#     ),
+# )
+
+with resource.as_file(
+    resource.files("flowmapper") / "data" / "names_and_locations.json"
+) as filepath:
+    names_and_locations = {o["source"]: o for o in json.load(open(filepath))}
+
+
+def split_location_suffix(string: str) -> tuple[str, str | None]:
+    """
+    Split a string into name and location code if a location suffix is present.
+
+    This function searches for a location code at the end of the input string
+    using the ends_with_location regex pattern. If found, it returns the name
+    part (without the location suffix) and the location code. If no location
+    is found, it returns the original string and None.
+
+    The location code must appear at the end of the string in the format
+    ", <location>" where the comma is not preceded by whitespace, followed
+    by one or more spaces, and then a recognized location code.
+
+    Parameters
+    ----------
+    string : str
+        The input string that may contain a location suffix at the end.
+
+    Returns
+    -------
+    tuple[str, str | None]
+        A tuple containing:
+        - The name part without the location suffix (or original string if no
+          location found)
+        - The location code if found, otherwise None
+
+    Examples
+    --------
+    >>> split_location_suffix("Ammonia, NL")
+    ('Ammonia', 'NL')
+    >>> split_location_suffix("Ammonia, pure, NL")
+    ('Ammonia, pure', 'NL')
+    >>> split_location_suffix("Ammonia")
+    ('Ammonia', None)
+    >>> split_location_suffix("Ammonia, NL, pure")
+    ('Ammonia, NL, pure', None)
+    >>> split_location_suffix(", NL")
+    ('', 'NL')
+    """
+    if match := ends_with_location.search(string):
+        return string[: match.start()], match.group("location")
+    return string, None
+
+
+def replace_location_suffix(string: str, new_location: str) -> str:
+    """
+    Replace the location value found by ends_with_location regex with a new value.
+
+    If the string ends with a location code (matched by ends_with_location regex),
+    replace it with the new location value. If no location is found, raises
+    MissingLocation.
+
+    Parameters
+    ----------
+    string : str
+        The input string that must contain a location suffix at the end.
+    new_location : str
+        The new location value to replace the existing location with.
+
+    Returns
+    -------
+    str
+        The string with the location replaced.
+
+    Raises
+    ------
+    MissingLocation
+        If no location suffix is found in the input string.
+
+    Examples
+    --------
+    >>> replace_location_suffix("Ammonia, NL", "DE")
+    'Ammonia, DE'
+    >>> replace_location_suffix("Ammonia, pure, NL", "FR")
+    'Ammonia, pure, FR'
+    >>> replace_location_suffix("Ammonia", "DE")
+    Traceback (most recent call last):
+    ...
+    MissingLocation: No location suffix found in string 'Ammonia'
+    """
+    if match := ends_with_location.search(string):
+        return (
+            string[: match.start("location")]
+            + new_location
+            + string[match.end("location") :]
+        )
+    raise MissingLocation(f"No location suffix found in string {string!r}")
diff --git a/src/flowmapper/fields/oxidation_state.py b/src/flowmapper/fields/oxidation_state.py
new file mode 100644
index 0000000..730f14d
--- /dev/null
+++ b/src/flowmapper/fields/oxidation_state.py
@@ -0,0 +1,64 @@
+import re
+from typing import Any, Self
+
+import roman
+
+roman_numerals_optional_parentheses = re.compile(
+    r"[\,\s]+\(?\s*(?P<numeral>[IVX]+)\s*(?P<sign>[+-]*)\)?\s*$",
+    flags=re.IGNORECASE,
+)
+numbers_optional_parentheses = re.compile(
+    r"[\,\s]+\(?\s*(?P<sign>[+-]+)(?P<numeral>[0-9]+)\)?\s*$"
+)
+
+
+class OxidationState:
+    def __init__(self, value: int):
+        self.value = value
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, OxidationState):
+            return self.value == other.value
+        else:
+            return self.value == other
+
+    def __hash__(self) -> int:
+        return hash(self.value)
+
+    def __repr__(self) -> str:
+        return str(self.value)
+
+    @staticmethod
+    def has_oxidation_state(obj: str) -> bool:
+        return bool(
+            roman_numerals_optional_parentheses.search(obj)
+            or numbers_optional_parentheses.search(obj)
+        )
+
+    @classmethod
+    def from_string(cls, obj: str) -> tuple[Self, str]:
+        if match := roman_numerals_optional_parentheses.search(obj):
+            obj_dict = match.groupdict()
+            try:
+                value = roman.fromRoman(obj_dict["numeral"].upper())
+            except roman.InvalidRomanNumeralError:
+                raise ValueError(
+                    f"{obj_dict['numeral']} in string {obj} is not a valid roman numeral"
+                )
+            if "-" in obj_dict["sign"]:
+                value *= -1
+        elif match := numbers_optional_parentheses.search(obj):
+            obj_dict = match.groupdict()
+            # int() handles leading zeros directly; no need for eval() here
+            value = int(obj_dict["numeral"])
+            if "-" in obj_dict["sign"]:
+                value *= -1
+        else:
+            raise ValueError("No match found")
+
+        if value < -5 or value > 9:
+            raise ValueError(
+                f"Oxidation state {value} from name {obj} is outside physical bounds of [-5, +9]"
+            )
+
+        return OxidationState(value), obj[: match.start()]
diff --git a/src/flowmapper/fields/string_field.py b/src/flowmapper/fields/string_field.py
new file mode 100644
index 0000000..33e975f
--- /dev/null
+++ b/src/flowmapper/fields/string_field.py
@@ -0,0 +1,23 @@
+from collections import UserString
+from typing import Any, Self
+
+from flowmapper.utils import normalize_str
+
+
+class StringField(UserString):
+    def normalize(self, lowercase: bool = True) -> Self:
+        value = normalize_str(self.data)
+        if lowercase:
+            value = value.lower()
+        return type(self)(value)
+
+    def __eq__(self, other: Any) -> bool:
+        if not self.data:
+            # Empty strings aren't equal for our use case
+            return False
+        elif isinstance(other, StringField):
+            return self.data == other.data
+        elif isinstance(other, str):
+            return self.data == other or self.data == normalize_str(other)
+        else:
+            return False
diff --git a/src/flowmapper/flowmap.py b/src/flowmapper/flowmap.py
new file mode 100644
index 0000000..6477ddd
--- /dev/null
+++ b/src/flowmapper/flowmap.py
@@ -0,0 +1,723 @@
+from collections import Counter
+from collections.abc import Callable
+from functools import cached_property
+from pathlib import Path
+from time import time
+
+import pandas as pd
+import randonneur
+from structlog import get_logger
+
+from flowmapper import __version__
+from flowmapper.domain.flow import Flow
+from flowmapper.domain.match import Match
+from flowmapper.domain.normalized_flow import
NormalizedFlow +from flowmapper.matching import match_rules +from flowmapper.utils import apply_transformation_and_convert_flows_to_normalized_flows + +logger = get_logger("flowmapper") + + +class Flowmap: + """ + Crosswalk of flows from a source flow list to a target flow list. + + The Flowmap class manages the mapping process between source and target flow lists + using a series of matching rules. It tracks matches, generates statistics, and + provides export functionality for various formats (randonneur, GLAD). + + The class applies matching rules sequentially to find correspondences between + source and target flows. As matches are found, source flows are marked as matched + and excluded from subsequent rule applications. New target flows can be created + during the matching process and added to the target flow list. + + Attributes + ---------- + source_flows : list[NormalizedFlow] + The list of source flows to be mapped. These flows are checked against + matching rules to find correspondences with target flows. + target_flows : list[NormalizedFlow] + The list of target flows for mapping. This list can grow during matching + if new target flows are created by matching rules. + matches : list[Match] + List of Match objects representing successful mappings between source + and target flows. Initially empty, populated by `generate_matches()`. + rules : list[Callable[..., list[Match]]] + List of matching rule functions to apply. Each rule is a callable that + takes source_flows and target_flows and returns a list of Match objects. + data_preparation_functions : list[Callable[..., list[NormalizedFlow]]] + List of transformation functions used to prepare flows for matching and + to normalize newly created target flows. + show_progressbar : bool + Whether to display a progress bar during matching (currently not used). + + Examples + -------- + >>> from flowmapper.domain.flow import Flow + >>> from flowmapper.domain.normalized_flow import NormalizedFlow + >>> from flowmapper.flowmap import Flowmap + >>> from copy import copy + >>> + >>> # Create source and target flows + >>> source_flow = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> source_nf = NormalizedFlow( + ... original=source_flow, + ... normalized=source_flow.normalize(), + ... current=copy(source_flow.normalize()) + ... ) + >>> + >>> target_flow = Flow.from_dict({ + ... "name": "Carbon dioxide", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> target_nf = NormalizedFlow( + ... original=target_flow, + ... normalized=target_flow.normalize(), + ... current=copy(target_flow.normalize()) + ... ) + >>> + >>> # Create Flowmap and generate matches + >>> flowmap = Flowmap( + ... source_flows=[source_nf], + ... target_flows=[target_nf], + ... data_preparation_functions=[] + ... ) + >>> flowmap.generate_matches() + >>> len(flowmap.matches) + 1 + """ + + def __init__( + self, + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + data_preparation_functions: list[Callable[..., list[NormalizedFlow]]], + rules: list[Callable[..., list[Match]]] | None = None, + show_progressbar: bool = True, + ): + """ + Initialize a Flowmap with source and target flows. + + Creates a new Flowmap instance to manage the mapping process between + source and target flow lists. The matching rules and data preparation + functions are set up for use during the matching process. 
+
+        Parameters
+        ----------
+        source_flows : list[NormalizedFlow]
+            The list of source flows to be mapped. These flows will be checked
+            against matching rules to find correspondences with target flows.
+        target_flows : list[NormalizedFlow]
+            The list of target flows for mapping. This list can grow during
+            matching if new target flows are created.
+        data_preparation_functions : list[Callable[..., list[NormalizedFlow]]]
+            List of transformation functions used to prepare flows for matching.
+            These functions are also used to normalize newly created target flows
+            when they are added via `add_new_target_flows()`.
+        rules : list[Callable[..., list[Match]]] | None, optional
+            Custom matching rules to use. Each rule is a callable that takes
+            `source_flows` and `target_flows` as arguments and returns a list
+            of Match objects. If None, defaults to the rules returned by
+            `match_rules()`.
+        show_progressbar : bool, default=True
+            Whether to show a progress bar during matching (currently not
+            implemented).
+
+        Notes
+        -----
+        - The `matches` list is initialized as empty and populated by calling
+          `generate_matches()`.
+        - Source flows are filtered by their `matched` attribute during rule
+          application, so only unmatched flows are passed to each rule.
+        - New target flows created during matching are automatically normalized
+          using the data preparation functions before being added to the target
+          flow list.
+
+        Examples
+        --------
+        >>> from flowmapper.flowmap import Flowmap
+        >>> from flowmapper.matching import match_rules
+        >>>
+        >>> flowmap = Flowmap(
+        ...     source_flows=[source_nf1, source_nf2],
+        ...     target_flows=[target_nf1, target_nf2],
+        ...     data_preparation_functions=[],
+        ...     rules=match_rules()
+        ... )
+        """
+        self.show_progressbar = show_progressbar
+        self.rules = rules if rules else match_rules()
+        self.data_preparation_functions = data_preparation_functions
+        self.source_flows = source_flows
+        self.target_flows = target_flows
+        self.matches = []
+
+    @cached_property
+    def _matched_source_flows_ids(self) -> set[int]:
+        """Get a set of source flow IDs that have been matched.
+
+        Returns
+        -------
+        set[int]
+            Set of internal IDs (_id) from source flows that appear in matches.
+            Empty set if no matches exist.
+
+        Notes
+        -----
+        - This is a cached property used internally by `matched_source()` and
+          `unmatched_source` to efficiently determine which flows have been matched
+        - The cache is not invalidated when `matches` changes
+        """
+        return {match.source._id for match in self.matches}
+
+    def generate_matches(self) -> None:
+        """Generate matches by applying all matching rules sequentially.
+
+        This method iterates through all matching rules and applies them to
+        find correspondences between source and target flows. For each rule:
+        1. Filters source flows to only include unmatched flows
+        2. Calls the rule function with unmatched source flows and all target flows
+        3. Extends the matches list with results from the rule
+        4. If any matches create new target flows, adds them to the target flow list
+        5. Logs the number of matches found and time taken
+
+        After this method completes, the `matches` list contains all matches
+        found by all rules, and source flows that were matched will have their
+        `matched` attribute set to True.
+ + Notes + ----- + - Rules are applied in the order they appear in `self.rules` + - Each rule only receives source flows that haven't been matched yet + - New target flows are automatically normalized before being added + - The method logs information about each rule's performance + + Examples + -------- + >>> flowmap = Flowmap( + ... source_flows=[source_nf], + ... target_flows=[target_nf], + ... data_preparation_functions=[] + ... ) + >>> flowmap.generate_matches() + >>> len(flowmap.matches) + 1 + """ + for rule in self.rules: + start = time() + result = rule( + source_flows=[flow for flow in self.source_flows if not flow.matched], + target_flows=self.target_flows, + ) + elapsed = time() - start + + if new_target_flows := [ + obj.target for obj in result if obj.new_target_flow + ]: + self.add_new_target_flows(new_target_flows) + logger.info( + f"Match function {rule.__name__} produced {len(result)} matches and added {len(new_target_flows)} new target flows. It took {elapsed:.3} seconds." + ) + else: + logger.info( + f"Match function {rule.__name__} produced {len(result)} matches. It took {elapsed:.3} seconds." + ) + self.matches.extend(result) + + def add_new_target_flows(self, flows: list[Flow]) -> None: + """Add new target flows to the target flow list. + + This method is called automatically by `generate_matches()` when a + matching rule creates new target flows (indicated by `new_target_flow=True` + in Match objects). The new flows are normalized using the data + preparation functions before being added to the target flow list. + + Parameters + ---------- + flows : list[Flow] + List of Flow objects to add as new target flows. These flows are + normalized using `data_preparation_functions` before being added. + + Notes + ----- + - The flows are normalized using `apply_transformation_and_convert_flows_to_normalized_flows` + - Normalized flows are appended to `self.target_flows` + - This method is typically called automatically during `generate_matches()` + + Examples + -------- + >>> new_flow = Flow.from_dict({ + ... "name": "New flow", + ... "context": "air", + ... "unit": "kg" + ... }) + >>> flowmap.add_new_target_flows([new_flow]) + >>> len(flowmap.target_flows) + 2 + """ + normalized_flows = apply_transformation_and_convert_flows_to_normalized_flows( + functions=self.data_preparation_functions, flows=flows + ) + self.target_flows.extend(normalized_flows) + + def matched_source(self) -> list[NormalizedFlow]: + """Get a list of source flows that have been successfully matched. + + Returns all source flows that have been matched to at least one target + flow. A source flow is considered matched if its ID appears in any + Match object in the `matches` list. + + Returns + ------- + list[NormalizedFlow] + List of NormalizedFlow objects that have been matched. The list + is empty if no matches have been generated yet. + + Notes + ----- + - Uses the `_matched_source_flows_ids` cached property to determine + which flows have been matched + - Returns flows in the same order as they appear in `source_flows` + - Call `generate_matches()` first to populate matches + + Examples + -------- + >>> flowmap.generate_matches() + >>> matched = flowmap.matched_source() + >>> len(matched) + 1 + >>> matched[0].matched + True + """ + result = [ + flow + for flow in self.source_flows + if flow.id in self._matched_source_flows_ids + ] + return result + + @cached_property + def unmatched_source(self) -> list[NormalizedFlow]: + """Get a list of source flows that have not been matched. 
+
+        Returns all source flows that have not been matched to any target flow.
+        A source flow is considered unmatched if its ID does not appear in any
+        Match object in the `matches` list.
+
+        Returns
+        -------
+        list[NormalizedFlow]
+            List of NormalizedFlow objects that have not been matched. Returns
+            all source flows if no matches have been generated yet.
+
+        Notes
+        -----
+        - This is a cached property, so it's computed once and cached
+        - Uses the `_matched_source_flows_ids` cached property to determine
+          which flows have been matched
+        - Returns flows in the same order as they appear in `source_flows`
+        - The cache is not invalidated if the `matches` list changes
+
+        Examples
+        --------
+        >>> flowmap.generate_matches()
+        >>> unmatched = flowmap.unmatched_source
+        >>> len(unmatched)
+        0
+        """
+        result = [
+            flow
+            for flow in self.source_flows
+            if flow.id not in self._matched_source_flows_ids
+        ]
+        return result
+
+    def matched_source_statistics(self) -> pd.DataFrame:
+        """Calculate matching statistics grouped by source flow context.
+
+        Computes statistics showing how many source flows were matched for each
+        context, including the total number of source flows per context and
+        the matching percentage.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with columns:
+            - `context`: The context value
+            - `matched`: Number of matches for this context
+            - `total`: Total number of source flows in this context
+            - `percent`: Matching percentage (matched / total)
+            Rows are sorted by matching percentage (ascending).
+
+        Notes
+        -----
+        - Contexts with no matches will have `matched=0`
+        - Contexts with no source flows will have `total=0`
+        - Percentages are calculated as matched/total, which may be > 1.0 if
+          multiple matches exist per source flow
+        - Results are sorted by percentage (lowest first)
+
+        Examples
+        --------
+        >>> flowmap.generate_matches()
+        >>> stats = flowmap.matched_source_statistics()
+        >>> stats.columns.tolist()
+        ['context', 'matched', 'total', 'percent']
+        """
+        matched = pd.Series(
+            Counter([flow.source.context.value for flow in self.matches])
+        ).reset_index()
+        matched.columns = ["context", "matched"]
+
+        total = pd.Series(
+            Counter([flow.original.context.value for flow in self.source_flows])
+        ).reset_index()
+        total.columns = ["context", "total"]
+
+        df = pd.merge(matched, total, on="context", how="outer")
+        df = df.fillna(0).astype({"matched": "int", "total": "int"})
+
+        df["percent"] = df.matched / df.total
+        result = df.sort_values("percent")
+        return result
+
+    @cached_property
+    def matched_target_statistics(self) -> pd.DataFrame:
+        """Calculate matching statistics grouped by target flow context.
+
+        Computes statistics showing how many target flows were matched for each
+        context, including the total number of target flows per context and
+        the matching percentage.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with columns:
+            - `context`: The context value
+            - `matched`: Number of matches for this context
+            - `total`: Total number of target flows in this context
+            - `percent`: Matching percentage (matched / total)
+            Rows are sorted by matching percentage (ascending).
+
+        Notes
+        -----
+        - This is a cached property, so it's computed once and cached
+        - Contexts with no matches will have `matched=0`
+        - Contexts with no target flows will have `total=0`
+        - Percentages are calculated as matched/total, which may be > 1.0 if
+          multiple matches exist per target flow
+        - Results are sorted by percentage (lowest first)
+        - The cache is not invalidated if the `matches` or `target_flows` lists change
+
+        Examples
+        --------
+        >>> flowmap.generate_matches()
+        >>> stats = flowmap.matched_target_statistics
+        >>> stats.columns.tolist()
+        ['context', 'matched', 'total', 'percent']
+        """
+        matched = pd.Series(
+            Counter([flow.target.context.value for flow in self.matches])
+        ).reset_index()
+        matched.columns = ["context", "matched"]
+
+        total = pd.Series(
+            Counter([flow.original.context.value for flow in self.target_flows])
+        ).reset_index()
+        total.columns = ["context", "total"]
+
+        df = pd.merge(matched, total, on="context", how="outer")
+        df = df.fillna(0).astype({"matched": "int", "total": "int"})
+
+        df["percent"] = df.matched / df.total
+        result = df.sort_values("percent")
+        return result
+
+    def print_statistics(self) -> None:
+        """Print summary statistics for the flow mapping process.
+
+        Displays a formatted summary including:
+        - Number of source and target flows
+        - Total number of matches and percentage of source flows matched
+        - Cardinality distribution of mappings (1:1, 1:N, N:1, N:M)
+
+        The output is printed to stdout in a human-readable format.
+
+        Notes
+        -----
+        - Percentage is calculated as matches / source_flows
+        - Cardinalities are computed by `cardinalities()` method
+        - This method prints to stdout, so it's suitable for interactive use
+          but may need to be captured or redirected in automated contexts
+
+        Examples
+        --------
+        >>> flowmap.generate_matches()
+        >>> flowmap.print_statistics()
+        1 source and 1 target flows.
+        1 mappings (100.00% of total).
+        Mappings cardinalities: {'1:1': 1}
+        """
+        cardinalities = dict(Counter([x["cardinality"] for x in self.cardinalities()]))
+        percentage = (
+            len(self.matches) / len(self.source_flows) if self.source_flows else 0.0
+        )
+        print(
+            f"""{len(self.source_flows)} source and {len(self.target_flows)} target flows.
+{len(self.matches)} mappings ({percentage:.2%} of total).
+Mappings cardinalities: {str(cardinalities)}"""
+        )
+
+    def cardinalities(self) -> list[dict[str, int | str]]:
+        """Calculate and return the cardinality of each mapping.
+
+        Determines the relationship type (1:1, 1:N, N:1, or N:M) for each
+        match based on how many matches each source and target flow participate in.
+
+        Returns
+        -------
+        list[dict[str, int | str]]
+            List of dictionaries, each containing:
+            - `from`: Source flow internal ID
+            - `to`: Target flow internal ID
+            - `cardinality`: Relationship type as string ("1:1", "1:N", "N:1", or "N:M")
+            Results are sorted by source flow ID.
+ + Notes + ----- + - **1:1**: One source maps to one target, and that target maps only to this source + - **1:N**: One source maps to multiple targets + - **N:1**: Multiple sources map to the same target + - **N:M**: Multiple sources map to multiple targets (many-to-many) + - Cardinality is determined by counting how many matches each source + and target flow ID appears in + + Examples + -------- + >>> flowmap.generate_matches() + >>> card = flowmap.cardinalities() + >>> card[0] + {'from': 0, 'to': 0, 'cardinality': '1:1'} + """ + mappings = [(match.source._id, match.target._id) for match in self.matches] + lhs_counts = Counter([pair[0] for pair in mappings]) + rhs_counts = Counter([pair[1] for pair in mappings]) + + result = [] + + for lhs, rhs in mappings: + lhs_count = lhs_counts[lhs] + rhs_count = rhs_counts[rhs] + if lhs_count == 1 and rhs_count == 1: + result.append({"from": lhs, "to": rhs, "cardinality": "1:1"}) + elif lhs_count == 1 and rhs_count > 1: + result.append({"from": lhs, "to": rhs, "cardinality": "N:1"}) + elif lhs_count > 1 and rhs_count == 1: + result.append({"from": lhs, "to": rhs, "cardinality": "1:N"}) + elif lhs_count > 1 and rhs_count > 1: + result.append({"from": lhs, "to": rhs, "cardinality": "N:M"}) + + return sorted(result, key=lambda x: x["from"]) + + def to_randonneur( + self, + source_id: str, + target_id: str, + contributors: list, + mapping_source: dict, + mapping_target: dict, + version: str = "1.0.0", + licenses: list | None = None, + homepage: str | None = None, + name: str | None = None, + path: Path | None = None, + ) -> randonneur.Datapackage: + """Export mappings in randonneur data migration format. + + Creates a randonneur Datapackage containing all matches in a format + suitable for data migration and transformation workflows. The datapackage + can be saved to disk or returned for further processing. + + Parameters + ---------- + source_id : str + Identifier for the source flow list (e.g., "ecoinvent-3.8"). + target_id : str + Identifier for the target flow list (e.g., "ecoinvent-3.9"). + contributors : list + List of contributor information for the datapackage metadata. + mapping_source : dict + Mapping configuration for source flows (randonneur format). + mapping_target : dict + Mapping configuration for target flows (randonneur format). + version : str, default="1.0.0" + Version string for the datapackage. + licenses : list | None, optional + License information for the datapackage. + homepage : str | None, optional + Homepage URL for the datapackage. + name : str | None, optional + Name for the datapackage. If None, defaults to "{source_id}-{target_id}". + path : Path | None, optional + If provided, saves the datapackage as JSON to this path. + + Returns + ------- + randonneur.Datapackage + A Datapackage object containing all matches with verb "update". + The datapackage includes metadata and can be saved to disk if + `path` is provided. + + Notes + ----- + - All matches are exported using their `export()` method + - The datapackage description includes the flowmapper version + - If `path` is provided, the parent directory is created if it doesn't exist + + Examples + -------- + >>> dp = flowmap.to_randonneur( + ... source_id="source-v1", + ... target_id="target-v1", + ... contributors=[], + ... mapping_source={}, + ... mapping_target={} + ... 
) + >>> isinstance(dp, randonneur.Datapackage) + True + """ + dp = randonneur.Datapackage( + name=name or f"{source_id}-{target_id}", + source_id=source_id, + target_id=target_id, + description=f"Flowmapper {__version__} elementary flow correspondence from {source_id} to {target_id}", + contributors=contributors, + mapping_source=mapping_source, + mapping_target=mapping_target, + homepage=homepage, + version=version, + licenses=licenses, + ) + + dp.add_data(verb="update", data=[match.export() for match in self.matches]) + + if path is not None: + dp.to_json(path) + return dp + + def to_glad( + self, + path: Path | None = None, + ensure_id: bool = False, + missing_source: bool = False, + ) -> pd.DataFrame | None: + """Export mappings in GLAD (Global LCA Data Access) format. + + Creates a DataFrame or Excel file in the GLAD flow mapping format, + which is a standardized format for exchanging flow mappings in the + LCA community. + + Parameters + ---------- + path : Path | None, optional + If provided, exports the DataFrame to an Excel file at this path. + If None, returns the DataFrame without saving. + ensure_id : bool, default=False + If True, replaces None identifiers with empty strings. If False, + None identifiers remain as None in the DataFrame. + missing_source : bool, default=False + If True, includes unmatched source flows in the output with only + source flow information (no target flow data). + + Returns + ------- + pd.DataFrame | None + DataFrame with GLAD format columns: + - SourceFlowName, SourceFlowUUID, SourceFlowContext, SourceUnit + - MatchCondition, ConversionFactor + - TargetFlowName, TargetFlowUUID, TargetFlowContext, TargetUnit + - MemoMapper + Returns None if `path` is provided (file is saved instead). + + Notes + ----- + - If `path` is provided, creates an Excel file with auto-sized columns + - Unmatched source flows (when `missing_source=True`) only include + source flow columns, with target columns left empty + - Context values are exported as strings using "/" as separator + - Match conditions are converted using `MatchCondition.as_glad()` + - Excel files use xlsxwriter engine with formulas disabled + + Examples + -------- + >>> df = flowmap.to_glad() + >>> df.columns.tolist() + ['SourceFlowName', 'SourceFlowUUID', ...] 
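+    >>>
+    >>> # Include unmatched source flows as extra rows
+    >>> df_all = flowmap.to_glad(missing_source=True)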
+ >>> + >>> # Export to Excel + >>> flowmap.to_glad(path=Path("mapping.xlsx")) + """ + data = [] + for match in self.matches: + data.append( + { + "SourceFlowName": str(match.source.name), + "SourceFlowUUID": match.source.identifier + or ("" if ensure_id else None), + "SourceFlowContext": match.source.context.export_as_string( + join_character="/" + ), + "SourceUnit": str(match.source.unit), + "MatchCondition": match.condition.as_glad(), + "ConversionFactor": match.conversion_factor, + "TargetFlowName": str(match.target.name), + "TargetFlowUUID": match.target.identifier + or ("" if ensure_id else None), + "TargetFlowContext": match.target.context.export_as_string( + join_character="/" + ), + "TargetUnit": str(match.target.unit), + "MemoMapper": match.comment, + } + ) + + if missing_source: + for flow_obj in filter(lambda x: not x.matched, self.source_flows): + data.append( + { + "SourceFlowName": str(flow_obj.original.name), + "SourceFlowUUID": flow_obj.original.identifier + or ("" if ensure_id else None), + "SourceFlowContext": flow_obj.original.context.export_as_string(), + "SourceUnit": str(flow_obj.original.unit), + } + ) + + result = pd.DataFrame(data) + + if not path: + return result + else: + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + + writer = pd.ExcelWriter( + path, + engine="xlsxwriter", + engine_kwargs={"options": {"strings_to_formulas": False}}, + ) + result.to_excel(writer, sheet_name="Mapping", index=False, na_rep="NaN") + + for column in result: + column_length = max( + result[column].astype(str).map(len).max(), len(column) + ) + col_idx = result.columns.get_loc(column) + writer.sheets["Mapping"].set_column(col_idx, col_idx, column_length) + + writer.close() diff --git a/src/flowmapper/main.py b/src/flowmapper/main.py new file mode 100644 index 0000000..c2cb7b1 --- /dev/null +++ b/src/flowmapper/main.py @@ -0,0 +1,114 @@ +import json +import logging +from collections.abc import Callable +from pathlib import Path + +from randonneur import Datapackage +from randonneur_data import Registry + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match import Match +from flowmapper.flowmap import Flowmap +from flowmapper.utils import ( + apply_transformation_and_convert_flows_to_normalized_flows, + randonneur_as_function, +) + +logger = logging.getLogger(__name__) + + +def sorting_function(obj: dict) -> tuple: + return ( + obj.get("name", "ZZZ"), + str(obj.get("context", "ZZZ")), + obj.get("unit", "ZZZ"), + ) + + +def flowmapper( + source: Path, + target: Path, + source_id: str, + target_id: str, + contributors: list, + output_dir: Path, + version: str = "1.0.0", + transformations: list[Datapackage | str | dict | Callable] | None = None, + rules: list[Callable[..., list[Match]]] | None = None, + unit_normalization: bool = True, + licenses: list | None = None, + homepage: str | None = None, + name: str | None = None, + registry: Registry | None = None, + no_matching: bool = False, +) -> Flowmap: + """ + Generate mappings between elementary flows lists + """ + output_dir.mkdir(parents=True, exist_ok=True) + transformation_functions = [] + + if transformations is None: + transformations = [] + + if unit_normalization: + transformations.append("Flowmapper-standard-units-harmonization") + + for obj in transformations: + if isinstance(obj, (str, dict, Datapackage)): + transformation_functions.append( + randonneur_as_function(datapackage=obj, registry=registry) + ) + elif isinstance(obj, Callable): + transformation_functions.append(obj) + else: + 
raise ValueError(f"Can't understand transformation {obj}") + + original_source_flows = [Flow.from_dict(obj) for obj in json.load(open(source))] + source_flows = apply_transformation_and_convert_flows_to_normalized_flows( + functions=transformation_functions, flows=original_source_flows + ) + + original_target_flows = [Flow.from_dict(obj) for obj in json.load(open(target))] + target_flows = apply_transformation_and_convert_flows_to_normalized_flows( + functions=transformation_functions, flows=original_target_flows + ) + + flowmap = Flowmap( + source_flows=source_flows, + target_flows=target_flows, + data_preparation_functions=transformation_functions, + rules=rules, + ) + if no_matching: + return flowmap + flowmap.generate_matches() + flowmap.print_statistics() + + stem = f"{source.stem}-{target.stem}" + + with open(output_dir / f"{stem}-unmatched-source.json", "w") as fs: + json.dump( + sorted( + [flow.export() for flow in source_flows if not flow.matched], + key=sorting_function, + ), + fs, + indent=True, + ) + + flowmap.to_randonneur( + source_id=source_id, + target_id=target_id, + contributors=contributors, + mapping_source=Flow.randonneur_mapping(), + mapping_target=Flow.randonneur_mapping(), + version=version, + licenses=licenses, + homepage=homepage, + name=name, + path=output_dir / f"{stem}.json", + ) + flowmap.to_glad(output_dir / f"{stem}.xlsx", missing_source=True) + + return flowmap diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json similarity index 88% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json index db938de..aea33a3 100644 --- a/flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json +++ b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/just_different.json @@ -1,4 +1,12 @@ [ + { + "source": { + "name": "water, well" + }, + "target": { + "name": "water, well, in ground" + } + }, { "source": { "name": "Parathion, methyl" @@ -79,6 +87,14 @@ "name": "AOX, Adsorbable Organic Halides" } }, + { + "source": { + "name": "AOX, Adsorbable Organic Halogen" + }, + "target": { + "name": "AOX, Adsorbable Organic Halides" + } + }, { "source": { "name": "AOX, Adsorbable Organic Halogen as Cl" @@ -143,6 +159,14 @@ "name": "Sand, unspecified" } }, + { + "source": { + "name": "Sand, quartz" + }, + "target": { + "name": "Sand, unspecified" + } + }, { "source": { "name": "Potassium chloride" @@ -175,14 +199,6 @@ "name": "2,2,4-Trimethylpentane" } }, - { - "source": { - "name": "2,4-D, dimethylamine salt" - }, - "target": { - "name": "2,4-D dimethylamine salt" - } - }, { "source": { "name": "Dioxin, 2,3,7,8 Tetrachlorodibenzo-p-" @@ -241,19 +257,25 @@ }, { "source": { - "name": "Gas, natural, 36 MJ per m3" + "name": "Gas, natural, 36 MJ per m3", + "unit": "cubic_meter" }, "target": { - "name": "Gas, natural, in ground" - } + "name": "Gas, natural", + "unit": "standard_cubic_meter" + }, + "conversion_factor": 1.0 }, { "source": { - "name": "Gas, mine, off-gas, process, coal mining, 36 MJ per m3" + "name": "Gas, mine, off-gas, process, coal mining, 36 MJ per m3", + "unit": "cubic_meter" }, "target": { - "name": "Gas, mine, off-gas, process, coal mining" - } + "name": "Gas, mine, off-gas, process, coal mining", + "unit": "standard_cubic_meter" + }, + "conversion_factor": 1.0 }, { "source": { @@ -303,6 +325,14 @@ "name": "Caesium I" 
} }, + { + "source": { + "name": "Cesium" + }, + "target": { + "name": "Caesium" + } + }, { "source": { "name": "Dimethyl formamide" @@ -396,11 +426,7 @@ "name": "Roundup" }, "target": { - "name": "Glyphosate", - "context": [ - "air", - "non-urban air or from high stacks" - ] + "name": "Glyphosate" } } ] diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json similarity index 98% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json index 629c5b0..159cd6c 100644 --- a/flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json +++ b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/land_use_not_in_ecoinvent.json @@ -219,14 +219,6 @@ "name": "Transformation, to seabed, unspecified" } }, - { - "source": { - "name": "Transformation, to industrial area, built up" - }, - "target": { - "name": "Transformation, to industrial area" - } - }, { "source": { "name": "Transformation, to industrial area, vegetation" @@ -283,6 +275,14 @@ "name": "Transformation, to river, artificial" } }, + { + "source": { + "name": "Transformation, from urban, green areas" + }, + "target": { + "name": "Transformation, from urban, green area" + } + }, { "source": { "name": "Transformation, to water courses, artificial" diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json similarity index 87% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json index 80e7508..1e3ee5b 100644 --- a/flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json +++ b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/ores.json @@ -87,15 +87,6 @@ "name": "Energy, gross calorific value, in biomass" } }, - { - "source": { - "name": "Gas, mine, off-gas, process, coal mining/m3" - }, - "target": { - "name": "Gas, mine, off-gas, process, coal mining", - "unit": "Sm3" - } - }, { "source": { "name": "Silver, Ag 9.7E-4%, Au 9.7E-4%, Zn 0.63%, Cu 0.38%, Pb 0.014%, in ore" @@ -199,5 +190,35 @@ "target": { "name": "Rhodium" } + }, + { + "source": { + "name": "Gas, mine, off-gas, process, coal mining/m3", + "unit": "cubic_meter" + }, + "target": { + "name": "Gas, mine, off-gas, process, coal mining", + "unit": "standard_cubic_meter" + }, + "conversion_factor": 1.0 + }, + { + "source": { + "name": "Gas, natural/m3", + "unit": "cubic_meter" + }, + "target": { + "name": "Gas, natural", + "unit": "standard_cubic_meter" + }, + "conversion_factor": 1.0 + }, + { + "source": { + "name": "Energy, from hydro power" + }, + "target": { + "name": "Energy, potential (in hydropower reservoir), converted" + } } ] diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/regionalized_random.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/regionalized_random.json similarity index 100% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/regionalized_random.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/regionalized_random.json diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json similarity 
index 78% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json index 306cd0f..ca0ef0b 100644 --- a/flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json +++ b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/unit_conversions.json @@ -2,35 +2,33 @@ { "source": { "name": "Gas, mine, off-gas, process, coal mining/m3", - "unit": "cubic meter", - "context": ["natural resource", "in ground"] + "unit": "cubic_meter" }, "target": { "name": "Gas, mine, off-gas, process, coal mining", - "unit": "Sm3" + "unit": "standard_cubic_meter" }, "conversion_factor": 1.0 }, { "source": { "name": "Gas, natural/m3", - "unit": "m3", - "context": ["natural resource", "in ground"] + "unit": "cubic_meter" }, "target": { "name": "Gas, natural", - "unit": "Sm3" + "unit": "standard_cubic_meter" }, "conversion_factor": 1.0 }, { "source": { "name": "Energy, from peat", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Peat", - "unit": "kg" + "unit": "kilogram" }, "conversion_factor": 9.9, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -38,11 +36,11 @@ { "source": { "name": "Energy, from uranium", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Uranium", - "unit": "kg" + "unit": "kilogram" }, "conversion_factor": 560000.0, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -50,11 +48,11 @@ { "source": { "name": "Energy, from coal, brown", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Coal, brown", - "unit": "kg" + "unit": "kilogram" }, "conversion_factor": 9.9, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -62,11 +60,11 @@ { "source": { "name": "Energy, from gas, natural", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Gas, natural", - "unit": "Sm3" + "unit": "standard_cubic_meter" }, "conversion_factor": 40.3, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -74,11 +72,11 @@ { "source": { "name": "Energy, from oil sand (10% bitumen)", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Oil, crude", - "unit": "kg" + "unit": "kilogram" }, "conversion_factor": 45.8, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -86,11 +84,11 @@ { "source": { "name": "Energy, from oil sand (100% bitumen)", - "unit": "MJ" + "unit": "megajoule" }, "target": { "name": "Oil, crude", - "unit": "kg" + "unit": "kilogram" }, "conversion_factor": 45.8, "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" @@ -101,8 +99,6 @@ }, "target": { "name": "Energy, potential (in hydropower reservoir), converted" - }, - "conversion_factor": 40.3, - "comment": "Conversion factor from ecoinvent 3.10 CED LCIA factors" + } } ] diff --git a/flowmapper/manual_matching/data/simapro_ecoinvent_310/water_misc.json b/src/flowmapper/manual_matching/data/simapro_ecoinvent_310/water_misc.json similarity index 100% rename from flowmapper/manual_matching/data/simapro_ecoinvent_310/water_misc.json rename to src/flowmapper/manual_matching/data/simapro_ecoinvent_310/water_misc.json diff --git a/flowmapper/manual_matching/results/simapro-2024-ecoinvent-3.10-biosphere.json b/src/flowmapper/manual_matching/results/simapro-2024-ecoinvent-3.10-biosphere.json similarity index 100% rename from flowmapper/manual_matching/results/simapro-2024-ecoinvent-3.10-biosphere.json rename to 
src/flowmapper/manual_matching/results/simapro-2024-ecoinvent-3.10-biosphere.json diff --git a/flowmapper/manual_matching/simapro_ecoinvent_310.py b/src/flowmapper/manual_matching/simapro_ecoinvent_310.py similarity index 71% rename from flowmapper/manual_matching/simapro_ecoinvent_310.py rename to src/flowmapper/manual_matching/simapro_ecoinvent_310.py index 8bf554c..7c760f8 100644 --- a/flowmapper/manual_matching/simapro_ecoinvent_310.py +++ b/src/flowmapper/manual_matching/simapro_ecoinvent_310.py @@ -1,8 +1,8 @@ -import randonneur as rn -import randonneur_data as rd -from pathlib import Path import json +from pathlib import Path +import randonneur as rn +import randonneur_data as rd data_dir = Path(__file__).parent / "data" / "simapro_ecoinvent_310" results_dir = Path(__file__).parent / "results" @@ -24,25 +24,21 @@ def generate_simapro_ecoinvent_310_manual_matches( "ores.json", ] non_resources = { - 'Caesium': 'Caesium I', - 'Calcium': 'Calcium II', - 'Sodium': 'Sodium I', - 'Strontium': 'Strontium II', + "Caesium": "Caesium I", + "Calcium": "Calcium II", + "Sodium": "Sodium I", + "Strontium": "Strontium II", } non_resource_categories = [ - obj['source']['context'] - for obj in json.load(open(base_data_dir / "simapro-2023-ecoinvent-3-contexts.json"))["update"] - if obj['target']['context'][0] != "natural resource" + obj["source"]["context"] + for obj in json.load( + open(base_data_dir / "simapro-2023-ecoinvent-3-contexts.json") + )["update"] + if obj["target"]["context"][0] != "natural resource" ] data = [ - { - 'source': { - 'name': key, - 'context': context - }, - 'target': {'name': value} - } + {"source": {"name": key, "context": context}, "target": {"name": value}} for key, value in non_resources.items() for context in non_resource_categories ] @@ -52,18 +48,17 @@ def generate_simapro_ecoinvent_310_manual_matches( registry = rd.Registry() migration = registry.get_file("ecoinvent-3.9.1-biosphere-ecoinvent-3.10-biosphere") name_change = { - (pair['source']['name'], pair['target']['name']) - for pair in migration['replace'] - if 'name' in pair['target'] - and 'name' in pair['source'] - and pair['source']['name'] != pair['target']['name'] - and pair['source']['name'] not in non_resources + (pair["source"]["name"], pair["target"]["name"]) + for pair in migration["replace"] + if "name" in pair["target"] + and "name" in pair["source"] + and pair["source"]["name"] != pair["target"]["name"] + and pair["source"]["name"] not in non_resources } assert len(name_change) == len({a for a, b in name_change}) - data.extend([ - {'source': {'name': a}, 'target': {'name': b}} - for a, b in name_change - ]) + data.extend( + [{"source": {"name": a}, "target": {"name": b}} for a, b in name_change] + ) dp = rn.Datapackage( name="SimaPro-2024-to-ecoinvent-3.10-elementary-flows", diff --git a/src/flowmapper/matching/__init__.py b/src/flowmapper/matching/__init__.py new file mode 100644 index 0000000..9df92c6 --- /dev/null +++ b/src/flowmapper/matching/__init__.py @@ -0,0 +1,63 @@ +"""Matching functions for flow mapping. + +This package contains functions for matching flows between source and target +flow lists. Functions are organized by type: + +- core: Core utilities for transformation and matching +- basic: Basic matching functions (identical names, CAS numbers, etc.) 
+- transformation: Transformation-based matching functions +- context: Context-based matching functions +- specialized: Specialized matching functions (regionalized flows, suffixes) +- rules: Default matching rules configuration +""" + +from flowmapper.matching.basic import ( + match_close_names, + match_identical_cas_numbers, + match_identical_identifier, + match_identical_names, + match_identical_names_lowercase, + match_identical_names_without_commas, +) +from flowmapper.matching.context import ( + match_name_and_parent_context, + match_resources_with_wrong_subcontext, +) +from flowmapper.matching.core import get_matches, transform_and_then_match +from flowmapper.matching.ecoinvent import match_ecoinvent_transitive_matching +from flowmapper.matching.rules import match_rules, match_rules_simapro_ecoinvent +from flowmapper.matching.simapro import ( + manual_simapro_ecoinvent_mapping, + simapro_ecoinvent_glad_name_matching, +) +from flowmapper.matching.specialized import ( + add_missing_regionalized_flows, + match_names_with_suffix_removal, +) + +__all__ = [ + # Core + "transform_and_then_match", + "get_matches", + # Basic + "match_identical_identifier", + "match_identical_cas_numbers", + "match_identical_names", + "match_close_names", + "match_identical_names_lowercase", + "match_identical_names_without_commas", + # Transformation + "match_ecoinvent_transitive_matching", + # Context + "match_resources_with_wrong_subcontext", + "match_name_and_parent_context", + # Specialized + "add_missing_regionalized_flows", + "match_names_with_suffix_removal", + # Rules + "match_rules", + "match_rules_simapro_ecoinvent", + # SimaPro + "manual_simapro_ecoinvent_mapping", + "simapro_ecoinvent_glad_name_matching", +] diff --git a/src/flowmapper/matching/basic.py b/src/flowmapper/matching/basic.py new file mode 100644 index 0000000..944723a --- /dev/null +++ b/src/flowmapper/matching/basic.py @@ -0,0 +1,473 @@ +"""Basic matching functions. + +This module contains basic matching functions that match flows based on +identical or similar attributes without transformations. +""" + +import re + +from rapidfuzz.distance.DamerauLevenshtein import distance + +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching.core import get_matches +from flowmapper.utils import toolz + + +def match_identical_identifier( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], +) -> list: + """Match flows with identical identifiers. + + This function groups source flows by their identifier and matches them + to target flows with the same identifier. Only flows with non-None + identifiers are considered. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.exact for flows with + matching identifiers. + + Notes + ----- + - Only flows with non-None identifiers are matched + - If multiple target flows share the same identifier, `get_matches` will + only allow a single result target per source flow + - Match condition is always MatchCondition.exact + """ + matches = [] + + for source_id, sources in toolz.itertoolz.groupby( + lambda x: x.identifier, source_flows + ).items(): + if not source_id: + continue + matches.extend( + get_matches( + source_flows=sources, + # Filter target flows with matching identifier. 
We don't need to worry about + # duplicate identifiers as `get_matches` will only allow a single result target + target_flows=[ + flow for flow in target_flows if source_id == flow.identifier + ], + comment=f"Shared target-unique identifier: {source_id}", + function_name="match_identical_identifier", + match_condition=MatchCondition.exact, + ) + ) + + return matches + + +def match_identical_cas_numbers( + source_flows: list[NormalizedFlow], target_flows: list[NormalizedFlow] +) -> list: + """Match flows with identical CAS numbers, context, and location. + + This function matches flows that share the same CAS (Chemical Abstracts + Service) registry number, context, and location. All three attributes + must match exactly. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.exact for flows with + matching CAS numbers, context, and location. + + Notes + ----- + - CAS number, context, and location must all match exactly + - Match condition is always MatchCondition.exact + - Only unit-compatible flows are matched + """ + matches = [] + + for (cas_number, context, location), sources in toolz.itertoolz.groupby( + lambda x: (x.cas_number, x.context, x.location), source_flows + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + flow + for flow in target_flows + if flow.cas_number == cas_number + and flow.context == context + and flow.location == location + ], + comment=f"Shared CAS code with identical context and location: {cas_number}", + function_name="match_identical_cas_numbers", + match_condition=MatchCondition.exact, + ) + ) + + return matches + + +def match_identical_names( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + function_name: str | None = None, + comment: str | None = None, + match_condition: MatchCondition | None = None, +) -> list: + """Match flows with identical normalized names, context, oxidation state, and location. + + This is one of the most precise matching functions, requiring exact matches + on normalized name, context, oxidation state, and location. All four + attributes must match exactly. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.exact for flows with + identical normalized names, context, oxidation state, and location. 
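+
+    See Also
+    --------
+    match_identical_names_lowercase : Case-insensitive variant of this matcher.
+    match_close_names : Fuzzy variant that tolerates small spelling differences.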
+ + Notes + ----- + - All four attributes (name, context, oxidation_state, location) must match exactly + - Names are compared after normalization + - Match condition is always MatchCondition.exact + - Only unit-compatible flows are matched + """ + matches = [] + + for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + target + for target in target_flows + if target.name == name + and target.context == context + and target.oxidation_state == oxidation_state + and target.location == location + ], + comment=comment + or f"Shared normalized name with identical context, oxidation state, and location: {name}", + function_name=function_name or "match_identical_names", + match_condition=match_condition or MatchCondition.exact, + ) + ) + + return matches + + +def match_close_names( + source_flows: list[NormalizedFlow], target_flows: list[NormalizedFlow] +) -> list: + """Match flows with similar names using Damerau-Levenshtein distance. + + This function matches flows where the normalized names have a Damerau- + Levenshtein edit distance of less than 3, while still requiring exact + matches on context, oxidation state, and location. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.related for flows with + similar names (edit distance < 3) and identical context, oxidation + state, and location. + + Notes + ----- + - Uses Damerau-Levenshtein distance with case-insensitive comparison + - Edit distance must be less than 3 (i.e., 0, 1, or 2) + - Context, oxidation state, and location must still match exactly + - Match condition is MatchCondition.related (not exact due to name differences) + - Only unit-compatible flows are matched + """ + matches = [] + + for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + target + for target in target_flows + if distance( + str(target.name), str(name), processor=lambda x: x.lower() + ) + < 3 + and target.context == context + and target.oxidation_state == oxidation_state + and target.location == location + ], + comment=f"Name has Damerau Levenshtein edit distance of 2 or lower with identical context, oxidation state, and location: {name}", + function_name="match_close_names", + match_condition=MatchCondition.related, + ) + ) + + return matches + + +def match_identical_names_lowercase( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + function_name: str | None = None, + comment: str | None = None, + match_condition: MatchCondition | None = None, +) -> list: + """Match flows with identical names when compared in lowercase. + + This function matches flows where the normalized names are identical when + converted to lowercase, while still requiring exact matches on context, + oxidation state, and location. This handles cases where names differ only + in capitalization. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. 
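+    function_name : str | None, optional
+        Name of the matching function. Defaults to
+        "match_identical_names_lowercase".
+    comment : str | None, optional
+        Comment to include in Match objects. Defaults to a description of the
+        shared attributes.
+    match_condition : MatchCondition | None, optional
+        Match condition to use. Defaults to MatchCondition.close.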
+ + Returns + ------- + list[Match] + List of Match objects with MatchCondition.close for flows with + identical lowercase names and identical context, oxidation state, + and location. + + Notes + ----- + - Names are compared in lowercase (case-insensitive) + - Context, oxidation state, and location must still match exactly + - Match condition is MatchCondition.close (not exact due to case differences) + - Only unit-compatible flows are matched + """ + matches = [] + + for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows + ).items(): + name = name.lower() + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + flow + for flow in target_flows + if flow.name.lower() == name + and flow.context == context + and flow.oxidation_state == oxidation_state + and flow.location == location + ], + comment=comment + or f"Shared normalized lowercase name with identical context, oxidation state, and location: {name}", + function_name=function_name or "match_identical_names_lowercase", + match_condition=match_condition or MatchCondition.close, + ) + ) + + return matches + + +def match_identical_names_without_commas( + source_flows: list[NormalizedFlow], target_flows: list[NormalizedFlow] +) -> list: + """Match flows with identical names when commas are removed. + + This function matches flows where the normalized names are identical after + removing all commas, while still requiring exact matches on context, + oxidation state, and location. This handles cases where names differ only + in comma placement. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.close for flows with + identical names (after removing commas) and identical context, + oxidation state, and location. + + Notes + ----- + - All commas are removed from names before comparison + - Context, oxidation state, and location must still match exactly + - Match condition is MatchCondition.close (not exact due to comma differences) + - Only unit-compatible flows are matched + """ + matches = [] + + for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + flow + for flow in target_flows + if flow.name.replace(",", "") == name.replace(",", "") + and flow.context == context + and flow.oxidation_state == oxidation_state + and flow.location == location + ], + comment=f"Shared normalized name with commas removed and identical context, oxidation state, and location: {name}", + match_condition=MatchCondition.close, + function_name="match_identical_names_without_commas", + ) + ) + + return matches + + +is_uuid = re.compile( + r"^[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}$" +) + + +def match_identical_names_target_uuid_identifier( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + function_name: str | None = None, + comment: str | None = None, + match_condition: MatchCondition | None = None, +) -> list: + """Match flows with identical normalized names, context, oxidation state, and location. 
+
+    This function is similar to `match_identical_names`, but with the additional
+    requirement that target flows must have a UUID identifier. It is used in cases
+    where the target flow list contains two otherwise identical flows that we could
+    match to; normally we reject a match with multiple candidates. Instead of fixing
+    these manually, we prefer the target flow with a UUID identifier. This is a hack,
+    so only use this function as a last resort.
+
+    Parameters
+    ----------
+    source_flows : list[NormalizedFlow]
+        List of source flows to match.
+    target_flows : list[NormalizedFlow]
+        List of target flows to match against. Only flows with UUID identifiers
+        will be considered.
+    function_name : str | None, optional
+        Name of the matching function. Defaults to
+        "match_identical_names_target_uuid_identifier".
+    comment : str | None, optional
+        Comment to include in Match objects. Defaults to a description of the
+        shared attributes.
+    match_condition : MatchCondition | None, optional
+        Match condition to use. Defaults to MatchCondition.exact.
+
+    Returns
+    -------
+    list[Match]
+        List of Match objects with MatchCondition.exact (or the specified condition)
+        for flows with identical normalized names, context, oxidation state,
+        and location, where the target flow has a UUID identifier.
+
+    Notes
+    -----
+    - All four attributes (name, context, oxidation_state, location) must match exactly
+    - Target flows must have a non-None identifier that matches the UUID format
+    - UUID format is validated using regex: 8-4-4-4-12 hexadecimal digits
+    - Names are compared after normalization
+    - Match condition defaults to MatchCondition.exact
+    - Only unit-compatible flows are matched (enforced by `get_matches`)
+
+    Examples
+    --------
+    >>> from flowmapper.domain.flow import Flow
+    >>> from flowmapper.domain.normalized_flow import NormalizedFlow
+    >>> from copy import copy
+    >>>
+    >>> source = Flow.from_dict({
+    ...     "name": "Carbon dioxide",
+    ...     "context": "air",
+    ...     "unit": "kg"
+    ... })
+    >>> source_nf = NormalizedFlow(
+    ...     original=source,
+    ...     normalized=source.normalize(),
+    ...     current=copy(source.normalize())
+    ... )
+    >>>
+    >>> target = Flow.from_dict({
+    ...     "name": "Carbon dioxide",
+    ...     "context": "air",
+    ...     "unit": "kg",
+    ...     "identifier": "550e8400-e29b-41d4-a716-446655440000"  # Valid UUID
+    ... })
+    >>> target_nf = NormalizedFlow(
+    ...     original=target,
+    ...     normalized=target.normalize(),
+    ...     current=copy(target.normalize())
+    ... )
+    >>>
+    >>> matches = match_identical_names_target_uuid_identifier(
+    ...     source_flows=[source_nf],
+    ...     target_flows=[target_nf]
+    ...
) + >>> len(matches) + 1 + """ + matches = [] + + for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + target + for target in target_flows + if target.name == name + and target.context == context + and target.oxidation_state == oxidation_state + and target.location == location + and target.identifier is not None + and is_uuid.match(target.identifier) + ], + comment=comment + or f"Shared normalized name with identical context, oxidation state, and location: {name}", + function_name=function_name + or "match_identical_names_target_uuid_identifier", + match_condition=match_condition or MatchCondition.exact, + ) + ) + + return matches diff --git a/src/flowmapper/matching/context.py b/src/flowmapper/matching/context.py new file mode 100644 index 0000000..36eb5e2 --- /dev/null +++ b/src/flowmapper/matching/context.py @@ -0,0 +1,139 @@ +"""Context-based matching functions. + +This module contains matching functions that match flows based on context +relationships. +""" + +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching.core import get_matches +from flowmapper.utils import toolz + + +def match_resources_with_wrong_subcontext( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + function_name: str | None = None, + comment: str | None = None, + match_condition: MatchCondition | None = None, +) -> list: + """Match resource flows ignoring subcontext differences. + + This function matches flows that are both resource-type flows (as + determined by `context.is_resource()`), have identical names, oxidation + states, and locations, but may have different subcontexts. This handles + cases where resource flows are categorized differently but represent the + same resource. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. Only resource-type flows are considered. + target_flows : list[NormalizedFlow] + List of target flows to match against. Only resource-type flows are + considered. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.close for resource flows + with identical names, oxidation states, and locations, but potentially + different subcontexts. 
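+
+    See Also
+    --------
+    match_name_and_parent_context : Matches flows whose target context is the
+        parent of the source context.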
+ + Notes + ----- + - Only flows where `normalized.context.is_resource()` returns True are matched + - Name, oxidation state, and location must match exactly + - Subcontext differences are ignored (both must be resource-type) + - Match condition is MatchCondition.close (not exact due to subcontext differences) + - Only unit-compatible flows are matched + """ + matches = [] + + for (name, oxidation_state, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.oxidation_state, x.location), + filter(lambda f: f.normalized.context.is_resource(), source_flows), + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + flow + for flow in target_flows + if flow.name == name + and flow.normalized.context.is_resource() + and flow.oxidation_state == oxidation_state + and flow.location == location + ], + comment=comment + or f"Shared normalized name and resource-type context, with identical oxidation state and location: {name}", + match_condition=match_condition or MatchCondition.close, + function_name=function_name or "match_resources_with_wrong_subcontext", + ) + ) + + return matches + + +def match_name_and_parent_context( + source_flows: list[NormalizedFlow], target_flows: list[NormalizedFlow] +) -> list: + """Match flows where target has parent context of source. + + This function matches flows where the source flow has a multi-level context + (e.g., ["emissions", "to air"]) and the target flow has the parent context + (e.g., ["emissions"]). This handles cases where flows are categorized at + different levels of specificity. + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. Only flows with multi-level contexts + (length > 1) are considered. + target_flows : list[NormalizedFlow] + List of target flows to match against. + + Returns + ------- + list[Match] + List of Match objects with MatchCondition.related for flows where the + target context is the parent of the source context. + + Notes + ----- + - Only source flows with contexts of length > 1 are considered + - Target context must exactly match the parent of source context (context[:-1]) + - Name, oxidation state, and location must match exactly + - Match condition is MatchCondition.related (not exact due to context differences) + - Only unit-compatible flows are matched + + Examples + -------- + >>> # Source: context=["emissions", "to air"] + >>> # Target: context=["emissions"] + >>> # These will match if name, oxidation_state, and location also match + """ + matches = [] + + for (name, oxidation_state, context, location), sources in toolz.itertoolz.groupby( + lambda x: (x.name, x.oxidation_state, x.context, x.location), + filter(lambda f: len(f.context) > 1, source_flows), + ).items(): + matches.extend( + get_matches( + source_flows=sources, + target_flows=[ + flow + for flow in target_flows + if flow.name == name + and flow.context == context[:-1] + and flow.oxidation_state == oxidation_state + and flow.location == location + ], + comment="Shared normalized name and parent context, with identical oxidation state and location", + match_condition=MatchCondition.related, + function_name="match_name_and_parent_context", + ) + ) + + return matches diff --git a/src/flowmapper/matching/core.py b/src/flowmapper/matching/core.py new file mode 100644 index 0000000..e057ff7 --- /dev/null +++ b/src/flowmapper/matching/core.py @@ -0,0 +1,258 @@ +"""Core matching utilities. 
+ +This module contains core utility functions for matching flows, including +transformation and filtering support. +""" + +import itertools +from collections.abc import Callable + +from flowmapper.domain.match import Match +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow + + +def transform_and_then_match( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + match_function: Callable, + transform_source_flows: list[Callable] | None = None, + transform_target_flows: list[Callable] | None = None, + filter_source_flows: Callable | None = None, + filter_target_flows: Callable | None = None, +) -> list[Match]: + """Apply transformations and filters to flows, then match them. + + This function provides a flexible way to apply transformations and filters + to source and target flows before matching, while ensuring all flows are + reset to their normalized state after matching completes. + + The function applies transformations and filters in the following order: + 1. Transform source flows (if provided) - applies all transformations in sequence + 2. Filter source flows (if provided) + 3. Transform target flows (if provided) - applies all transformations in sequence + 4. Filter target flows (if provided) + 5. Call match function with filtered flows + 6. Reset all flows to normalized state + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. + target_flows : list[NormalizedFlow] + List of target flows to match against. + match_function : Callable + Function that performs the actual matching. Must accept keyword arguments + `source_flows` and `target_flows` (both lists of NormalizedFlow) and return + a list of Match objects. + transform_source_flows : list[Callable[[list[NormalizedFlow]], list[NormalizedFlow]]] | None + Optional list of functions to transform source flows. Functions are applied + in sequence. Each function takes a list of NormalizedFlow objects and returns + a modified list. Functions should modify flows in place (e.g., using + update_current) and return the same list. + transform_target_flows : list[Callable[[list[NormalizedFlow]], list[NormalizedFlow]]] | None + Optional list of functions to transform target flows. Functions are applied + in sequence. Each function takes a list of NormalizedFlow objects and returns + a modified list. Functions should modify flows in place (e.g., using + update_current) and return the same list. + filter_source_flows : Callable[[list[NormalizedFlow]], list[NormalizedFlow]] | None + Optional function to filter source flows. Takes a list of NormalizedFlow objects + and returns a filtered list (may be shorter than input). + filter_target_flows : Callable[[list[NormalizedFlow]], list[NormalizedFlow]] | None + Optional function to filter target flows. Takes a list of NormalizedFlow objects + and returns a filtered list (may be shorter than input). + + Returns + ------- + list[Match] + List of Match objects found by the match function. + + Examples + -------- + >>> from flowmapper.matching import match_identical_names, transform_and_then_match + >>> from flowmapper.utils import apply_randonneur + >>> from functools import partial + >>> + >>> # Transform flows with a single function (wrap in list) + >>> transform_func = partial( + ... apply_randonneur, + ... datapackage="some-transformation", + ... fields=["name", "context"] + ... ) + >>> + >>> matches = transform_and_then_match( + ... 
source_flows=source_flows, + ... target_flows=target_flows, + ... match_function=match_identical_names, + ... transform_source_flows=[transform_func], + ... transform_target_flows=[transform_func] + ... ) + >>> + >>> # Transform flows with multiple functions in sequence + >>> transform1 = partial(apply_randonneur, datapackage="transformation-1", fields=["name"]) + >>> transform2 = partial(apply_randonneur, datapackage="transformation-2", fields=["context"]) + >>> + >>> matches = transform_and_then_match( + ... source_flows=source_flows, + ... target_flows=target_flows, + ... match_function=match_identical_names, + ... transform_source_flows=[transform1, transform2], + ... transform_target_flows=[transform1, transform2] + ... ) + >>> + >>> # Filter flows before matching + >>> def filter_resources(flows): + ... return [f for f in flows if f.normalized.context.is_resource()] + >>> + >>> matches = transform_and_then_match( + ... source_flows=source_flows, + ... target_flows=target_flows, + ... match_function=match_identical_names, + ... filter_source_flows=filter_resources, + ... filter_target_flows=filter_resources + ... ) + + Notes + ----- + - All flows (both source and target) are automatically reset to their normalized + state after matching completes successfully. If the match function raises an + exception, flows will not be reset. + - When multiple transformations are provided in a list, they are applied in + sequence. The output of each transformation becomes the input to the next. + - To apply a single transformation, wrap it in a list: `[transform_func]` + """ + # Apply source flow transformations + if transform_source_flows is None: + transformed_source_flows = source_flows + else: + # Apply multiple transformations in sequence + transformed_source_flows = source_flows + for transform_func in transform_source_flows: + transformed_source_flows = transform_func(transformed_source_flows) + + # Apply source flow filters + filtered_source_flows = ( + filter_source_flows(transformed_source_flows) + if filter_source_flows + else transformed_source_flows + ) + + # Apply target flow transformations + if transform_target_flows is None: + transformed_target_flows = target_flows + else: + # Apply multiple transformations in sequence + transformed_target_flows = target_flows + for transform_func in transform_target_flows: + transformed_target_flows = transform_func(transformed_target_flows) + + # Apply target flow filters + filtered_target_flows = ( + filter_target_flows(transformed_target_flows) + if filter_target_flows + else transformed_target_flows + ) + + matches = match_function( + source_flows=filtered_source_flows, target_flows=filtered_target_flows + ) + + for flow in itertools.chain(source_flows, target_flows): + flow.reset_current() + + return matches + + +def get_matches( + source_flows: list[NormalizedFlow], + target_flows: list[NormalizedFlow], + comment: str, + function_name: str, + match_condition: MatchCondition, +) -> list[Match]: + """Create Match objects from source and target flows. + + This is a helper function used by various matching functions to create + Match objects with proper unit compatibility checking and conversion + factor calculation. It handles the common logic of: + - Filtering target flows by unit compatibility + - Resolving multiple target matches by context matching + - Calculating conversion factors + - Marking source flows as matched + + Parameters + ---------- + source_flows : list[NormalizedFlow] + List of source flows to match. 
Each source flow will be matched
+        against compatible target flows.
+    target_flows : list[NormalizedFlow]
+        List of target flows to match against. Only unit-compatible flows
+        are considered.
+    comment : str
+        Comment to include in each Match object describing the match.
+    function_name : str
+        Name of the matching function that created this match (e.g.,
+        "match_identical_names").
+    match_condition : MatchCondition
+        The match quality condition (exact, close, related, etc.).
+
+    Returns
+    -------
+    list[Match]
+        List of Match objects. Each Match represents a successful match
+        between a source flow and a target flow.
+
+    Notes
+    -----
+    - Only unit-compatible flows are matched (checked via `unit_compatible()`)
+    - If multiple target flows are unit-compatible, the function tries to
+      find the most appropriate match by matching normalized contexts
+    - If exactly one target flow matches after context filtering, a Match
+      is created and the source flow is marked as matched
+    - Conversion factors are calculated automatically using
+      `source.conversion_factor(target)` which accounts for both unit
+      conversion and any transformation factors
+    - The function only creates matches when there is exactly one target
+      flow remaining after filtering
+
+    Examples
+    --------
+    >>> matches = get_matches(
+    ...     source_flows=[source_flow],
+    ...     target_flows=[target_flow1, target_flow2],
+    ...     comment="Shared identifier",
+    ...     function_name="match_identical_identifier",
+    ...     match_condition=MatchCondition.exact
+    ... )
+    """
+    if not target_flows:
+        return []
+
+    matches = []
+
+    for source in source_flows:
+        targets = [flow for flow in target_flows if source.unit_compatible(flow)]
+        if len(targets) > 1:
+            # Try to find the most appropriate match if more than one is present. Added
+            # because ecoinvent deprecated most stratospheric emissions and redirected them
+            # to air, unspecified, so now all air, unspecified emissions have multiple targets.
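+            # Keep only candidates whose normalized context matches the source's exactly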
+            targets = [
+                target
+                for target in targets
+                if target.normalized.context == source.normalized.context
+            ]
+        if len(targets) == 1:
+            target = targets[0]
+            source.matched = True
+            matches.append(
+                Match(
+                    source=source.original,
+                    target=target.original,
+                    function_name=function_name,
+                    comment=comment or "",
+                    condition=match_condition,
+                    conversion_factor=source.conversion_factor(target),
+                )
+            )
+
+    return matches
diff --git a/src/flowmapper/matching/ecoinvent.py b/src/flowmapper/matching/ecoinvent.py
new file mode 100644
index 0000000..5582d57
--- /dev/null
+++ b/src/flowmapper/matching/ecoinvent.py
@@ -0,0 +1,31 @@
+from functools import partial
+
+from flowmapper.domain.match_condition import MatchCondition
+from flowmapper.matching.basic import match_identical_names
+from flowmapper.matching.core import transform_and_then_match
+from flowmapper.utils import apply_randonneur
+
+match_ecoinvent_transitive_matching = partial(
+    transform_and_then_match,
+    match_function=partial(
+        match_identical_names,
+        function_name="match_ecoinvent_transitive_matching",
+        comment="Shared normalized attributes after applying transformation: ecoinvent-2.2-biosphere-ecoinvent-3.12-biosphere-transitive",
+        match_condition=MatchCondition.close,
+    ),
+    transform_source_flows=[
+        partial(
+            apply_randonneur,
+            datapackage="ecoinvent-2.2-biosphere-ecoinvent-3.12-biosphere-transitive",
+            fields=["name", "context"],
+        )
+    ],
+    transform_target_flows=[
+        partial(
+            apply_randonneur,
+            datapackage="ecoinvent-2.2-biosphere-ecoinvent-3.12-biosphere-transitive",
+            fields=["name", "context"],
+        )
+    ],
+)
+match_ecoinvent_transitive_matching.__name__ = "match_ecoinvent_transitive_matching"
diff --git a/src/flowmapper/matching/rules.py b/src/flowmapper/matching/rules.py
new file mode 100644
index 0000000..98f40c1
--- /dev/null
+++ b/src/flowmapper/matching/rules.py
@@ -0,0 +1,86 @@
+"""Matching rules configuration.
+
+This module provides the default set of matching rules used by Flowmap.
+"""
+
+from flowmapper.matching.basic import (
+    match_identical_cas_numbers,
+    match_identical_identifier,
+    match_identical_names,
+    match_identical_names_target_uuid_identifier,
+    match_identical_names_without_commas,
+)
+from flowmapper.matching.context import (
+    match_name_and_parent_context,
+    match_resources_with_wrong_subcontext,
+)
+from flowmapper.matching.ecoinvent import match_ecoinvent_transitive_matching
+from flowmapper.matching.simapro import (
+    manual_simapro_ecoinvent_mapping,
+    manual_simapro_ecoinvent_mapping_add_regionalized_flows,
+    manual_simapro_ecoinvent_mapping_resource_wrong_subcontext,
+    simapro_ecoinvent_glad_name_matching,
+)
+from flowmapper.matching.specialized import (
+    add_missing_regionalized_flows,
+    match_names_with_suffix_removal,
+)
+
+
+def match_rules():
+    """Return the default list of matching functions.
+
+    This function returns the default ordered list of matching functions
+    used by Flowmap. The functions are applied in order, and matching
+    stops once a flow is successfully matched.
+
+    Returns
+    -------
+    list[Callable]
+        List of matching functions to apply in order. Each function must
+        accept `source_flows` and `target_flows` keyword arguments and
+        return a list of Match objects.
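+
+    See Also
+    --------
+    match_rules_simapro_ecoinvent : Extended rule list for SimaPro 2024 to
+        ecoinvent 3.10 biosphere matching.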
+
+    Notes
+    -----
+    - Functions are applied in order from most specific to least specific
+    - Once a flow is matched, it is not considered by subsequent functions
+    - `match_rules_simapro_ecoinvent` extends this list with specialized
+      rules for SimaPro to ecoinvent biosphere matching
+
+    Examples
+    --------
+    >>> rules = match_rules()
+    >>> for rule in rules:
+    ...     matches = rule(source_flows=source, target_flows=target)
+    ...     # Process matches...
+    """
+    return [
+        match_identical_identifier,
+        match_identical_names,
+        match_identical_names_without_commas,
+        match_resources_with_wrong_subcontext,
+        match_name_and_parent_context,
+        match_identical_cas_numbers,
+        match_names_with_suffix_removal,
+    ]
+
+
+def match_rules_simapro_ecoinvent():
+    """Return the ordered list of matching functions for SimaPro to ecoinvent biosphere matching."""
+    return [
+        match_identical_identifier,
+        match_identical_names,
+        match_identical_names_without_commas,
+        match_ecoinvent_transitive_matching,
+        match_resources_with_wrong_subcontext,
+        match_name_and_parent_context,
+        manual_simapro_ecoinvent_mapping,
+        simapro_ecoinvent_glad_name_matching,
+        manual_simapro_ecoinvent_mapping_add_regionalized_flows,
+        manual_simapro_ecoinvent_mapping_resource_wrong_subcontext,
+        add_missing_regionalized_flows,
+        match_identical_cas_numbers,
+        match_identical_names_target_uuid_identifier,
+        match_names_with_suffix_removal,
+    ]
diff --git a/src/flowmapper/matching/simapro.py b/src/flowmapper/matching/simapro.py
new file mode 100644
index 0000000..24e2b8d
--- /dev/null
+++ b/src/flowmapper/matching/simapro.py
@@ -0,0 +1,121 @@
+from functools import partial
+
+from randonneur_data import Registry
+
+from flowmapper.domain.match_condition import MatchCondition
+from flowmapper.matching import match_identical_names_lowercase
+from flowmapper.matching.context import match_resources_with_wrong_subcontext
+from flowmapper.matching.core import transform_and_then_match
+from flowmapper.matching.specialized import add_missing_regionalized_flows
+from flowmapper.utils import apply_randonneur
+
+manual_simapro_ecoinvent_mapping = partial(
+    transform_and_then_match,
+    match_function=partial(
+        match_identical_names_lowercase,
+        function_name="manual_simapro_ecoinvent_mapping",
+        comment="Shared normalized attributes after applying transformation: simapro-2024-biosphere-ecoinvent-3.10-biosphere",
+        match_condition=MatchCondition.related,
+    ),
+    transform_source_flows=[
+        partial(
+            apply_randonneur,
+            datapackage="simapro-2024-biosphere-ecoinvent-3.10-biosphere",
+            fields=["name", "unit"],
+        )
+    ],
+)
+manual_simapro_ecoinvent_mapping.__name__ = "manual_simapro_ecoinvent_mapping"
+
+
+manual_simapro_ecoinvent_mapping_add_regionalized_flows = partial(
+    transform_and_then_match,
+    match_function=partial(
+        add_missing_regionalized_flows,
+        function_name="manual_simapro_ecoinvent_mapping_add_regionalized_flows",
+    ),
+    transform_source_flows=[
+        partial(
+            apply_randonneur,
+            datapackage="simapro-2024-biosphere-ecoinvent-3.10-biosphere",
+            fields=["name", "unit"],
+        )
+    ],
+)
+manual_simapro_ecoinvent_mapping_add_regionalized_flows.__name__ = (
+    "manual_simapro_ecoinvent_mapping_add_regionalized_flows"
+)
+
+
+manual_simapro_ecoinvent_mapping_resource_wrong_subcontext = partial(
+    transform_and_then_match,
+    match_function=partial(
+        match_resources_with_wrong_subcontext,
+        function_name="manual_simapro_ecoinvent_mapping_resource_wrong_subcontext",
+    ),
+    transform_source_flows=[
+        partial(
+            apply_randonneur,
+            datapackage="simapro-2024-biosphere-ecoinvent-3.10-biosphere",
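+            # `fields` restricts which flow attributes randonneur uses when
+            # matching and rewriting flows with this datapackage (passed through
+            # to randonneur's MigrationConfig; see utils/randonneur.py below)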
+            fields=["name", "unit"],
+        )
+    ],
+)
+manual_simapro_ecoinvent_mapping_resource_wrong_subcontext.__name__ = (
+    "manual_simapro_ecoinvent_mapping_resource_wrong_subcontext"
+)
+
+
+def _get_normalized_matching() -> dict:
+    """Load the transitive GLAD datapackage and normalize its source contexts."""
+    registry = Registry()
+
+    context_mapping = {
+        line["source"]["context"]: line["target"]["context"]
+        for line in registry.get_file("SimaPro-2025-ecoinvent-3.12-context")["update"]
+    }
+
+    dp = registry.get_file(
+        "simapro-2025-biosphere-ef-3.1-biosphere-ecoinvent-3.12-biosphere-transitive"
+    )
+
+    # Remove indoor mappings - these were deleted from ecoinvent, so map to other subcontexts.
+    # However, there is no guarantee that they will have the _same_ mapping in that subcontext
+    # as the other, existing mapping, and multiple conflicting mappings will raise an error.
+    dp["update"] = [
+        row for row in dp["update"] if not row["source"]["context"].endswith("indoor")
+    ]
+
+    for row in dp["update"]:
+        # Our source flows are already normalized to this form
+        row["source"]["context"] = context_mapping[row["source"]["context"]]
+
+    return dp
+
+
+simapro_ecoinvent_glad_name_matching = partial(
+    transform_and_then_match,
+    transform_source_flows=[
+        partial(
+            apply_randonneur,
+            datapackage=_get_normalized_matching(),
+            fields=["name", "context"],
+        )
+    ],
+    match_function=partial(
+        match_identical_names_lowercase,
+        function_name="simapro_ecoinvent_glad_name_matching",
+        comment="Shared normalized attributes after applying transformation: simapro-2025-biosphere-ef-3.1-biosphere-ecoinvent-3.12-biosphere-transitive",
+        match_condition=MatchCondition.related,
+    ),
+)
+simapro_ecoinvent_glad_name_matching.__name__ = "simapro_ecoinvent_glad_name_matching"
diff --git a/src/flowmapper/matching/specialized.py b/src/flowmapper/matching/specialized.py
new file mode 100644
index 0000000..fe13064
--- /dev/null
+++ b/src/flowmapper/matching/specialized.py
@@ -0,0 +1,330 @@
+"""Specialized matching functions.
+
+This module contains specialized matching functions for specific use cases
+like regionalized flows and suffix matching.
+"""
+
+from flowmapper.domain.match import Match
+from flowmapper.domain.match_condition import MatchCondition
+from flowmapper.domain.normalized_flow import NormalizedFlow
+from flowmapper.utils import toolz
+
+
+def add_missing_regionalized_flows(
+    source_flows: list[NormalizedFlow],
+    target_flows: list[NormalizedFlow],
+    function_name: str | None = None,
+    comment: str | None = None,
+    match_condition: MatchCondition | None = None,
+) -> list[Match]:
+    """Add missing regionalized flows based on existing regionalized flows.
+
+    If a source flow has a location and there are target flows with the same
+    name, context, and oxidation state but different locations, create a new
+    target flow for the source location.
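+
+    For example (hypothetical data), a source flow with location "NL" can be
+    matched to a copy of an existing "DE" target flow whose location is
+    rewritten to "NL"; see the Examples section below.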
+
+    The function groups source flows by (name, oxidation_state, context, location)
+    and for each group:
+    - If there are other regionalized target flows (same name/context/oxidation_state
+      but different location), uses the first one as a template
+    - Otherwise, if there is exactly one non-regionalized target flow (same
+      name/context/oxidation_state but no location), uses that as a template
+    - Creates a new target flow by copying the template and setting the source's
+      location using `copy_with_new_location`
+
+    Parameters
+    ----------
+    source_flows : list[NormalizedFlow]
+        List of source flows to match. Only flows with a location are considered.
+    target_flows : list[NormalizedFlow]
+        List of target flows to match against.
+    function_name : str | None, optional
+        Name of the matching function (currently not used, defaults to
+        "add_missing_regionalized_flows").
+    comment : str | None, optional
+        Comment for matches (currently not used, defaults to a description of
+        the new target flow).
+    match_condition : MatchCondition | None, optional
+        Match condition (currently not used, defaults to MatchCondition.related).
+
+    Returns
+    -------
+    list[Match]
+        List of Match objects with new_target_flow=True. Each match represents
+        a source flow matched to a newly created target flow.
+
+    Notes
+    -----
+    - Only source flows with a location are considered
+    - Target flows must be unit-compatible with source flows to create matches
+    - The new target flow is created using `copy_with_new_location`, which sets
+      a new UUID identifier
+    - All matches are created with `MatchCondition.related` and
+      `new_target_flow=True`
+
+    Examples
+    --------
+    >>> source = NormalizedFlow.from_dict({
+    ...     "name": "Carbon dioxide, NL",
+    ...     "context": "air",
+    ...     "unit": "kg"
+    ... })
+    >>> target = NormalizedFlow.from_dict({
+    ...     "name": "Carbon dioxide, DE",
+    ...     "context": "air",
+    ...     "unit": "kg"
+    ... })
+    >>> matches = add_missing_regionalized_flows(
+    ...     source_flows=[source],
+    ...     target_flows=[target]
+    ... )
+    >>> len(matches)
+    1
+    >>> matches[0].new_target_flow
+    True
+    """
+    matches = []
+
+    for (name, oxidation_state, context, location), sources in toolz.itertoolz.groupby(
+        lambda x: (x.name, x.oxidation_state, x.context, x.location),
+        filter(lambda x: x.location, source_flows),
+    ).items():
+        other_regions = [
+            flow
+            for flow in target_flows
+            if flow.name == name
+            and flow.context == context
+            and flow.oxidation_state == oxidation_state
+            and flow.location
+            and flow.location != location
+        ]
+        non_regionalized = [
+            flow
+            for flow in target_flows
+            if flow.name == name
+            and flow.context == context
+            and flow.oxidation_state == oxidation_state
+            and flow.location is None
+        ]
+
+        if other_regions:
+            target = other_regions[0]
+
+            for source in sources:
+                if source.unit_compatible(target):
+                    source.matched = True
+                    matches.append(
+                        Match(
+                            source=source.original,
+                            target=target.original.copy_with_new_location(
+                                location=location
+                            ),
+                            function_name="add_missing_regionalized_flows",
+                            comment=f"Added new target flow for location {location}, with shared name, context, and oxidation state",
+                            condition=MatchCondition.related,
+                            conversion_factor=source.conversion_factor(target),
+                            new_target_flow=True,
+                        )
+                    )
+        elif len(non_regionalized) == 1:
+            target = non_regionalized[0]
+
+            for source in sources:
+                if source.unit_compatible(target):
+                    source.matched = True
+                    matches.append(
+                        Match(
+                            source=source.original,
+                            target=target.original.copy_with_new_location(
+                                location=location
+                            ),
+                            function_name="add_missing_regionalized_flows",
+                            comment=f"Added new target flow for location {location}, with shared name, context, and oxidation state",
+                            condition=MatchCondition.related,
+                            conversion_factor=source.conversion_factor(target),
+                            new_target_flow=True,
+                        )
+                    )
+
+    return matches
+
+
+def equivalent_names(a: str, b: str) -> bool:
+    """Check if two flow names are equivalent after removing certain suffixes.
+
+    This function determines if two flow names represent the same substance by
+    checking if they differ only by specific suffixes that don't change the
+    fundamental identity of the flow. It handles two types of equivalences:
+
+    1. **Suffix removal**: Names are equivalent if one has a suffix and the
+       other doesn't, but the base names match. Supported suffixes:
+       - ", in ground"
+       - ", ion"
+       - ", in air"
+       - ", in water"
+       - ", unspecified origin"
+
+    2. **Biogenic/non-fossil equivalence**: Names ending with ", biogenic" and
+       ", non-fossil" are considered equivalent if the base names match.
+
+    Parameters
+    ----------
+    a : str
+        First flow name to compare.
+    b : str
+        Second flow name to compare.
+
+    Returns
+    -------
+    bool
+        True if the names are equivalent (differ only by supported suffixes),
+        False otherwise.
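+
+    See Also
+    --------
+    match_names_with_suffix_removal : matching function built on this check.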
+
+    Notes
+    -----
+    - The function is case-sensitive for the base name comparison
+    - Suffix matching is exact (must match the full suffix string)
+    - For biogenic/non-fossil equivalence, the base names must match exactly
+      after removing the respective suffixes (10 chars for ", biogenic" and
+      12 chars for ", non-fossil")
+    - The ", ion" suffix is safe to ignore because matching functions also
+      check for matching oxidation states, ensuring correct matching
+
+    Examples
+    --------
+    >>> equivalent_names("Carbon dioxide, in air", "Carbon dioxide")
+    True
+    >>> equivalent_names("Carbon dioxide", "Carbon dioxide, in air")
+    True
+    >>> equivalent_names("Carbon dioxide, in ground", "Carbon dioxide, in air")
+    False
+    >>> equivalent_names("Methane, biogenic", "Methane, non-fossil")
+    True
+    >>> equivalent_names("Carbon dioxide, ion", "Carbon dioxide")
+    True
+    >>> equivalent_names("Carbon dioxide", "Carbon monoxide")
+    False
+    """
+    suffixes = [
+        ", in ground",
+        ", ion",  # OK because we still check for single match and matching oxidation state
+        ", in air",
+        ", in water",
+        ", unspecified origin",
+    ]
+    for suffix in suffixes:
+        if a.endswith(suffix) and not b.endswith(suffix) and a[: -len(suffix)] == b:
+            return True
+        if b.endswith(suffix) and not a.endswith(suffix) and b[: -len(suffix)] == a:
+            return True
+    if a.endswith(", biogenic") and b.endswith(", non-fossil") and a[:-10] == b[:-12]:
+        return True
+    if b.endswith(", biogenic") and a.endswith(", non-fossil") and b[:-10] == a[:-12]:
+        return True
+    return False
+
+
+def match_names_with_suffix_removal(
+    source_flows: list[NormalizedFlow],
+    target_flows: list[NormalizedFlow],
+    function_name: str | None = None,
+    comment: str | None = None,
+    match_condition: MatchCondition | None = None,
+) -> list[Match]:
+    """Match flows where names are equivalent after removing certain suffixes.
+
+    This function matches source and target flows where the names are considered
+    equivalent by `equivalent_names`, meaning they differ only by supported
+    suffixes (e.g., ", in air", ", in ground", ", ion", ", biogenic"/", non-fossil").
+    In addition to name equivalence, flows must also have matching:
+    - Context
+    - Oxidation state
+    - Location
+
+    The function groups source flows by (name, context, oxidation_state, location)
+    and for each group, finds target flows with equivalent names (using
+    `equivalent_names`) and matching attributes.
+
+    Parameters
+    ----------
+    source_flows : list[NormalizedFlow]
+        List of source flows to match. Flows are grouped by name, context,
+        oxidation state, and location.
+    target_flows : list[NormalizedFlow]
+        List of target flows to match against. Only flows with equivalent names
+        and matching attributes are considered.
+    function_name : str | None, optional
+        Name of the matching function. Defaults to "match_names_with_suffix_removal".
+    comment : str | None, optional
+        Comment for matches. Defaults to a descriptive string about suffix removal.
+    match_condition : MatchCondition | None, optional
+        The match quality condition. Defaults to MatchCondition.close.
+
+    Returns
+    -------
+    list[Match]
+        List of Match objects representing successful matches. Each match has
+        a source flow and target flow with equivalent names (after suffix removal)
+        and matching context, oxidation state, and location.
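+
+    See Also
+    --------
+    equivalent_names : the name-equivalence check used by this function.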
+
+    Notes
+    -----
+    - Names are compared in lowercase for matching
+    - Only unit-compatible flows are matched (handled by `get_matches`)
+    - The function uses `equivalent_names` to determine name equivalence
+    - Supported suffixes include: ", in ground", ", ion", ", in air", ", in water",
+      ", unspecified origin", and the biogenic/non-fossil pair
+    - If multiple target flows match, `get_matches` handles resolution based on
+      context matching
+
+    Examples
+    --------
+    >>> from flowmapper.domain.normalized_flow import NormalizedFlow
+    >>> from flowmapper.matching.specialized import match_names_with_suffix_removal
+    >>>
+    >>> source = NormalizedFlow.from_dict({
+    ...     "name": "Carbon dioxide, in air",
+    ...     "context": "air",
+    ...     "unit": "kg"
+    ... })
+    >>> target = NormalizedFlow.from_dict({
+    ...     "name": "Carbon dioxide",
+    ...     "context": "air",
+    ...     "unit": "kg"
+    ... })
+    >>> matches = match_names_with_suffix_removal(
+    ...     source_flows=[source],
+    ...     target_flows=[target]
+    ... )
+    >>> len(matches)
+    1
+    >>> matches[0].condition
+    MatchCondition.close
+    """
+    from flowmapper.matching.core import get_matches
+
+    matches = []
+
+    for (name, context, oxidation_state, location), sources in toolz.itertoolz.groupby(
+        lambda x: (x.name, x.context, x.oxidation_state, x.location), source_flows
+    ).items():
+        name = name.lower()
+        matches.extend(
+            get_matches(
+                source_flows=sources,
+                target_flows=[
+                    flow
+                    for flow in target_flows
+                    if equivalent_names(name, flow.name.lower())
+                    and flow.context == context
+                    and flow.oxidation_state == oxidation_state
+                    and flow.location == location
+                ],
+                comment=comment
+                or f"Shared normalized lowercase name with suffix removed and identical context, oxidation state, and location: {name}",
+                function_name=function_name or "match_names_with_suffix_removal",
+                match_condition=match_condition or MatchCondition.close,
+            )
+        )
+
+    return matches
diff --git a/flowmapper/preferred_synonyms.py b/src/flowmapper/preferred_synonyms.py
similarity index 57%
rename from flowmapper/preferred_synonyms.py
rename to src/flowmapper/preferred_synonyms.py
index c678fd9..c3c2b01 100644
--- a/flowmapper/preferred_synonyms.py
+++ b/src/flowmapper/preferred_synonyms.py
@@ -1,6 +1,6 @@
 import re
 
-from flowmapper.flow import Flow
+from flowmapper.domain.flow import Flow
 
 ROMAN_NUMERAL_PATTERN = re.compile(r"\b\(?[ivx]+[\+-]?\)?\s*$", flags=re.IGNORECASE)
 PARENTHESES_PATTERN = re.compile(r"\([1-9]+[\+-]?\)\s*$")
@@ -35,24 +35,48 @@ def has_number_pattern_at_end(text: str) -> bool:
 def match_identical_names_in_preferred_synonyms(
-    s: Flow, t: Flow, comment: str = "Identical preferred synonyms"
+    source_flows: list[Flow],
+    target_flows: list[Flow],
+    comment: str = "Identical preferred synonyms",
 ):
     if t.synonyms and s.name in t.synonyms and s.context == t.context:
         if s.name.normalized in t.name.normalized and (
             has_roman_numeral_at_end(t.name.normalized)
             or has_number_pattern_at_end(t.name.normalized)
         ):
+            # Check if there's another target flow with a different name that shares the same synonym
+            for other_target in target_flows:
+                if (
+                    other_target is not t
+                    and other_target.name.normalized != t.name.normalized
+                    and other_target.synonyms
+                    and s.name in other_target.synonyms
+                    and other_target.context == s.context
+                ):
+                    return None
             return {"comment": comment}
     elif s.synonyms and t.name in s.synonyms and s.context == t.context:
         if t.name.normalized in s.name.normalized and (
             has_roman_numeral_at_end(s.name.normalized)
             or has_number_pattern_at_end(s.name.normalized)
        ):
+            # Check if there's another target flow that shares the same synonym
+            for other_target in target_flows:
+                if (
+                    other_target is not t
+                    and other_target.name.normalized != t.name.normalized
+                    and other_target.synonyms
+                    and t.name in other_target.synonyms
+                    and other_target.context == s.context
+                ):
+                    return None
             return {"comment": comment}
 
 
 def match_identical_names_in_synonyms(
-    s: Flow, t: Flow, comment: str = "Identical synonyms"
+    source_flows: list[Flow],
+    target_flows: list[Flow],
+    comment: str = "Identical synonyms",
 ):
     if (t.synonyms and s.name in t.synonyms and s.context == t.context) or (
         s.synonyms and t.name in s.synonyms and s.context == t.context
diff --git a/src/flowmapper/unit.py b/src/flowmapper/unit.py
new file mode 100644
index 0000000..95e4746
--- /dev/null
+++ b/src/flowmapper/unit.py
@@ -0,0 +1,66 @@
+import importlib.resources as resource
+import json
+import math
+from collections import UserString
+from pathlib import Path
+from typing import Any, Self
+
+from pint import UnitRegistry, errors
+
+from flowmapper.utils import normalize_str
+
+ureg = UnitRegistry()
+
+with resource.as_file(resource.files("flowmapper") / "data" / "units.txt") as filepath:
+    ureg.load_definitions(filepath)
+
+with open(Path(__file__).parent / "data" / "standard-units-harmonization.json") as f:
+    UNIT_MAPPING = {
+        line["source"]["unit"]: line["target"]["unit"]
+        for line in json.load(f)["update"]
+    }
+
+
+class UnitField(UserString):
+    def normalize(self) -> Self:
+        """Normalize string to fit into our `pint` definitions"""
+        label = normalize_str(self.data)
+        if label in UNIT_MAPPING:
+            label = UNIT_MAPPING[label]
+        try:
+            ureg(label)
+        except errors.UndefinedUnitError:
+            raise ValueError(
+                f"Unit {label} is unknown; add it to flowmapper's `units.txt` or define a mapping in `standard-units-harmonization.json`"
+            )
+        # Makes type checkers happy, if inelegant...
+        return type(self)(label)
+
+    def is_uri(self, value: str) -> bool:
+        # Placeholder for when we support glossary entries
+        return False
+
+    def resolve_uri(self, uri: str) -> None:
+        # Placeholder
+        pass
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, UnitField):
+            return self.data == other.data or self.conversion_factor(other) == 1
+        else:
+            return self.data == other
+
+    def compatible(self, other: Any) -> bool:
+        return math.isfinite(self.conversion_factor(other))
+
+    def conversion_factor(self, to: Any) -> float:
+        if not isinstance(to, (UnitField, str)):
+            result = float("nan")
+        elif isinstance(to, UnitField) and self.data == to.data:
+            result = 1.0
+        else:
+            try:
+                result = ureg(self.data).to(ureg(str(to))).magnitude
+            except (errors.DimensionalityError, errors.UndefinedUnitError):
+                result = float("nan")
+        return result
diff --git a/src/flowmapper/utils/__init__.py b/src/flowmapper/utils/__init__.py
new file mode 100644
index 0000000..c030053
--- /dev/null
+++ b/src/flowmapper/utils/__init__.py
@@ -0,0 +1,57 @@
+"""Utility functions for flowmapper.
+
+This package contains utility functions organized by functionality:
+- context: Context-related utilities
+- strings: String manipulation utilities
+- flow_names: Flow name processing
+- randonneur: Randonneur-based transformations
+- files: File I/O utilities
+- constants: Shared constants and data
+"""
+
+from flowmapper.utils.constants import (
+    RESULTS_DIR,
+    default_registry,
+    logger,
+    names_and_locations,
+    toolz,
+)
+from flowmapper.utils.context import (
+    MISSING_VALUES,
+    as_normalized_tuple,
+    tupleize_context,
+)
+from flowmapper.utils.files import load_standard_transformations, read_migration_files
+from flowmapper.utils.flow_names import remove_unit_slash, unit_slash
+from flowmapper.utils.randonneur import (
+    apply_randonneur,
+    apply_transformation_and_convert_flows_to_normalized_flows,
+    randonneur_as_function,
+)
+from flowmapper.utils.strings import normalize_str, rowercase
+
+__all__ = [
+    # Constants
+    "RESULTS_DIR",
+    "default_registry",
+    "logger",
+    "names_and_locations",
+    "toolz",
+    # Context
+    "MISSING_VALUES",
+    "as_normalized_tuple",
+    "tupleize_context",
+    # Strings
+    "normalize_str",
+    "rowercase",
+    # Flow names
+    "remove_unit_slash",
+    "unit_slash",
+    # Randonneur
+    "apply_transformation_and_convert_flows_to_normalized_flows",
+    "apply_randonneur",
+    "randonneur_as_function",
+    # Files
+    "load_standard_transformations",
+    "read_migration_files",
+]
diff --git a/src/flowmapper/utils/constants.py b/src/flowmapper/utils/constants.py
new file mode 100644
index 0000000..6a83165
--- /dev/null
+++ b/src/flowmapper/utils/constants.py
@@ -0,0 +1,25 @@
+"""Shared constants and data for flowmapper utilities."""
+
+import importlib.resources as resource
+import json
+from pathlib import Path
+
+import structlog
+from randonneur_data import Registry
+
+logger = structlog.get_logger("flowmapper")
+default_registry = Registry()
+RESULTS_DIR = Path(__file__).parent.parent / "manual_matching" / "results"
+
+with resource.as_file(
+    resource.files("flowmapper") / "data" / "names_and_locations.json"
+) as filepath:
+    with open(filepath) as fs:
+        names_and_locations = {o["source"]: o for o in json.load(fs)}
+
+try:
+    import cytoolz as toolz
+except ImportError:
+    logger.info("Install `cytoolz` to get a speed-up in matching functions")
+    import toolz
+
+assert toolz  # Keep the import visible to linters; `toolz` is re-exported for matching functions
diff --git a/src/flowmapper/utils/context.py b/src/flowmapper/utils/context.py
new file mode 100644
index 0000000..191e4f2
--- /dev/null
+++ b/src/flowmapper/utils/context.py
@@ -0,0 +1,72 @@
+"""Context-related utility functions."""
+
+from typing import Any
+
+MISSING_VALUES = {
+    "",
+    "(unknown)",
+    "(unspecified)",
+    "null",
+    "unknown",
+    "unspecified",
+}
+
+
+def as_normalized_tuple(value: Any) -> tuple[str, ...]:
+    """Convert context inputs to normalized tuple form."""
+    if isinstance(value, (tuple, list)):
+        intermediate = value
+    elif isinstance(value, str) and "/" in value:
+        intermediate = list(value.split("/"))
+    elif isinstance(value, str):
+        intermediate = [value]
+    else:
+        raise ValueError(f"Can't understand input context {value}")
+
+    intermediate = [elem.lower().strip() for elem in intermediate]
+
+    while intermediate and intermediate[-1] in MISSING_VALUES:
+        if len(intermediate) == 1:
+            break
+        intermediate = intermediate[:-1]
+
+    return tuple(intermediate)
+
+
+def tupleize_context(obj: dict) -> dict:
+    """Convert `context` value to `tuple` if possible.
+
+    Handles both individual migration objects and full datapackage structures.
+    For datapackages, iterates through verb keys (like "update", "create") and
+    processes all migration objects in those lists.
+    """
+    # Handle datapackage structure with verb keys (update, create, etc.)
+    if isinstance(obj, dict):
+        # Check if this looks like a datapackage (has verb keys with lists)
+        verb_keys = ["update", "create", "delete", "rename"]
+        has_verb_keys = any(
+            key in obj and isinstance(obj[key], list) for key in verb_keys
+        )
+
+        if has_verb_keys:
+            # This is a datapackage - process each verb's list
+            for verb in verb_keys:
+                if verb in obj and isinstance(obj[verb], list):
+                    for migration_obj in obj[verb]:
+                        if isinstance(migration_obj, dict):
+                            tupleize_context(migration_obj)
+            return obj
+
+    # Handle individual migration object or dict with context
+    if isinstance(obj, dict):
+        # Process top-level context if present
+        if "context" in obj and not isinstance(obj["context"], str):
+            obj["context"] = as_normalized_tuple(obj["context"])
+
+        # Recursively process source and target
+        if isinstance(obj.get("source"), dict):
+            tupleize_context(obj["source"])
+        if isinstance(obj.get("target"), dict):
+            tupleize_context(obj["target"])
+
+    return obj
diff --git a/src/flowmapper/utils/files.py b/src/flowmapper/utils/files.py
new file mode 100644
index 0000000..d135101
--- /dev/null
+++ b/src/flowmapper/utils/files.py
@@ -0,0 +1,51 @@
+"""File I/O utility functions."""
+
+import importlib.resources as resource
+import json
+from pathlib import Path
+
+from flowmapper.utils.constants import RESULTS_DIR
+
+
+def load_standard_transformations() -> list:
+    """Load standard transformation files."""
+    with resource.as_file(
+        resource.files("flowmapper") / "data" / "simapro-2023-ecoinvent-3-contexts.json"
+    ) as filepath:
+        with open(filepath) as fs:
+            contexts = json.load(fs)
+    return [contexts]
+
+
+def read_migration_files(*filepaths: str | Path) -> list[dict]:
+    """
+    Read and aggregate migration data from multiple JSON files.
+
+    This function opens and reads a series of JSON files, each containing
+    migration data. File names are first resolved against `RESULTS_DIR`;
+    anything not found there is treated as a regular path.
+
+    Parameters
+    ----------
+    *filepaths : str | Path
+        Variable length argument list of file names or paths.
+
+    Returns
+    -------
+    list[dict]
+        A list with the parsed contents of each JSON file, in the order the
+        paths were given.
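+
+    Examples
+    --------
+    Hypothetical usage (the file name is illustrative):
+
+    >>> data = read_migration_files("manual-matches.json")
+    >>> isinstance(data, list)
+    True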
+ """ + migration_data = [] + + for filepath in filepaths: + if (RESULTS_DIR / filepath).is_file(): + filepath = RESULTS_DIR / filepath + with open(Path(filepath)) as fs: + migration_data.append(json.load(fs)) + + return migration_data diff --git a/src/flowmapper/utils/flow_names.py b/src/flowmapper/utils/flow_names.py new file mode 100644 index 0000000..6391410 --- /dev/null +++ b/src/flowmapper/utils/flow_names.py @@ -0,0 +1,31 @@ +"""Flow name processing utility functions.""" + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +import structlog + +if TYPE_CHECKING: + from flowmapper.domain.flow import Flow + +logger = structlog.get_logger("flowmapper") + +unit_slash = re.compile(r"/(?Pm3|kg)(\,?\s+|\s+|$)") + + +def remove_unit_slash(obj: Flow) -> str: + """Remove unit references from flow names that appear as '/unit' suffix.""" + name = obj.name.data + if match := unit_slash.search(name): + obj_dict = match.groupdict() + if match.end() == len(name): + name = name[: match.start()] + else: + name = name[: match.start()] + ", " + name[match.end() :] + if not obj.unit.compatible(obj_dict["unit"]): + logger.warning( + f"Flow {obj} has unit '{obj.unit}' but name refers to incompatible unit '{obj_dict['unit']}'" + ) + return name diff --git a/src/flowmapper/utils/randonneur.py b/src/flowmapper/utils/randonneur.py new file mode 100644 index 0000000..30ee56d --- /dev/null +++ b/src/flowmapper/utils/randonneur.py @@ -0,0 +1,148 @@ +"""Randonneur-based transformation utility functions.""" + +from __future__ import annotations + +import copy +from collections.abc import Callable +from functools import partial +from typing import TYPE_CHECKING + +from randonneur import Datapackage, MigrationConfig, migrate_nodes +from randonneur_data import Registry + +from flowmapper.utils.constants import default_registry +from flowmapper.utils.context import tupleize_context + +if TYPE_CHECKING: + from flowmapper.domain.flow import Flow + from flowmapper.domain.normalized_flow import NormalizedFlow + + +def randonneur_as_function( + datapackage: str | Datapackage | dict, + fields: list[str] | None = None, + registry: Registry | None = None, + verbs: list[str] | None = None, +) -> Callable: + """Take a prepared transformation in""" + if registry is None: + registry = default_registry + if verbs is None: + verbs = ["update"] + + if isinstance(datapackage, Datapackage): + datapackage = datapackage.data + elif isinstance(datapackage, str): + datapackage = registry.get_file(datapackage) + elif "update" not in datapackage: + raise KeyError + + return partial( + migrate_nodes, + migrations=tupleize_context(datapackage), + config=MigrationConfig( + verbs=verbs, + case_sensitive=( + False + if "case-insensitive" not in datapackage + else not datapackage.get("case-insensitive") + ), + fields=fields, + add_conversion_factor_to_nodes=True, + ), + ) + + +def apply_randonneur( + flows: list[NormalizedFlow], + datapackage: str | Datapackage | dict, + fields: list[str] | None = None, + registry: Registry | None = None, + normalize: bool = False, +) -> list[NormalizedFlow]: + """Apply randonneur transformations to NormalizedFlow objects.""" + from flowmapper.domain.flow import Flow + + func = randonneur_as_function( + datapackage=datapackage, fields=fields, registry=registry + ) + transformed_data = func(graph=[nf.normalized.to_dict() for nf in flows]) + + for flow, data_dict in zip(flows, transformed_data): + if normalize: + flow.current = Flow.from_dict(data_dict).normalize() + else: + 
+            flow.current = Flow.from_dict(data_dict)
+
+    return flows
+
+
+def apply_transformation_and_convert_flows_to_normalized_flows(
+    functions: list[Callable[..., list[dict]]], flows: list[Flow]
+) -> list[NormalizedFlow]:
+    """
+    Apply a series of transformation functions to flows and return NormalizedFlow objects.
+
+    This function takes a list of Flow objects and applies a sequence of transformation
+    functions to them. Each transformation function receives the flow data as dictionaries
+    (via the `graph` keyword argument) and returns modified dictionaries. The transformations
+    are applied sequentially, with each function receiving the output of the previous one.
+
+    After all transformations are applied, the modified flow dictionaries are converted back
+    to Flow objects, normalized, and wrapped in NormalizedFlow objects. The original Flow
+    objects are preserved and stored in the `original` attribute of each NormalizedFlow.
+
+    Parameters
+    ----------
+    functions : list[Callable[..., list[dict]]]
+        List of transformation functions to apply sequentially. Each function must accept
+        a `graph` keyword argument containing a list of flow dictionaries and return a
+        list of modified flow dictionaries. Functions are typically created using
+        `randonneur_as_function()`.
+    flows : list[Flow]
+        List of Flow objects to transform. The original Flow objects are not modified.
+
+    Returns
+    -------
+    list[NormalizedFlow]
+        List of NormalizedFlow objects, one for each input flow. Each NormalizedFlow contains:
+        - `original`: The original Flow object (unchanged)
+        - `normalized`: The transformed and normalized Flow object
+        - `current`: A copy of the normalized Flow object
+
+    Examples
+    --------
+    >>> from flowmapper.domain.flow import Flow
+    >>> from flowmapper.utils import apply_transformation_and_convert_flows_to_normalized_flows, randonneur_as_function
+    >>>
+    >>> # Create a transformation function
+    >>> transform_func = randonneur_as_function(datapackage="some-transformation")
+    >>>
+    >>> # Create flows
+    >>> flows = [
+    ...     Flow.from_dict({"name": "Carbon dioxide", "context": "air", "unit": "kg"})
+    ... ]
+    >>>
+    >>> # Apply transformations
+    >>> normalized_flows = apply_transformation_and_convert_flows_to_normalized_flows(
+    ...     functions=[transform_func],
+    ...     flows=flows
+    ... )
+    >>>
+    >>> # Access transformed data
+    >>> print(normalized_flows[0].normalized.name.data)
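+    >>> # Hypothetical check: the input Flow objects are preserved unchanged,
+    >>> # per the contract described above
+    >>> normalized_flows[0].original is flows[0]
+    True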
+    """
+    from flowmapper.domain.flow import Flow
+    from flowmapper.domain.normalized_flow import NormalizedFlow
+
+    flow_dicts = [obj.to_dict() for obj in flows]
+
+    for function in functions:
+        flow_dicts = function(graph=flow_dicts)
+
+    normalized_flows = [Flow.from_dict(obj).normalize() for obj in flow_dicts]
+
+    return [
+        NormalizedFlow(original=o, normalized=n, current=copy.copy(n))
+        for o, n in zip(flows, normalized_flows)
+    ]
diff --git a/src/flowmapper/utils/strings.py b/src/flowmapper/utils/strings.py
new file mode 100644
index 0000000..da25486
--- /dev/null
+++ b/src/flowmapper/utils/strings.py
@@ -0,0 +1,25 @@
+"""String manipulation utility functions."""
+
+import unicodedata
+from collections.abc import Collection, Mapping
+from typing import Any
+
+
+def normalize_str(s: Any) -> str:
+    """Normalize a string using Unicode NFC normalization and strip whitespace."""
+    if s is not None:
+        return unicodedata.normalize("NFC", s).strip()
+    else:
+        return ""
+
+
+def rowercase(obj: Any) -> Any:
+    """Recursively transform everything to lower case."""
+    if isinstance(obj, str):
+        return obj.lower()
+    elif isinstance(obj, Mapping):
+        return type(obj)([(rowercase(k), rowercase(v)) for k, v in obj.items()])
+    elif isinstance(obj, Collection):
+        return type(obj)([rowercase(o) for o in obj])
+    else:
+        return obj
diff --git a/tests/conftest.py b/tests/conftest.py
index 4e71bfc..59717d4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,16 +1 @@
 """Fixtures for flowmapper"""
-
-import pytest
-
-from flowmapper.flow import Flow
-from flowmapper.transformation_mapping import prepare_transformations
-from flowmapper.utils import (
-    apply_transformations,
-    load_standard_transformations,
-    read_migration_files,
-)
-
-
-@pytest.fixture
-def transformations():
-    return prepare_transformations(load_standard_transformations())
diff --git a/tests/data/ei-3.10.json b/tests/data/ei-3.10.json
index d9bc5c9..b62837e 100644
--- a/tests/data/ei-3.10.json
+++ b/tests/data/ei-3.10.json
@@ -1,9 +1,9 @@
 [
   {
     "identifier": "b6b4201e-0561-5992-912f-e729fbf04e41",
-    "CAS number": "002008-39-1",
+    "cas_number": "002008-39-1",
     "name": "2,4-D dimethylamine salt",
     "unit": "kg",
     "context": ["air", "non-urban air or from high stacks"]
   }
-]
\ No newline at end of file
+]
diff --git a/tests/data/ei-3.7.json b/tests/data/ei-3.7.json
index f338d95..dec80c2 100644
--- a/tests/data/ei-3.7.json
+++ b/tests/data/ei-3.7.json
@@ -1,7 +1,7 @@
 [
   {
     "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131",
-    "CAS number": "000110-63-4",
+    "cas_number": "000110-63-4",
     "name": "1,4-Butanediol",
     "unit": "kg",
     "context": ["air", "unspecified"],
@@ -9,9 +9,9 @@
   },
   {
     "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444",
-    "CAS number": "007664-41-7",
+    "cas_number": "007664-41-7",
     "name": "Ammonia",
     "unit": "kg",
     "context": ["air", "non-urban air or from high stacks"]
   }
-]
\ No newline at end of file
+]
diff --git a/tests/data/ei-3.9.json b/tests/data/ei-3.9.json
index d07c8a7..6d2c1e1 100644
--- a/tests/data/ei-3.9.json
+++ b/tests/data/ei-3.9.json
@@ -2,7 +2,7 @@
   {
     "identifier": "4f777e05-70f9-4a18-a406-d8232325073f",
     "formula": "C10H13Cl2NO3",
-    "CAS number": "002008-39-1",
+    "cas_number": "002008-39-1",
     "name": "2,4-D amines",
     "unit": "kg",
     "context": ["air", "non-urban air or from high stacks"],
@@ -12,4 +12,4 @@
     "N-methylmethanamine"
   ]
 }
-]
\ No newline at end of file
+]
diff --git a/tests/data/sp.json b/tests/data/sp.json
index d042aaa..4a9bef8 100644 --- a/tests/data/sp.json +++ b/tests/data/sp.json @@ -3,19 +3,19 @@ "name": "1,4-Butanediol", "context": "air", "unit": "kg", - "CAS number": "000110-63-4" + "cas_number": "000110-63-4" }, { "name": "1,4-Butanediol", "context": "air", "unit": "kg", - "CAS number": "000110-63-4" + "cas_number": "000110-63-4" }, { "name": "1,4-Butanediol", "context": "air/high. pop.", "unit": "kg", - "CAS number": "000110-63-4" + "cas_number": "000110-63-4" }, { "name": "Cesium-134", diff --git a/tests/test_cas.py b/tests/test_cas.py deleted file mode 100644 index 5b5a468..0000000 --- a/tests/test_cas.py +++ /dev/null @@ -1,61 +0,0 @@ -import pytest - -from flowmapper.cas import CASField - - -def test_cas_init(): - cas = CASField("0000096-49-1") - assert cas.original == "0000096-49-1" - assert cas.transformed == "96-49-1" - assert cas.digits == (9, 6, 4, 9, 1) - - -def test_cas_init_empty_string(): - cas = CASField("") - assert cas.original == "" - assert cas.transformed == "" - assert cas.digits == () - - -def test_cas_init_none(): - cas = CASField(None) - assert cas.original is None - assert cas.transformed == "" - assert cas.digits == () - - -def test_cas_init_error(): - with pytest.raises(TypeError): - CASField(96491) - - -def test_cas_export(): - assert CASField("7782-40-3").export == "7782-40-3" - assert CASField("7782403").export == "7782-40-3" - assert CASField("0007782403").export == "7782-40-3" - assert CASField("").export == "" - assert CASField(None).export == "" - - -def test_invalid_cas_check_digit(): - assert not CASField("96-49-2").valid - assert CASField("96-49-2").check_digit_expected == 1 - - -def test_cas_repr(): - repr(CASField("0000096-49-1")) == "Valid CASField: '0000096-49-1' -> '96-49-1'" - repr(CASField("0000096-49-2")) == "Invalid CASField: '0000096-49-2' -> '96-49-2'" - repr(CASField("")) == "CASField with missing original value" - - -def test_equality_comparison(): - assert CASField("\t\n\n007440-05-3") == CASField("7440-05-3") - assert CASField("7440-05-3") == "0007440-05-3" - assert CASField("7440-05-3") == "7440-05-3" - assert not CASField("7440-05-3") == "7782-40-3" - assert not CASField("7440-05-3") == CASField("7782-40-3") - assert not CASField("") == CASField("7782-40-3") - assert not CASField("7440-05-3") == CASField("") - assert not CASField("") == CASField("") - assert not CASField(None) == CASField("") - assert not CASField("") == CASField(None) diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 305b05c..0000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,175 +0,0 @@ -import json - -import pandas as pd -import pytest -from typer.testing import CliRunner - -from flowmapper.cli import app - -runner = CliRunner() - - -def test_version(): - result = runner.invoke(app, ["--version"]) - assert result.output.startswith("flowmapper, version") - - -def test_format_glad(tmp_path): - result = runner.invoke( - app, - [ - "map", - "tests/data/sp.json", - "tests/data/ei-3.7.json", - "--format", - "glad", - "--output-dir", - str(tmp_path), - ], - ) - expected_files = sorted( - [ - tmp_path / "sp-ei-3.7.xlsx", - tmp_path / "sp-ei-3.7-unmatched-source.json", - tmp_path / "sp-ei-3.7-unmatched-target.json", - ] - ) - - files = sorted(tmp_path.glob("**/*")) - - assert result.exit_code == 0 - assert expected_files == files - - -def test_format_randonneur(tmp_path): - result = runner.invoke( - app, - [ - "map", - "tests/data/sp.json", - "tests/data/ei-3.7.json", - "--format", - "randonneur", - "--output-dir", - str(tmp_path), - ], 
- ) - expected_files = sorted( - [ - tmp_path / "sp-ei-3.7.json", - tmp_path / "sp-ei-3.7-unmatched-source.json", - tmp_path / "sp-ei-3.7-unmatched-target.json", - ] - ) - - files = sorted(tmp_path.glob("**/*")) - - assert result.exit_code == 0 - assert expected_files == files - - -def test_matched_flows(tmp_path): - runner.invoke( - app, - [ - "map", - "tests/data/sp.json", - "tests/data/ei-3.7.json", - "--matched-source", - "--matched-target", - "--output-dir", - str(tmp_path), - ], - ) - - with open(tmp_path / "sp-ei-3.7-matched-source.json") as fs: - actual = json.load(fs) - - expected = [ - { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - {"context": "air/low. pop.", "name": "Ammonia, FR", "unit": "kg"}, - ] - assert actual == expected - - -def test_matched_flows_with_randonneur_transformations(tmp_path): - runner.invoke( - app, - [ - "map", - "tests/data/sp.json", - "tests/data/ei-3.7.json", - "--transformations", - "tests/data/transformations.json", - "--matched-source", - "--matched-target", - "--output-dir", - str(tmp_path), - ], - ) - - with open(tmp_path / "sp-ei-3.7-matched-source.json") as fs: - actual = json.load(fs) - - expected = [ - { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - { - "CAS number": "110-63-4", - "context": "air/high. pop.", - "name": "1,4-Butanediol", - "unit": "kg", - }, - {"context": "air/low. pop.", "name": "Ammonia, FR", "unit": "kg"}, - {"context": "air/low. pop.", "name": "Ammonia, as N", "unit": "kg"}, - ] - assert actual == expected - - -def test_matched_flows_with_multiple_randonneur_transformations(tmp_path): - runner.invoke( - app, - [ - "map", - "tests/data/sp.json", - "tests/data/ei-3.7.json", - "--transformations", - "tests/data/transformations.json", - "--transformations", - "tests/data/migrations.json", - "--matched-source", - "--matched-target", - "--output-dir", - str(tmp_path), - ], - ) - - with open(tmp_path / "sp-ei-3.7-matched-source.json") as fs: - actual = json.load(fs) - - expected = [ - { - "name": "1,4-Butanediol", - "unit": "kg", - "context": "air", - "CAS number": "110-63-4", - }, - { - "name": "1,4-Butanediol", - "unit": "kg", - "context": "air/high. pop.", - "CAS number": "110-63-4", - }, - {"name": "Ammonia, FR", "unit": "kg", "context": "air/low. pop."}, - {"name": "Ammonia, as N", "unit": "kg", "context": "air/low. 
pop."}, - ] - assert actual == expected diff --git a/tests/test_context.py b/tests/test_context.py deleted file mode 100644 index e7eb83a..0000000 --- a/tests/test_context.py +++ /dev/null @@ -1,124 +0,0 @@ -import pytest - -from flowmapper.context import MISSING_VALUES, ContextField - - -def test_context_uses_transformed(): - c = ContextField( - original="Raw/(unspecified)", - transformed=["Raw", "(unspecified)"], - ) - assert c == ["Raw", "(unspecified)"] - assert c.transformed == ["Raw", "(unspecified)"] - - -def test_context_transformed_from_tuple(): - c = ContextField( - original="Raw/(unspecified)", - transformed=("Raw", "(unspecified)"), - ) - assert c == ["Raw", "(unspecified)"] - assert c.transformed == ("Raw", "(unspecified)") - - -def test_context_transformed_from_string_with_slash(): - c = ContextField( - original="Raw/(unspecified)", - transformed="Raw/(unspecified)", - ) - assert c == ["Raw", "(unspecified)"] - assert c.transformed == "Raw/(unspecified)" - - -def test_context_transformed_from_string(): - c = ContextField( - original="Raw/(unspecified)", - transformed="Raw", - ) - assert c == ["Raw", "(unspecified)"] - assert c.transformed == "Raw" - - -def test_context_transformed_not_given(): - c = ContextField( - original="Raw/(unspecified)", - ) - assert c == ["Raw", "(unspecified)"] - assert c.transformed == "Raw/(unspecified)" - - -def test_context_normalize_tuple(): - c = ContextField( - original=("Raw",), - ) - assert c.normalized == ("raw",) - - -def test_context_normalize_string_with_slash(): - c = ContextField( - original="A/B", - ) - assert c.normalized == ("a", "b") - - -def test_context_normalize_string(): - c = ContextField( - original="A-B", - ) - assert c.normalized == ("a-b",) - - -def test_context_normalize_error(): - class Foo: - pass - - with pytest.raises(ValueError): - ContextField(Foo()) - - -def test_context_normalize_lowercase(): - c = ContextField( - original="A-B", - ) - assert c.normalized == ("a-b",) - - -def test_context_normalize_strip(): - c = ContextField( - original=" A-B\t\n", - ) - assert c.normalized == ("a-b",) - - -@pytest.mark.parametrize("string", MISSING_VALUES) -def test_context_missing_values(string): - c = ContextField( - original=("A", string), - ) - assert c.original == ("A", string) - assert c.normalized == ("a",) - - -def test_context_generic_dunder(): - c = ContextField("A/B") - assert repr(c) == "ContextField: 'A/B' -> '('a', 'b')'" - assert repr(ContextField("")) == "ContextField: '' -> '()'" - assert bool(c) - assert isinstance(hash(c), int) - assert list(c) == ["a", "b"] - - -def test_context_in(): - a = ContextField("A") - b = ContextField("A/B") - assert b in a - assert a not in b - - -def test_context_export_as_string(): - assert ContextField(["A", "B"]).export_as_string() == "A✂️B" - assert ContextField("A/B").export_as_string() == "A/B" - c = ContextField("A/B") - c.original = {"A": "B"} - with pytest.raises(ValueError): - c.export_as_string() diff --git a/tests/test_extract_ecospold2.py b/tests/test_extract_ecospold2.py index d3d7851..e88bc4a 100644 --- a/tests/test_extract_ecospold2.py +++ b/tests/test_extract_ecospold2.py @@ -10,8 +10,14 @@ def test_remove_conflicting_synonyms_no_conflicts(): result = remove_conflicting_synonyms(data) - assert result[0]["synonyms"] == ["water", "h2o"] - assert result[1]["synonyms"] == ["soil", "earth"] + assert result[0]["synonyms"] == [ + "water", + "h2o", + ], f"Expected result[0]['synonyms'] to equal ['water', 'h2o'], but got {result[0]['synonyms']}" + assert 
result[1]["synonyms"] == [ + "soil", + "earth", + ], f"Expected result[1]['synonyms'] to equal ['soil', 'earth'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_with_conflicts(): @@ -28,8 +34,12 @@ def test_remove_conflicting_synonyms_with_conflicts(): result = remove_conflicting_synonyms(data) # "water" should be removed from flow_a's synonyms - assert result[0]["synonyms"] == ["h2o"] - assert result[1]["synonyms"] == ["aqua"] + assert result[0]["synonyms"] == [ + "h2o" + ], f"Expected result[0]['synonyms'] to equal ['h2o'], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_different_contexts(): @@ -46,8 +56,13 @@ def test_remove_conflicting_synonyms_different_contexts(): result = remove_conflicting_synonyms(data) # "water" should be kept since contexts are different - assert result[0]["synonyms"] == ["water", "h2o"] - assert result[1]["synonyms"] == ["aqua"] + assert result[0]["synonyms"] == [ + "water", + "h2o", + ], f"Expected result[0]['synonyms'] to equal ['water', 'h2o'], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_multiple_conflicts(): @@ -61,9 +76,15 @@ def test_remove_conflicting_synonyms_multiple_conflicts(): result = remove_conflicting_synonyms(data) # Both "water" and "soil" should be removed from flow_a's synonyms - assert result[0]["synonyms"] == ["h2o"] - assert result[1]["synonyms"] == ["aqua"] - assert result[2]["synonyms"] == ["earth"] + assert result[0]["synonyms"] == [ + "h2o" + ], f"Expected result[0]['synonyms'] to equal ['h2o'], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" + assert result[2]["synonyms"] == [ + "earth" + ], f"Expected result[2]['synonyms'] to equal ['earth'], but got {result[2]['synonyms']}" def test_remove_conflicting_synonyms_no_synonyms(): @@ -80,8 +101,12 @@ def test_remove_conflicting_synonyms_no_synonyms(): result = remove_conflicting_synonyms(data) # Should not raise error and flow_b should keep its synonym - assert "synonyms" not in result[0] - assert result[1]["synonyms"] == ["water"] + assert ( + "synonyms" not in result[0] + ), "Expected 'synonyms' to not be in result[0], but it was" + assert result[1]["synonyms"] == [ + "water" + ], f"Expected result[1]['synonyms'] to equal ['water'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_no_context(): @@ -98,8 +123,13 @@ def test_remove_conflicting_synonyms_no_context(): result = remove_conflicting_synonyms(data) # flow_a should keep its synonyms since it has no context - assert result[0]["synonyms"] == ["water", "h2o"] - assert result[1]["synonyms"] == ["aqua"] + assert result[0]["synonyms"] == [ + "water", + "h2o", + ], f"Expected result[0]['synonyms'] to equal ['water', 'h2o'], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_empty_synonyms_list(): @@ -112,8 +142,12 @@ def test_remove_conflicting_synonyms_empty_synonyms_list(): result = remove_conflicting_synonyms(data) # Empty synonyms list should remain empty - assert result[0]["synonyms"] == [] - assert 
result[1]["synonyms"] == ["aqua"] + assert ( + result[0]["synonyms"] == [] + ), f"Expected result[0]['synonyms'] to equal [], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_case_insensitive(): @@ -129,8 +163,12 @@ def test_remove_conflicting_synonyms_case_insensitive(): result = remove_conflicting_synonyms(data) - assert result[0]["synonyms"] == ["H2O"] - assert result[1]["synonyms"] == ["aqua"] + assert result[0]["synonyms"] == [ + "H2O" + ], f"Expected result[0]['synonyms'] to equal ['H2O'], but got {result[0]['synonyms']}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" def test_remove_conflicting_synonyms_self_conflict(): @@ -142,7 +180,10 @@ def test_remove_conflicting_synonyms_self_conflict(): result = remove_conflicting_synonyms(data) # All synonyms should be kept since they don't conflict with other flows - assert result[0]["synonyms"] == ["h2o", "aqua"] + assert result[0]["synonyms"] == [ + "h2o", + "aqua", + ], f"Expected result[0]['synonyms'] to equal ['h2o', 'aqua'], but got {result[0]['synonyms']}" def test_remove_conflicting_synonyms_preserves_original_data(): @@ -167,14 +208,34 @@ def test_remove_conflicting_synonyms_preserves_original_data(): result = remove_conflicting_synonyms(data) # Check that other fields are preserved - assert result[0]["name"] == "flow_a" - assert result[0]["context"] == ["ground"] - assert result[0]["unit"] == "kg" - assert result[0]["identifier"] == "123" - assert result[0]["synonyms"] == ["h2o"] # Only "water" removed - - assert result[1]["name"] == "water" - assert result[1]["context"] == ["ground"] - assert result[1]["unit"] == "m3" - assert result[1]["identifier"] == "456" - assert result[1]["synonyms"] == ["aqua"] + assert ( + result[0]["name"] == "flow_a" + ), f"Expected result[0]['name'] to equal 'flow_a', but got {result[0]['name']!r}" + assert result[0]["context"] == [ + "ground" + ], f"Expected result[0]['context'] to equal ['ground'], but got {result[0]['context']}" + assert ( + result[0]["unit"] == "kg" + ), f"Expected result[0]['unit'] to equal 'kg', but got {result[0]['unit']!r}" + assert ( + result[0]["identifier"] == "123" + ), f"Expected result[0]['identifier'] to equal '123', but got {result[0]['identifier']!r}" + assert result[0]["synonyms"] == [ + "h2o" + ], f"Expected result[0]['synonyms'] to equal ['h2o'], but got {result[0]['synonyms']}" # Only "water" removed + + assert ( + result[1]["name"] == "water" + ), f"Expected result[1]['name'] to equal 'water', but got {result[1]['name']!r}" + assert result[1]["context"] == [ + "ground" + ], f"Expected result[1]['context'] to equal ['ground'], but got {result[1]['context']}" + assert ( + result[1]["unit"] == "m3" + ), f"Expected result[1]['unit'] to equal 'm3', but got {result[1]['unit']!r}" + assert ( + result[1]["identifier"] == "456" + ), f"Expected result[1]['identifier'] to equal '456', but got {result[1]['identifier']!r}" + assert result[1]["synonyms"] == [ + "aqua" + ], f"Expected result[1]['synonyms'] to equal ['aqua'], but got {result[1]['synonyms']}" diff --git a/tests/test_flow.py b/tests/test_flow.py deleted file mode 100644 index 4e42c96..0000000 --- a/tests/test_flow.py +++ /dev/null @@ -1,135 +0,0 @@ -from flowmapper.cas import CASField -from flowmapper.flow import Flow -from flowmapper.transformation_mapping import 
prepare_transformations - - -def test_flow_with_transformations_repr(): - d = { - "name": "Carbon dioxide, in air", - "context": ["Raw", "(unspecified)"], - "unit": "kg", - "cas": "000124-38-9", - } - - transformations = prepare_transformations( - [ - { - "update": [ - { - "source": { - "name": "Carbon dioxide, in air", - "context": ["Raw", "(unspecified)"], - }, - "target": {"name": "Carbon dioxide"}, - } - ] - } - ] - ) - - f = Flow(d, transformations=transformations) - expected = """Flow object: - Identifier: StringField with missing original value - Name: StringField: 'Carbon dioxide, in air' -> 'carbon dioxide' - Context: ContextField: '['Raw', '(unspecified)']' -> '('raw',)' - Unit: UnitField: 'kg' -> 'kg'""" - - assert repr(f) == expected - - -def test_flow_from_sp_categories(transformations): - data = { - "name": "Carbon dioxide, in air", - "context": "resources/in air", - "unit": "kg", - "CAS number": "000124-38-9", - } - - flow = Flow(data, transformations) - assert not flow.identifier - assert flow.name.original == "Carbon dioxide, in air" - assert flow.name.normalized == "carbon dioxide, in air" - assert flow.context.original == "resources/in air" - assert flow.context.normalized == ("natural resource", "in air") - - -def test_flow_from_sp_missing(transformations): - data = {"name": "Chrysotile", "context": "Raw/in ground", "unit": "kg"} - - flow = Flow(data, transformations) - assert flow.name.original == "Chrysotile" - expected = """Flow object: - Identifier: StringField with missing original value - Name: StringField: 'Chrysotile' -> 'chrysotile' - Context: ContextField: 'Raw/in ground' -> '('natural resource', 'in ground')' - Unit: UnitField: 'kg' -> 'kilogram'""" - assert repr(flow) == expected - assert flow.context.original == "Raw/in ground" - assert flow.context.normalized == ("natural resource", "in ground") - - -def test_flow_cas(): - data = { - "name": "Actinium", - "CAS number": "007440-34-8", - "chemical formula": "Ac\u007f", - "synonyms": "Actinium", - "unit": "kg", - "Class": "Raw materials", - "context": "Raw materials", - "Description": "", - } - - fields = { - "identifier": "Flow UUID", - "name": "name", - "context": "context", - "unit": "unit", - "CAS number": "CAS No", - } - - flow = Flow(data) - assert flow.cas == CASField("007440-34-8") - assert flow.cas == "7440-34-8" - - -def test_flow_from_ei(): - data = { - "name": "1,3-Dioxolan-2-one", - "CAS number": "000096-49-1", - "chemical formula": "", - "synonyms": "", - "unit": "kg", - "Class": "chemical", - "ExternalReference": "", - "Preferred": "", - "context": "water/unspecified", - "identifier": "5b7d620e-2238-5ec9-888a-6999218b6974", - "AltUnit": "", - "Var": "", - "Second CAS": "96-49-1", - } - flow = Flow(data) - assert flow.identifier == "5b7d620e-2238-5ec9-888a-6999218b6974" - - -def test_flow_with_synonyms(transformations): - data = { - "identifier": "f0cc0453-32c0-48f5-b8d4-fc87d100b8d9", - "CAS number": "000078-79-5", - "name": "Isoprene", - "unit": "kg", - "context": ["air", "low population density, long-term"], - "synonyms": [ - "2-methylbuta-1,3-diene", - "methyl bivinyl", - "hemiterpene", - ], - } - - flow = Flow(data, transformations) - assert [obj.original for obj in flow.synonyms] == [ - "2-methylbuta-1,3-diene", - "methyl bivinyl", - "hemiterpene", - ] diff --git a/tests/test_flowmap.py b/tests/test_flowmap.py deleted file mode 100644 index da3da92..0000000 --- a/tests/test_flowmap.py +++ /dev/null @@ -1,322 +0,0 @@ -import json -from pathlib import Path - -import pandas as pd -import 
pytest - -from flowmapper import Flow, Flowmap -from flowmapper.match import match_emissions_with_suffix_ion, match_identical_names - -DATA_DIR = Path(__file__).parent / "data" - - -@pytest.fixture -def source_flows(transformations): - return [ - Flow(flow, transformations) for flow in json.load(open(DATA_DIR / "sp.json")) - ] - - -@pytest.fixture -def target_flows(transformations): - return [ - Flow(flow, transformations) - for flow in json.load(open(DATA_DIR / "ei-3.7.json")) - ] - - -@pytest.fixture -def ei39(): - return [Flow(flow) for flow in json.load(open(DATA_DIR / "ei-3.9.json"))] - - -@pytest.fixture -def ei310(): - return [Flow(flow) for flow in json.load(open(DATA_DIR / "ei-3.10.json"))] - - -def test_flowmap_remove_duplicates(source_flows, target_flows): - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.source_flows - # Added one duplicate on purpose - assert len(flowmap.source_flows) == 7 - - -def test_flowmap_mappings(source_flows, target_flows): - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.mappings[0] - assert list(actual.keys()) == [ - "from", - "to", - "conversion_factor", - "match_rule", - "match_rule_priority", - "info", - ] - assert actual["match_rule"] == "match_identical_names" - - -def test_flowmap_to_randonneur(source_flows, target_flows): - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.to_randonneur() - expected = [ - { - "comment": "Identical names", - "conversion_factor": 1.0, - "source": { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - "target": { - "CAS number": "110-63-4", - "context": ["air", "unspecified"], - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "name": "1,4-Butanediol", - "unit": "kg", - }, - }, - { - "comment": "Name matching with location code", - "conversion_factor": 1.0, - "source": {"context": "air/low. pop.", "name": "Ammonia, FR", "unit": "kg"}, - "target": { - "CAS number": "7664-41-7", - "context": ["air", "non-urban air or from high stacks"], - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "location": "FR", - "name": "Ammonia", - "unit": "kg", - }, - }, - ] - assert actual == expected - - -def test_flowmap_to_randonneur_export(source_flows, target_flows, tmp_path): - flowmap = Flowmap(source_flows, target_flows) - flowmap.to_randonneur(tmp_path / "randonneur.json") - with open(tmp_path / "randonneur.json", "r") as fs: - actual = json.load(fs) - expected = [ - { - "comment": "Identical names", - "conversion_factor": 1.0, - "source": { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - "target": { - "CAS number": "110-63-4", - "context": ["air", "unspecified"], - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "name": "1,4-Butanediol", - "unit": "kg", - }, - }, - { - "comment": "Name matching with location code", - "conversion_factor": 1.0, - "source": {"context": "air/low. 
pop.", "name": "Ammonia, FR", "unit": "kg"}, - "target": { - "CAS number": "7664-41-7", - "context": ["air", "non-urban air or from high stacks"], - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "location": "FR", - "name": "Ammonia", - "unit": "kg", - }, - }, - ] - assert actual == expected - - -def test_flowmap_with_custom_rules_no_match(source_flows, target_flows): - flowmap = Flowmap( - source_flows, - target_flows, - rules=[match_emissions_with_suffix_ion], - ) - actual = flowmap.mappings - assert actual == [] - - -def test_flowmap_with_custom_rules_match(source_flows, target_flows): - flowmap = Flowmap(source_flows, target_flows, rules=[match_identical_names]) - actual = flowmap.to_randonneur() - expected = [ - { - "comment": "Identical names", - "conversion_factor": 1.0, - "source": { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - "target": { - "CAS number": "110-63-4", - "context": [ - "air", - "unspecified", - ], - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "name": "1,4-Butanediol", - "unit": "kg", - }, - } - ] - assert actual == expected - - -def test_flowmap_to_glad(source_flows, target_flows): - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.to_glad() - expected = { - "SourceFlowName": ["1,4-Butanediol", "Ammonia, FR"], - "SourceFlowUUID": ["", ""], - "SourceFlowContext": ["air", "air/low. pop."], - "SourceUnit": ["kg", "kg"], - "MatchCondition": ["=", "="], - "ConversionFactor": [1.0, 1.0], - "TargetFlowName": ["1,4-Butanediol", "Ammonia"], - "TargetFlowUUID": [ - "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - ], - "TargetFlowContext": [ - "air✂️unspecified", - "air✂️non-urban air or from high stacks", - ], - "TargetUnit": ["kg", "kg"], - "MemoMapper": ["Identical names", "Name matching with location code"], - } - pd.testing.assert_frame_equal(actual, pd.DataFrame(expected)) - - -def test_flowmap_to_glad_export(source_flows, target_flows, tmp_path): - flowmap = Flowmap(source_flows, target_flows) - flowmap.to_glad(tmp_path / "glad.xlsx") - actual = pd.read_excel(tmp_path / "glad.xlsx") - print(actual["MatchCondition"]) - expected = { - "SourceFlowName": ["1,4-Butanediol", "Ammonia, FR"], - "SourceFlowUUID": [float("NaN"), float("NaN")], - "SourceFlowContext": ["air", "air/low. 
pop."], - "SourceUnit": ["kg", "kg"], - "MatchCondition": ["=", "="], - "ConversionFactor": [1, 1], - "TargetFlowName": ["1,4-Butanediol", "Ammonia"], - "TargetFlowUUID": [ - "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - ], - "TargetFlowContext": [ - "air✂️unspecified", - "air✂️non-urban air or from high stacks", - ], - "TargetUnit": ["kg", "kg"], - "MemoMapper": ["Identical names", "Name matching with location code"], - } - pd.testing.assert_frame_equal(actual, pd.DataFrame(expected)) - - -def test_flowmap_nomatch_rule(source_flows, target_flows): - nomatch = lambda flow: flow.context == "air/urban air close to ground" - flowmap = Flowmap(source_flows, target_flows, nomatch_rules=[nomatch]) - - assert len(flowmap.source_flows_nomatch) == 1 - assert flowmap.source_flows_nomatch[0].name == "1,4-Butanediol" - assert flowmap.source_flows_nomatch[0].context == "air/urban air close to ground" - assert flowmap.source_flows[0].name == "1,4-Butanediol" - assert flowmap.source_flows[0].context == "air" - - -def test_flowmap_nomatch_rule_false(source_flows, target_flows): - nomatch = lambda flow: flow.context == "water" - flowmap = Flowmap(source_flows, target_flows, nomatch_rules=[nomatch]) - assert not flowmap.source_flows_nomatch - - -def test_flowmap_nomatch_multiple_rules(source_flows, target_flows): - nomatch1 = lambda flow: flow.context == "air/urban air close to ground" - nomatch2 = lambda flow: flow.context == "air" - flowmap = Flowmap(source_flows, target_flows, nomatch_rules=[nomatch1, nomatch2]) - - assert len(flowmap.source_flows_nomatch) == 2 - assert flowmap.source_flows_nomatch[0].name == "1,4-Butanediol" - assert flowmap.source_flows_nomatch[1].name == "1,4-Butanediol" - assert flowmap.source_flows[0].name == "Cesium-134" - - -def test_flowmap_mappings_ei_ei(target_flows): - flowmap = Flowmap(target_flows, target_flows) - actual = flowmap.to_randonneur() - expected = [ - { - "source": { - "name": "1,4-Butanediol", - "unit": "kg", - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "context": ["air", "unspecified"], - "CAS number": "110-63-4", - }, - "target": { - "name": "1,4-Butanediol", - "unit": "kg", - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "context": ["air", "unspecified"], - "CAS number": "110-63-4", - }, - "conversion_factor": 1.0, - "comment": "Identical identifier", - }, - { - "source": { - "name": "Ammonia", - "unit": "kg", - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "context": ["air", "non-urban air or from high stacks"], - "CAS number": "7664-41-7", - }, - "target": { - "name": "Ammonia", - "unit": "kg", - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "context": ["air", "non-urban air or from high stacks"], - "CAS number": "7664-41-7", - }, - "conversion_factor": 1.0, - "comment": "Identical identifier", - }, - ] - assert actual == expected - - -def test_flowmap_mappings_ei39_ei310(ei39, ei310): - flowmap = Flowmap(ei39, ei310) - actual = flowmap.to_randonneur() - expected = [ - { - "source": { - "name": "2,4-D amines", - "unit": "kg", - "identifier": "4f777e05-70f9-4a18-a406-d8232325073f", - "context": ["air", "non-urban air or from high stacks"], - "CAS number": "2008-39-1", - }, - "target": { - "name": "2,4-D dimethylamine salt", - "unit": "kg", - "identifier": "b6b4201e-0561-5992-912f-e729fbf04e41", - "context": ["air", "non-urban air or from high stacks"], - "CAS number": "2008-39-1", - }, - "conversion_factor": 1.0, - "comment": "Identical CAS numbers", - } - ] - assert actual == 
expected diff --git a/tests/test_format_match_result.py b/tests/test_format_match_result.py deleted file mode 100644 index 535943f..0000000 --- a/tests/test_format_match_result.py +++ /dev/null @@ -1,40 +0,0 @@ -from deepdiff import DeepDiff - -from flowmapper.flow import Flow -from flowmapper.match import format_match_result - - -def test_format_match_result_missing_id(transformations): - source = { - "name": "Carbon dioxide, in air", - "context": "Raw materials", - "unit": "kg", - } - s = Flow(source, transformations) - - target = { - "identifier": "cc6a1abb-b123-4ca6-8f16-38209df609be", - "name": "Carbon dioxide, in air", - "context": "natural resource/in air", - "unit": "kg", - } - t = Flow(target) - - actual = format_match_result(s, t, 1.0, {"is_match": True, "comment": "foo"}) - expected = { - "source": { - "name": "Carbon dioxide, in air", - "context": "Raw materials", - "unit": "kg", - }, - "target": { - "identifier": "cc6a1abb-b123-4ca6-8f16-38209df609be", - "name": "Carbon dioxide, in air", - "context": "natural resource/in air", - "unit": "kg", - }, - "conversion_factor": 1.0, - "comment": "foo", - } - - assert not DeepDiff(actual, expected) diff --git a/tests/test_get_conversion_factor.py b/tests/test_get_conversion_factor.py deleted file mode 100644 index 862f917..0000000 --- a/tests/test_get_conversion_factor.py +++ /dev/null @@ -1,124 +0,0 @@ -import math - -from flowmapper.flow import Flow - - -def test_get_conversion_factor(transformations): - s = Flow( - { - "name": "Protactinium-234", - "unit": "Bq", - "context": ["Emissions to air", "low. pop."], - }, - transformations, - ) - - t = Flow( - { - "identifier": "fb13070e-06f1-4964-832f-a23945b880cc", - "name": "Protactinium-234", - "unit": "kBq", - "context": ["air", "non-urban air or from high stacks"], - }, - transformations, - ) - - actual = s.unit.conversion_factor(t.unit) - expected = 1e-3 - assert actual == expected - - -def test_get_conversion_factor_water(transformations): - s = Flow( - {"name": "Water", "unit": "kg", "context": ["Emissions to water", ""]}, - transformations, - ) - - t = Flow( - { - "identifier": "2404b41a-2eed-4e9d-8ab6-783946fdf5d6", - "CAS number": "007732-18-5", - "name": "Water", - "unit": "m3", - "context": ["water", "unspecified"], - }, - transformations, - ) - - actual = s.unit.conversion_factor(t.unit) - assert math.isnan(actual) - - -def test_get_conversion_factor_m3y(transformations): - s = Flow( - { - "name": "Volume occupied, reservoir", - "unit": "m3y", - "context": ["Resources", "in water"], - }, - transformations, - ) - - t = Flow( - { - "identifier": "9a9d71c7-79f7-42d0-af47-282d22a7cf07", - "name": "Volume occupied, reservoir", - "unit": "m3*year", - "context": ["natural resource", "in water"], - }, - transformations, - ) - - actual = s.unit.conversion_factor(t.unit) - expected = 1 - assert actual == expected - - -def test_get_conversion_factor_m2a(transformations): - s = Flow( - { - "name": "Occupation, annual crop", - "unit": "m2a", - "context": ["Resources", "land"], - }, - transformations, - ) - - t = Flow( - { - "identifier": "c5aafa60-495c-461c-a1d4-b262a34c45b9", - "name": "Occupation, annual crop", - "unit": "m2*year", - "context": ["natural resource", "land"], - }, - transformations, - ) - - actual = s.unit.conversion_factor(t.unit) - expected = 1 - assert actual == expected - - -def test_get_conversion_factor_nan(transformations): - s = Flow( - { - "name": "Radium-226/kg", - "unit": "kg", - "context": ["Emissions to water", ""], - }, - transformations, - ) - - t = Flow( - 
{ - "identifier": "74a0aabb-e11b-4f3b-8921-45e447b33393", - "CAS number": "013982-63-3", - "name": "Radium-226", - "unit": "kBq", - "context": ["water", "ocean"], - }, - transformations, - ) - - actual = s.unit.conversion_factor(t.unit) - assert math.isnan(actual) diff --git a/tests/test_id_generation.py b/tests/test_id_generation.py deleted file mode 100644 index 031aac7..0000000 --- a/tests/test_id_generation.py +++ /dev/null @@ -1,11 +0,0 @@ -from flowmapper.utils import generate_flow_id - - -def test_generate_flow_id(): - flow1 = { - "name": "1,4-Butanediol", - "context": ["Air", "(unspecified)"], - "unit": "kg", - "CAS number": "000110-63-4", - } - assert generate_flow_id(flow1) == "77bb0c932afd7d7eb7ada382c8828b9f" diff --git a/tests/test_match_biogenic_to_non_fossil.py b/tests/test_match_biogenic_to_non_fossil.py deleted file mode 100644 index f0573c4..0000000 --- a/tests/test_match_biogenic_to_non_fossil.py +++ /dev/null @@ -1,11 +0,0 @@ -from flowmapper.flow import Flow -from flowmapper.match import match_biogenic_to_non_fossil - - -def test_match_biogenic_to_non_fossil(): - s = Flow({"name": "Oils, biogenic", "context": "air", "unit": "kg"}) - t = Flow({"name": "Oils, non-fossil", "context": "air", "unit": "kg"}) - - actual = match_biogenic_to_non_fossil(s, t) - expected = {"comment": "Biogenic to non-fossil if no better match"} - assert actual == expected diff --git a/tests/test_match_custom_names_with_location_codes.py b/tests/test_match_custom_names_with_location_codes.py deleted file mode 100644 index aa18668..0000000 --- a/tests/test_match_custom_names_with_location_codes.py +++ /dev/null @@ -1,87 +0,0 @@ -from flowmapper.flow import Flow -from flowmapper.match import match_custom_names_with_location_codes - - -def test_match_custom_names_with_location_codes_extra(): - s = Flow( - { - "name": "Water (ersatz), net cons., irrigation, HU", - "context": "air", - "unit": "kg", - } - ) - t = Flow( - {"name": "water, unspecified natural origin", "context": "air", "unit": "kg"} - ) - - actual = match_custom_names_with_location_codes(s, t) - expected = { - "comment": "Custom names with location code", - "location": "HU", - "irrigation": True, - } - assert actual == expected - - -def test_match_custom_names_with_location_codes_no_extra(): - s = Flow({"name": "Water, well, HU", "context": "air", "unit": "kg"}) - t = Flow({"name": "Water, well, in ground", "context": "air", "unit": "kg"}) - - actual = match_custom_names_with_location_codes(s, t) - expected = {"comment": "Custom names with location code", "location": "HU"} - assert actual == expected - - -def test_match_custom_names_with_location_codes_extra_whitespace_complicated(): - s = Flow( - { - "name": "Water (ersatz), net cons., irrigation, \t RER w/o DE+NL+NO", - "context": "air", - "unit": "kg", - } - ) - t = Flow( - {"name": "water, unspecified natural origin", "context": "air", "unit": "kg"} - ) - - actual = match_custom_names_with_location_codes(s, t) - expected = { - "comment": "Custom names with location code", - "location": "RER w/o DE+NL+NO", - "irrigation": True, - } - assert actual == expected - - -def test_match_custom_names_with_location_codes_no_match(): - s = Flow({"name": "Ersatz water, RER w/o DE+NL+NO", "context": "air", "unit": "kg"}) - t = Flow( - {"name": "water, unspecified natural origin", "context": "air", "unit": "kg"} - ) - assert match_custom_names_with_location_codes(s, t) is None - - -def test_match_custom_names_with_location_codes_conversion(): - s = Flow({"name": "Water, well, HU", "context": "air", 
"unit": "kilogram"}) - t = Flow( - {"name": "Water, well, in ground", "context": "air", "unit": "cubic_meter"} - ) - - actual = match_custom_names_with_location_codes(s, t) - expected = { - "comment": "Custom names with location code", - "location": "HU", - "conversion_factor": 0.001, - } - assert actual == expected - - s = Flow({"name": "Water, well, HU", "context": "air", "unit": "cubic_meter"}) - t = Flow({"name": "Water, well, in ground", "context": "air", "unit": "kilogram"}) - - actual = match_custom_names_with_location_codes(s, t) - expected = { - "comment": "Custom names with location code", - "location": "HU", - "conversion_factor": 1000.0, - } - assert actual == expected diff --git a/tests/test_match_identical_cas_numbers.py b/tests/test_match_identical_cas_numbers.py deleted file mode 100644 index 018baac..0000000 --- a/tests/test_match_identical_cas_numbers.py +++ /dev/null @@ -1,74 +0,0 @@ -from deepdiff import DeepDiff - -from flowmapper.flow import Flow -from flowmapper.match import match_identical_cas_numbers - - -def test_match_identical_cas_numbers(transformations): - source = { - "name": "1-Propanol", - "CAS number": "000071-23-8", - "checmical formula": "", - "Synonyms": "1-Propanol", - "unit": "kg", - "Class": "Waterborne emissions", - "context": "Emissions to water/groundwater", - "Flow UUID": "8C31919B-2D42-4CAD-A10E-8084CCD6BE99", - "Description": "Formula: C3H8O\u007f", - } - - target = { - "name": "Propanol", - "CAS number": "000071-23-8", - "checmical formula": "", - "Synonyms": "propan-1-ol, 1-propanol, propyl alcohol, n-propanol, n-propyl alcohol", - "unit": "kg", - "Class": "chemical", - "ExternalReference": "", - "Preferred": "", - "context": "water/ground-", - "identifier": "85500204-9d88-40ae-9f0b-3ceba0e7a74f", - "AltUnit": "", - "Var": "", - "Second CAS": "71-31-8; 19986-23-3; 71-23-8; 64118-40-7; 4712-36-1; 142583-61-7; 71-23-8", - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - assert match_identical_cas_numbers(s, t) - - -def test_match_missing_cas_numbers(transformations): - source = { - "name": "1-Propanol", - "CAS number": "", - "checmical formula": "", - "synonyms": "1-Propanol", - "unit": "kg", - "Class": "Waterborne emissions", - "context": "Emissions to water/groundwater", - "identifier": "8C31919B-2D42-4CAD-A10E-8084CCD6BE99", - "Description": "Formula: C3H8O\u007f", - } - - target = { - "name": "Propanol", - "CAS number": "", - "checmical formula": "", - "synonyms": "propan-1-ol, 1-propanol, propyl alcohol, n-propanol, n-propyl alcohol", - "unit": "kg", - "Class": "chemical", - "ExternalReference": "", - "Preferred": "", - "context": "water/ground-", - "identifier": "85500204-9d88-40ae-9f0b-3ceba0e7a74f", - "AltUnit": "", - "Var": "", - "Second CAS": "71-31-8; 19986-23-3; 71-23-8; 64118-40-7; 4712-36-1; 142583-61-7; 71-23-8", - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - assert not match_identical_cas_numbers(s, t) diff --git a/tests/test_match_identical_names.py b/tests/test_match_identical_names.py deleted file mode 100644 index af712e0..0000000 --- a/tests/test_match_identical_names.py +++ /dev/null @@ -1,51 +0,0 @@ -from deepdiff import DeepDiff - -from flowmapper.flow import Flow -from flowmapper.match import match_identical_names - - -def test_match_identical_names(transformations): - source = { - "name": "Carbon dioxide, in air", - "CAS No": "000124-38-9", - "unit": "kg", - "context": "Resources/in air", - "Flow UUID": "32722990-B7D8-44A8-BC7D-EC3A89F533FF", - } - - 
target = { - "name": "Carbon dioxide, in air", - "CAS number": "000124-38-9", - "unit": "kg", - "context": "natural resource/in air", - "identifier": "cc6a1abb-b123-4ca6-8f16-38209df609be", - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - match = match_identical_names(s, t) - assert match - - -def test_match_identical_names_jsonpath(transformations): - source = { - "name": "Carbon dioxide, in air", - "context": ["Raw", "(unspecified)"], - "unit": "kg", - "CAS": "000124-38-9", - } - - target = { - "identifier": "cc6a1abb-b123-4ca6-8f16-38209df609be", - "CAS number": "000124-38-9", - "name": "Carbon dioxide, in air", - "unit": "kg", - "context": ["natural resource", "in air"], - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - match = match_identical_names(s, t) - assert not match diff --git a/tests/test_match_identical_names_except_missing_suffix.py b/tests/test_match_identical_names_except_missing_suffix.py deleted file mode 100644 index 0fe7f89..0000000 --- a/tests/test_match_identical_names_except_missing_suffix.py +++ /dev/null @@ -1,43 +0,0 @@ -from flowmapper.flow import Flow -from flowmapper.match import match_identical_names_except_missing_suffix - - -def test_match_identical_names_except_missing_suffix(transformations): - source = { - "name": "Copper", - "CAS number": "007440-50-8", - "unit": "kg", - "context": "Emissions to water/groundwater", - "identifier": "F277F190-A8A4-4A2D-AAF6-F6CB3772A545", - } - target = { - "name": "Copper, ion", - "CAS number": "017493-86-6", - "unit": "kg", - "context": "water/ground-", - "identifier": "c3b659e5-35f1-408c-8cb5-b5f9b295c76e", - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - assert match_identical_names_except_missing_suffix(s, t, suffix="ion") - - -def test_match_identical_names_except_missing_suffix_different_order(transformations): - s = Flow( - {"name": "Iron, ion", "unit": "g", "context": ["Emissions to air", ""]}, - transformations, - ) - t = Flow( - { - "identifier": "8dba66e2-0f2e-4038-84ef-1e40b4f573a6", - "CAS number": "007439-89-6", - "name": "Iron", - "unit": "kg", - "context": ["air", "unspecified"], - }, - transformations, - ) - - assert match_identical_names_except_missing_suffix(s, t, suffix="ion") diff --git a/tests/test_match_identical_names_in_synonyms.py b/tests/test_match_identical_names_in_synonyms.py deleted file mode 100644 index 12adefa..0000000 --- a/tests/test_match_identical_names_in_synonyms.py +++ /dev/null @@ -1,32 +0,0 @@ -from flowmapper.flow import Flow -from flowmapper.match import match_identical_names_in_synonyms - - -def test_match_identical_names_in_synonyms(transformations): - source = { - "name": "Sulfuric acid", - "unit": "kg", - "context": ["Emissions to water", ""], - } - - target = { - "identifier": "8570c45a-8c78-4709-9b8f-fb88314d9e9d", - "chemical formula": "H8N2O4S", - "CAS number": "007783-20-2", - "name": "Ammonium sulfate", - "unit": "kg", - "context": ["water", "unspecified"], - "synonyms": [ - "Diammonium sulfate", - "Mascagnite", - "Sulfuric acid", - "Actamaster", - "Diammonium salt", - "Dolamin", - ], - } - - s = Flow(source, transformations) - t = Flow(target, transformations) - - assert match_identical_names_in_synonyms(s, t) diff --git a/tests/test_match_names_with_country_codes.py b/tests/test_match_names_with_country_codes.py deleted file mode 100644 index 0525067..0000000 --- a/tests/test_match_names_with_country_codes.py +++ /dev/null @@ -1,64 +0,0 @@ -from flowmapper.flow import 
Flow -from flowmapper.match import match_names_with_location_codes - - -def test_match_names_with_country_codes(): - s = Flow({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) - t = Flow({"name": "Ammonia", "context": "air", "unit": "kg"}) - - actual = match_names_with_location_codes(s, t) - expected = {"comment": "Name matching with location code", "location": "NL"} - assert actual == expected - - -def test_match_names_with_country_codes_extra_whitespace(): - s = Flow({"name": "Ammonia, \tNL", "context": "air", "unit": "kg"}) - t = Flow({"name": "Ammonia", "context": "air", "unit": "kg"}) - - actual = match_names_with_location_codes(s, t) - expected = {"comment": "Name matching with location code", "location": "NL"} - assert actual == expected - - -def test_match_names_with_country_codes_no_match(): - s = Flow({"name": "Ammonia-NL", "context": "air", "unit": "kg"}) - t = Flow({"name": "Ammonia", "context": "air", "unit": "kg"}) - assert match_names_with_location_codes(s, t) is None - - -def test_match_names_with_country_codes_complicated_location(): - s = Flow({"name": "Ammonia, RER w/o DE+NL+NO", "context": "air", "unit": "kg"}) - t = Flow({"name": "Ammonia", "context": "air", "unit": "kg"}) - - actual = match_names_with_location_codes(s, t) - expected = { - "comment": "Name matching with location code", - "location": "RER w/o DE+NL+NO", - } - assert actual == expected - - -def test_match_names_with_country_codes_water_source_conversion(): - s = Flow({"name": "Water, NL", "context": "air", "unit": "kilogram"}) - t = Flow({"name": "Water", "context": "air", "unit": "cubic_meter"}) - - actual = match_names_with_location_codes(s, t) - expected = { - "comment": "Name matching with location code", - "location": "NL", - "conversion_factor": 0.001, - } - assert actual == expected - - -def test_match_names_with_country_codes_water_target_conversion(): - s = Flow({"name": "Water, NL", "context": "air", "unit": "cubic_meter"}) - t = Flow({"name": "Water", "context": "air", "unit": "kilogram"}) - - actual = match_names_with_location_codes(s, t) - expected = { - "comment": "Name matching with location code", - "location": "NL", - "conversion_factor": 1000.0, - } - assert actual == expected diff --git a/tests/test_match_non_ionic_state.py b/tests/test_match_non_ionic_state.py deleted file mode 100644 index 3b469cf..0000000 --- a/tests/test_match_non_ionic_state.py +++ /dev/null @@ -1,48 +0,0 @@ -from flowmapper.flow import Flow -from flowmapper.flowmap import Flowmap - - -def test_match_non_ionic_state(): - s = [ - Flow({"name": "Mercury (II)", "context": "air", "unit": "kg"}), - Flow({"name": "Manganese (II)", "context": "air", "unit": "kg"}), - ] - t = [ - Flow({"name": "Mercury", "context": "air", "unit": "kg", "identifier": "foo"}), - Flow( - { - "name": "Manganese II", - "context": "air", - "unit": "kg", - "identifier": "bar", - } - ), - ] - - flowmap = Flowmap(s, t) - actual = flowmap.to_randonneur() - expected = [ - { - "source": {"name": "Manganese (II)", "context": "air", "unit": "kg"}, - "target": { - "identifier": "bar", - "name": "Manganese II", - "context": "air", - "unit": "kg", - }, - "conversion_factor": 1.0, - "comment": "With/without roman numerals in parentheses", - }, - { - "source": {"name": "Mercury (II)", "context": "air", "unit": "kg"}, - "target": { - "identifier": "foo", - "name": "Mercury", - "context": "air", - "unit": "kg", - }, - "conversion_factor": 1.0, - "comment": "Non-ionic state if no better match", - }, - ] - assert actual == expected diff --git 
a/tests/test_normalize_str.py b/tests/test_normalize_str.py index d0b058f..6ce5cad 100644 --- a/tests/test_normalize_str.py +++ b/tests/test_normalize_str.py @@ -11,4 +11,6 @@ def test_normalize_str(): " \u00dcber", None, ] - assert {normalize_str(name) for name in names} == {"über", "Über", ""} + actual = {normalize_str(name) for name in names} + expected = {"über", "Über", ""} + assert actual == expected, f"Expected actual to equal {expected}, but got {actual}" diff --git a/tests/test_preferred_synonyms.py b/tests/test_preferred_synonyms.py deleted file mode 100644 index c6a6e33..0000000 --- a/tests/test_preferred_synonyms.py +++ /dev/null @@ -1,392 +0,0 @@ -import pytest - -from flowmapper.flow import Flow -from flowmapper.preferred_synonyms import ( - has_number_pattern_at_end, - has_roman_numeral_at_end, - match_identical_names_in_preferred_synonyms, -) - - -@pytest.mark.parametrize( - "text", - [ - "Chapter I", - "Section V", - "Appendix XXI", - "Book III", - "Part IV", - "Chapter VI", - "Section VII", - "Appendix VIII", - "Appendix VIII+", - "Appendix VIII-", - "Appendix viii", - "Book IX", - "Part X", - "Chapter XI", - "Section XV", - "Appendix XX", - "Book XXX", - "Chapter II ", # Trailing space - " Chapter III ", # Leading and trailing spaces - "Chapter (I)", # With parentheses - "Section (V+)", # With parentheses and plus - "Book (III-)", # With parentheses and minus - ], -) -def test_roman_numerals_should_match(text): - """Test that valid roman numerals at the end of strings are detected.""" - assert has_roman_numeral_at_end(text) - - -@pytest.mark.parametrize( - "text", - [ - "Chapter 1", - "Appendix VIII-+", - "Section A", - "Part XL", - "Chapter L", - "Appendix C", - "Chapter DC", - "Section M", - "Part MMMCMXCIX", # 3999 - "I am at the beginning", - "This ends with I but not roman", - "", - " ", - "Chapter", - ], -) -def test_non_roman_numerals_should_not_match(text): - """Test that invalid or non-roman numerals are not detected.""" - assert not has_roman_numeral_at_end(text) - - -@pytest.mark.parametrize( - "text", - [ - "Substance (1+)", - "Compound (2-)", - "Element (3)", - "Chemical (5+)", - "Material (7-)", - "Substance (9)", - "Element (11)", # Multi-digit numbers are allowed - "Substance (1+) ", # Trailing space - " Compound (2-) ", # Leading and trailing spaces - "Element (123+)", # Multiple digits with plus - "Compound (456-)", # Multiple digits with minus - ], -) -def test_number_patterns_should_match(text): - """Test that valid number patterns at the end of strings are detected.""" - assert has_number_pattern_at_end(text) - - -@pytest.mark.parametrize( - "text", - [ - "Chemical", - "Substance 1+", # Missing parentheses - "Molecule (1+2)", - "Compound (0)", - "Chemical ()", # Empty parentheses - "Material (+)", # Just plus sign - "Substance (-)", # Just minus sign - "Element (10)", - "Substance 1-", # Missing parentheses - "Chemical (5+-)", - "Substance 1-+", # Missing parentheses - "Molecule (1+2", # Missing closing parenthesis - "Element 1+2)", # Missing opening parenthesis - "Compound (1+2", # Missing closing parenthesis - "", - " ", - "Substance (1+2) extra", # Text after pattern - "(1+) Substance", # Pattern not at end - ], -) -def test_invalid_patterns_should_not_match(text): - """Test that invalid patterns are not detected.""" - assert not has_number_pattern_at_end(text) - - -def test_match_when_target_has_source_name_in_synonyms_with_roman_numeral(): - """Test matching when target has source name in synonyms and target name ends with roman numeral.""" - 
source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "water I", # Ends with roman numeral - "context": ["ground"], - "unit": "kg", - "synonyms": ["water", "aqua"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} - - -def test_match_when_target_has_source_name_in_synonyms_with_number_pattern(): - """Test matching when target has source name in synonyms and target name ends with number pattern.""" - source_data = { - "name": "carbon", - "context": ["air"], - "unit": "kg", - "synonyms": ["co2"], - } - target_data = { - "name": "carbon (2+)", # Ends with number pattern - "context": ["air"], - "unit": "kg", - "synonyms": ["carbon", "c"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} - - -def test_match_when_source_has_target_name_in_synonyms_with_roman_numeral(): - """Test matching when source has target name in synonyms and source name ends with roman numeral.""" - source_data = { - "name": "nitrogen II", # Ends with roman numeral - "context": ["air"], - "unit": "kg", - "synonyms": ["nitrogen", "n2"], - } - target_data = { - "name": "nitrogen", - "context": ["air"], - "unit": "kg", - "synonyms": ["n2"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} - - -def test_match_when_source_has_target_name_in_synonyms_with_number_pattern(): - """Test matching when source has target name in synonyms and source name ends with number pattern.""" - source_data = { - "name": "oxygen (1-)", # Ends with number pattern - "context": ["air"], - "unit": "kg", - "synonyms": ["oxygen", "o2"], - } - target_data = { - "name": "oxygen", - "context": ["air"], - "unit": "kg", - "synonyms": ["n2"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} - - -def test_no_match_when_different_contexts(): - """Test that no match occurs when contexts are different.""" - source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "water I", - "context": ["air"], # Different context - "unit": "kg", - "synonyms": ["water", "aqua"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result is None - - -def test_no_match_when_name_not_in_synonyms(): - """Test that no match occurs when name is not in synonyms.""" - source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "water I", - "context": ["ground"], - "unit": "kg", - "synonyms": ["aqua", "liquid"], # "water" not in synonyms - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result is None - - -def test_no_match_when_no_roman_numeral_or_number_pattern(): - """Test that no match occurs when name doesn't end with roman numeral or number pattern.""" - source_data = { - 
"name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "water", # No roman numeral or number pattern - "context": ["ground"], - "unit": "kg", - "synonyms": ["water", "aqua"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result is None - - -def test_no_match_when_name_not_contained_in_other_name(): - """Test that no match occurs when one name is not contained in the other.""" - source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "different I", # "water" not contained in "different_water I" - "context": ["ground"], - "unit": "kg", - "synonyms": ["water", "aqua"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result is None - - -def test_no_match_when_no_synonyms(): - """Test that no match occurs when flows have no synonyms.""" - source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": [], # No synonyms - } - target_data = { - "name": "water I", - "context": ["ground"], - "unit": "kg", - "synonyms": [], # No synonyms - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result is None - - -def test_custom_comment(): - """Test that custom comment is returned when provided.""" - source_data = { - "name": "water", - "context": ["ground"], - "unit": "kg", - "synonyms": ["h2o"], - } - target_data = { - "name": "water I", - "context": ["ground"], - "unit": "kg", - "synonyms": ["water", "aqua"], - } - - source = Flow(source_data) - target = Flow(target_data) - - custom_comment = "Custom match comment" - result = match_identical_names_in_preferred_synonyms( - source, target, custom_comment - ) - - assert result == {"comment": custom_comment} - - -def test_match_with_roman_numeral_and_plus_minus(): - """Test matching with roman numerals that have + or - signs.""" - source_data = { - "name": "iron", - "context": ["ground"], - "unit": "kg", - "synonyms": ["fe"], - } - target_data = { - "name": "iron II+", # Roman numeral with plus - "context": ["ground"], - "unit": "kg", - "synonyms": ["iron", "fe"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} - - -def test_match_with_number_pattern_and_plus_minus(): - """Test matching with number patterns that have + or - signs.""" - source_data = { - "name": "sodium", - "context": ["ground"], - "unit": "kg", - "synonyms": ["na"], - } - target_data = { - "name": "sodium (1+)", # Number pattern with plus - "context": ["ground"], - "unit": "kg", - "synonyms": ["sodium", "na"], - } - - source = Flow(source_data) - target = Flow(target_data) - - result = match_identical_names_in_preferred_synonyms(source, target) - - assert result == {"comment": "Identical preferred synonyms"} diff --git a/tests/test_prepare_transformations.py b/tests/test_prepare_transformations.py deleted file mode 100644 index 9ea7a35..0000000 --- a/tests/test_prepare_transformations.py +++ /dev/null @@ -1,2 +0,0 @@ -# TBD -# Also include pydantic stuff diff --git a/tests/test_rm_parentheses_roman_numerals.py b/tests/test_rm_parentheses_roman_numerals.py deleted file mode 100644 index 
94fa177..0000000 --- a/tests/test_rm_parentheses_roman_numerals.py +++ /dev/null @@ -1,24 +0,0 @@ -from flowmapper.utils import ( - rm_parentheses_roman_numerals, - rm_roman_numerals_ionic_state, -) - - -def test_rm_parentheses_roman_numerals(): - assert rm_parentheses_roman_numerals("chromium (iii)") == "chromium iii" - assert rm_parentheses_roman_numerals("chromium ( iii )") == "chromium iii" - assert ( - rm_parentheses_roman_numerals("water (evapotranspiration)") - == "water (evapotranspiration)" - ) - assert rm_parentheses_roman_numerals("metolachlor, (s)") == "metolachlor, (s)" - assert rm_parentheses_roman_numerals("chromium (vi)") == "chromium vi" - assert rm_parentheses_roman_numerals("beryllium (ii)") == "beryllium ii" - assert rm_parentheses_roman_numerals("thallium (i)") == "thallium i" - assert rm_parentheses_roman_numerals("tin (iv) oxide") == "tin iv oxide" - - -def test_rm_roman_numerals_ionic_state(): - assert rm_roman_numerals_ionic_state("mercury (ii)") == "mercury" - assert rm_roman_numerals_ionic_state("manganese (ii)") == "manganese" - assert rm_roman_numerals_ionic_state("molybdenum (vi)") == "molybdenum" diff --git a/tests/test_stringfield.py b/tests/test_stringfield.py deleted file mode 100644 index f59d04a..0000000 --- a/tests/test_stringfield.py +++ /dev/null @@ -1,70 +0,0 @@ -from flowmapper.string_field import StringField - - -def test_string_field_empty(): - sf = StringField(None) - assert sf.original is None - assert sf.normalized == "" - assert sf != "" - assert sf != "a" - assert sf != StringField("a") - assert sf is not None - assert not sf - assert repr(sf) == "StringField with missing original value" - - -def test_string_field_no_transformed(): - sf = StringField("A", use_lowercase=False) - assert sf.original == "A" - assert sf.normalized == "A" - assert sf == "A" - assert sf != "a" - assert sf == StringField("A", use_lowercase=True) - assert sf == StringField("A", use_lowercase=False) - assert sf != "B" - assert not sf.use_lowercase - assert sf - assert repr(sf) == "StringField: 'A' -> 'A'" - - -def test_string_field_no_transformed_lowercase(): - sf = StringField("A", use_lowercase=True) - assert sf.original == "A" - assert sf.normalized == "a" - assert sf == "a" - assert sf == "A" - assert sf == StringField("A", use_lowercase=True) - assert sf == StringField("A", use_lowercase=False) - assert sf != "B" - assert sf.use_lowercase - assert sf - assert repr(sf) == "StringField: 'A' -> 'a'" - - -def test_string_field_transformed(): - sf = StringField("A*", use_lowercase=False) - assert sf.original == "A*" - assert sf.normalized == "A*" - assert sf != "A" - assert sf != "a*" - assert sf == "A*" - assert sf == StringField("A*", use_lowercase=True) - assert sf == StringField("A*", use_lowercase=False) - assert sf != "B" - assert not sf.use_lowercase - assert sf - assert repr(sf) == "StringField: 'A*' -> 'A*'" - - -def test_string_field_transformed_lowercase(): - sf = StringField("A*", use_lowercase=True) - assert sf.original == "A*" - assert sf.normalized == "a*" - assert sf == "a*" - assert sf == "A*" - assert sf == StringField("A*", use_lowercase=True) - assert sf == StringField("A*", use_lowercase=False) - assert sf != "B" - assert sf.use_lowercase - assert sf - assert repr(sf) == "StringField: 'A*' -> 'a*'" diff --git a/tests/test_stringlist.py b/tests/test_stringlist.py deleted file mode 100644 index 88e2dee..0000000 --- a/tests/test_stringlist.py +++ /dev/null @@ -1,41 +0,0 @@ -from flowmapper.string_list import StringList - - -def 
test_string_list_empty(): - sl = StringList([]) - assert sl.data == [] - assert list(iter(sl)) == [] - assert len(sl) == 0 - assert not sl - assert repr(sl) == "StringList: Empty" - assert 1 not in sl - - -def test_string_list_no_transformed(): - sl = StringList(["A", "b"]) - assert "A" in sl - assert "b" in sl - assert len(sl) == 2 - assert sl - assert ( - repr(sl) - == "StringList: [\"StringField: 'A' -> 'a'\", \"StringField: 'b' -> 'b'\"]" - ) - assert list(iter(sl)) == ["a", "b"] - assert sl.data[0].original == "A" - assert sl.data[0].normalized == "a" - - -def test_string_list_transformed(): - sl = StringList(["A", "b"], ["A*", "b"]) - assert "A*" in sl - assert "b" in sl - assert len(sl) == 2 - assert sl - assert ( - repr(sl) - == "StringList: [\"StringField: 'A' -> 'a*'\", \"StringField: 'b' -> 'b'\"]" - ) - assert list(iter(sl)) == ["a*", "b"] - assert sl.data[0].original == "A" - assert sl.data[0].normalized == "a*" diff --git a/tests/test_transform_and_then_match.py b/tests/test_transform_and_then_match.py new file mode 100644 index 0000000..6d13d5f --- /dev/null +++ b/tests/test_transform_and_then_match.py @@ -0,0 +1,678 @@ +"""Tests for transform_and_then_match function.""" + +from copy import copy + +import pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching import match_identical_names, transform_and_then_match + + +def test_transform_and_then_match_basic(): + """Test basic matching without transformations or filters.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + ) + + assert len(matches) == 1, "Expected one match" + assert matches[0].source == source_flow, "Expected match to reference source flow" + assert matches[0].target == target_flow, "Expected match to reference target flow" + + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset" + assert ( + target_flows[0].current.name.data == target_normalized.name.data + ), "Expected target flow to be reset" + + +def test_transform_and_then_match_with_transformation(): + """Test matching with transformations applied.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def 
transform_func(flows): + for flow in flows: + flow.update_current(name="Modified name") + return flows + + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + transform_source_flows=[transform_func], + transform_target_flows=[transform_func], + ) + + # Should match because both are transformed to "Modified name" + assert len(matches) == 1, "Expected one match after transformation" + + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), f"Expected source flow to be reset after transformation, got {source_flows[0].current.name.data!r} != {source_normalized.name.data!r}" + assert ( + target_flows[0].current.name.data == target_normalized.name.data + ), f"Expected target flow to be reset after transformation, got {target_flows[0].current.name.data!r} != {target_normalized.name.data!r}" + + +def test_transform_and_then_match_with_filter(): + """Test matching with filters applied.""" + source_data1 = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_data2 = { + "name": "Water", + "context": "water", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow1 = Flow.from_dict(source_data1) + source_flow2 = Flow.from_dict(source_data2) + target_flow = Flow.from_dict(target_data) + + source_flows = [ + NormalizedFlow( + original=source_flow1, + normalized=source_flow1.normalize(), + current=copy(source_flow1.normalize()), + ), + NormalizedFlow( + original=source_flow2, + normalized=source_flow2.normalize(), + current=copy(source_flow2.normalize()), + ), + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + ] + + def filter_air_flows(flows): + return [f for f in flows if "air" in str(f.current.context)] + + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + filter_source_flows=filter_air_flows, + ) + + # Should match only the carbon dioxide flow (air context), not water + assert len(matches) == 1, "Expected one match after filtering" + assert ( + matches[0].source == source_flow1 + ), "Expected match to reference filtered source flow" + + # Verify all flows are reset (including the filtered one) + assert ( + source_flows[0].current.name.data == source_flow1.normalize().name.data + ), "Expected first source flow to be reset" + assert ( + source_flows[1].current.name.data == source_flow2.normalize().name.data + ), "Expected second source flow to be reset" + + +def test_transform_and_then_match_with_transform_and_filter(): + """Test matching with both transformations and filters.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def transform_func(flows): + for flow in flows: + flow.update_current(name="Transformed name") + 
return flows
+
+    def filter_func(flows):
+        # Filter to only flows with "Transformed" in name
+        return [f for f in flows if "Transformed" in f.current.name.data]
+
+    matches = transform_and_then_match(
+        source_flows=source_flows,
+        target_flows=target_flows,
+        match_function=match_identical_names,
+        transform_source_flows=[transform_func],
+        transform_target_flows=[transform_func],
+        filter_source_flows=filter_func,
+        filter_target_flows=filter_func,
+    )
+
+    # Should match because both are transformed and pass filter
+    assert len(matches) == 1, "Expected one match after transformation and filtering"
+
+    # Verify flows are reset
+    assert (
+        source_flows[0].current.name.data == source_normalized.name.data
+    ), "Expected source flow to be reset"
+
+
+def test_transform_and_then_match_does_not_reset_on_exception():
+    """Test that flows are NOT reset when match function raises exception."""
+    source_data = {
+        "name": "Carbon dioxide",
+        "context": "air",
+        "unit": "kg",
+    }
+    target_data = {
+        "name": "Carbon dioxide",
+        "context": "air",
+        "unit": "kg",
+    }
+
+    source_flow = Flow.from_dict(source_data)
+    target_flow = Flow.from_dict(target_data)
+    source_normalized = source_flow.normalize()
+    target_normalized = target_flow.normalize()
+
+    source_flows = [
+        NormalizedFlow(
+            original=source_flow,
+            normalized=source_normalized,
+            current=copy(source_normalized),
+        )
+    ]
+    target_flows = [
+        NormalizedFlow(
+            original=target_flow,
+            normalized=target_normalized,
+            current=copy(target_normalized),
+        )
+    ]
+
+    def transform_func(flows):
+        for flow in flows:
+            flow.update_current(name="Modified")
+        return flows
+
+    def failing_match_function(source_flows, target_flows):
+        raise ValueError("Test exception")
+
+    try:
+        transform_and_then_match(
+            source_flows=source_flows,
+            target_flows=target_flows,
+            match_function=failing_match_function,
+            transform_source_flows=[transform_func],
+            transform_target_flows=[transform_func],
+        )
+    except ValueError:
+        pass
+
+    # Verify flows are NOT reset when exception occurs
+    # (This documents current behavior - flows are only reset on success)
+    assert (
+        source_flows[0].current.name.data == "Modified"
+    ), "Expected source flow to NOT be reset when exception occurs"
+    assert (
+        target_flows[0].current.name.data == "Modified"
+    ), "Expected target flow to NOT be reset when exception occurs"
+
+
+def test_transform_and_then_match_only_source_transformation():
+    """Test matching with only source flow transformation."""
+    source_data = {
+        "name": "Carbon dioxide",
+        "context": "air",
+        "unit": "kg",
+    }
+    target_data = {
+        "name": "Carbon dioxide",
+        "context": "air",
+        "unit": "kg",
+    }
+
+    source_flow = Flow.from_dict(source_data)
+    target_flow = Flow.from_dict(target_data)
+    source_normalized = source_flow.normalize()
+    target_normalized = target_flow.normalize()
+
+    source_flows = [
+        NormalizedFlow(
+            original=source_flow,
+            normalized=source_normalized,
+            current=copy(source_normalized),
+        )
+    ]
+    target_flows = [
+        NormalizedFlow(
+            original=target_flow,
+            normalized=target_normalized,
+            current=copy(target_normalized),
+        )
+    ]
+
+    def transform_source(flows):
+        for flow in flows:
+            flow.update_current(name="Modified source")
+        return flows
+
+    matches = transform_and_then_match(
+        source_flows=source_flows,
+        target_flows=target_flows,
+        match_function=match_identical_names,
+        transform_source_flows=[transform_source],
+    )
+
+    # Should not match because only source is transformed
+    assert len(matches) == 0, "Expected no match when only source is transformed"
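+    # Even though no match was produced, transform_and_then_match should still
+    # restore each flow, so current is compared against the normalized baseline below.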
+ + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset" + + +def test_transform_and_then_match_filter_returns_empty_list(): + """Test matching when filter returns empty list.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def filter_nothing(flows): + return [] + + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + filter_source_flows=filter_nothing, + ) + + # Should have no matches because filter returns empty list + assert len(matches) == 0, "Expected no matches when filter returns empty list" + + # Verify flows are still reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset even when filtered out" + + +def test_transform_and_then_match_with_list_of_transformations(): + """Test matching with a list of transformations applied in sequence.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def transform1(flows): + for flow in flows: + flow.update_current(name="First transformation") + return flows + + def transform2(flows): + for flow in flows: + flow.update_current(name="Second transformation") + return flows + + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + transform_source_flows=[transform1, transform2], + transform_target_flows=[transform1, transform2], + ) + + # Should match because both are transformed through the same sequence + assert len(matches) == 1, "Expected one match after multiple transformations" + + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset after transformations" + assert ( + target_flows[0].current.name.data == target_normalized.name.data + ), "Expected target flow to be reset after transformations" + + +def test_transform_and_then_match_list_transformations_sequence(): + """Test that list transformations are applied in the correct sequence.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = 
Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + # Track transformation order + transform_order = [] + + def transform1(flows): + transform_order.append("transform1") + for flow in flows: + flow.update_current(name="Transform1") + return flows + + def transform2(flows): + transform_order.append("transform2") + for flow in flows: + flow.update_current(name="Transform2") + return flows + + def transform3(flows): + transform_order.append("transform3") + for flow in flows: + flow.update_current(name="Transform3") + return flows + + # Apply transformations in sequence + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + transform_source_flows=[transform1, transform2, transform3], + transform_target_flows=[transform1, transform2, transform3], + ) + + # Verify transformations were applied in order + assert transform_order == [ + "transform1", + "transform2", + "transform3", + "transform1", + "transform2", + "transform3", + ], f"Expected transformations in order, got {transform_order}" + + # Final name should be from transform3 + # But we need to check during matching, so let's verify the match happened + assert len(matches) == 1, "Expected match after sequential transformations" + + +def test_transform_and_then_match_single_function_still_works(): + """Test that single function transformation works when wrapped in a list.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def transform_func(flows): + for flow in flows: + flow.update_current(name="Single transform") + return flows + + # Test with single function wrapped in list + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + transform_source_flows=[transform_func], + transform_target_flows=[transform_func], + ) + + # Should match because both are transformed + assert len(matches) == 1, "Expected one match with single transformation function" + + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset" + + +def test_transform_and_then_match_mixed_single_and_list(): + """Test matching with single function for source and list for target.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + + source_flow = Flow.from_dict(source_data) + target_flow = Flow.from_dict(target_data) + source_normalized = source_flow.normalize() + target_normalized = 
target_flow.normalize() + + source_flows = [ + NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + ] + target_flows = [ + NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + ] + + def single_transform(flows): + for flow in flows: + flow.update_current(name="Single") + return flows + + def list_transform1(flows): + for flow in flows: + flow.update_current(name="List1") + return flows + + def list_transform2(flows): + for flow in flows: + flow.update_current(name="List2") + return flows + + # Source: single function in list, Target: list of functions + matches = transform_and_then_match( + source_flows=source_flows, + target_flows=target_flows, + match_function=match_identical_names, + transform_source_flows=[single_transform], + transform_target_flows=[list_transform1, list_transform2], + ) + + # Should not match because names differ: "Single" vs "List2" + assert len(matches) == 0, "Expected no match when transformations differ" + + # Verify flows are reset + assert ( + source_flows[0].current.name.data == source_normalized.name.data + ), "Expected source flow to be reset" diff --git a/tests/test_transform_flow.py b/tests/test_transform_flow.py deleted file mode 100644 index 339d7bb..0000000 --- a/tests/test_transform_flow.py +++ /dev/null @@ -1,123 +0,0 @@ -import json -from pathlib import Path - -from flowmapper.flow import Flow -from flowmapper.flowmap import Flowmap -from flowmapper.transformation_mapping import prepare_transformations - -DATA_DIR = Path(__file__).parent / "data" - - -def test_transform_flow_without_default_transformations(): - transformations = prepare_transformations( - [json.load(open(DATA_DIR / "transformations.json"))] - ) - source_flows = json.load(open(DATA_DIR / "sp.json")) - source_flows = [Flow(flow, transformations) for flow in source_flows] - target_flows = json.load(open(DATA_DIR / "ei-3.7.json")) - target_flows = [Flow(flow, transformations) for flow in target_flows] - - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.to_randonneur() - - expected = [ - { - "source": { - "name": "1,4-Butanediol", - "unit": "kg", - "context": "air", - "CAS number": "110-63-4", - }, - "target": { - "name": "1,4-Butanediol", - "unit": "kg", - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "context": ["air", "unspecified"], - "CAS number": "110-63-4", - }, - "conversion_factor": 1.0, - "comment": "Identical names", - }, - { - "source": { - "name": "1,4-Butanediol", - "unit": "kg", - "context": "air/high. 
pop.", - "CAS number": "110-63-4", - }, - "target": { - "name": "1,4-Butanediol", - "unit": "kg", - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "context": ["air", "unspecified"], - "CAS number": "110-63-4", - }, - "conversion_factor": 1.0, - "comment": "Identical names", - }, - ] - assert actual == expected - - -def test_transform_flow_with_default_transformations(transformations): - all_transformations = transformations + prepare_transformations( - [json.load(open(DATA_DIR / "transformations.json"))] - ) - source_flows = json.load(open(DATA_DIR / "sp.json")) - source_flows = [Flow(flow, all_transformations) for flow in source_flows] - target_flows = json.load(open(DATA_DIR / "ei-3.7.json")) - target_flows = [Flow(flow, all_transformations) for flow in target_flows] - - flowmap = Flowmap(source_flows, target_flows) - actual = flowmap.to_randonneur() - - expected = [ - { - "comment": "Identical names", - "conversion_factor": 1.0, - "source": { - "CAS number": "110-63-4", - "context": "air", - "name": "1,4-Butanediol", - "unit": "kg", - }, - "target": { - "CAS number": "110-63-4", - "context": ["air", "unspecified"], - "identifier": "09db39be-d9a6-4fc3-8d25-1f80b23e9131", - "name": "1,4-Butanediol", - "unit": "kg", - }, - }, - { - "comment": "Identical names", - "conversion_factor": 1.2142857142857142, - "source": { - "context": "air/low. pop.", - "name": "Ammonia, as N", - "unit": "kg", - }, - "target": { - "CAS number": "7664-41-7", - "context": ["air", "non-urban air or from high stacks"], - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "name": "Ammonia", - "unit": "kg", - }, - }, - { - "comment": "Name matching with location code", - "conversion_factor": 1.0, - "source": {"context": "air/low. pop.", "name": "Ammonia, FR", "unit": "kg"}, - "target": { - "CAS number": "7664-41-7", - "context": ["air", "non-urban air or from high stacks"], - "identifier": "0f440cc0-0f74-446d-99d6-8ff0e97a2444", - "location": "FR", - "name": "Ammonia", - "unit": "kg", - }, - }, - ] - - assert actual == expected diff --git a/tests/test_unit.py b/tests/test_unit.py deleted file mode 100644 index f1e395e..0000000 --- a/tests/test_unit.py +++ /dev/null @@ -1,90 +0,0 @@ -import math - -from flowmapper.transformation_mapping import prepare_transformations -from flowmapper.unit import UnitField -from flowmapper.utils import apply_transformations, load_standard_transformations - - -def test_equals_with_loaded_transformation(): - transformations = prepare_transformations(load_standard_transformations()) - - a = {"unit": "M2A"} - a_t = apply_transformations(a, transformations) - b = {"unit": "m2*year"} - b_t = apply_transformations(b, transformations) - - u1 = UnitField(a["unit"], a_t["unit"]) - u2 = UnitField(b["unit"], b_t["unit"]) - - assert u1 == u2 - - -def test_equals_mass(): - u1 = UnitField("kg") - u2 = UnitField("kilogram") - - assert u1 == u2 - - -def test_energy(): - u1 = UnitField("kilowatt hour") - u2 = UnitField("MJ") - assert u1.compatible(u2) - assert u1.conversion_factor(u2) == 3.6 - - -def test_enrichment(): - u1 = UnitField("SWU") - u2 = UnitField("tonne * SW") - assert u1.compatible(u2) - assert u1.conversion_factor(u2) == 1e-3 - - -def test_natural_gas(): - u1 = UnitField("nm3") - u2 = UnitField("sm3") - assert u1.compatible(u2) - - -def test_livestock(): - u1 = UnitField("LU") - u2 = UnitField("livestock unit") - assert u1 == u2 - - -def test_freight(): - u1 = UnitField("kilogram * km") - u2 = UnitField("tkm") - assert u1.conversion_factor(u2) == 1e-3 - - -def 
test_vehicular_travel(): - u1 = UnitField("vehicle * m") - u2 = UnitField("vkm") - assert u1.conversion_factor(u2) == 1e-3 - - -def test_person_travel(): - u1 = UnitField("person * m") - u2 = UnitField("pkm") - assert u1.conversion_factor(u2) == 1e-3 - - -def test_conversion_factor(): - u1 = UnitField("mg") - u2 = UnitField("kg") - actual = u1.conversion_factor(u2) - assert actual == 1e-06 - - -def test_nan_conversion_factor(): - u1 = UnitField("bq") - u2 = UnitField("kg") - actual = u1.conversion_factor(u2) - assert math.isnan(actual) - - -def test_complex_conversions(): - u1 = UnitField("square_meter_year / t") - u2 = UnitField("(meter ** 2 * month) / kg") - assert u1.conversion_factor(u2) == 0.012 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..828322c --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +"""Unit tests for flowmapper using mocks.""" diff --git a/tests/unit/domain/__init__.py b/tests/unit/domain/__init__.py new file mode 100644 index 0000000..9b513de --- /dev/null +++ b/tests/unit/domain/__init__.py @@ -0,0 +1 @@ +"""Unit tests for domain entities.""" diff --git a/tests/unit/domain/test_flow.py b/tests/unit/domain/test_flow.py new file mode 100644 index 0000000..64e3c42 --- /dev/null +++ b/tests/unit/domain/test_flow.py @@ -0,0 +1,733 @@ +import pytest + +from flowmapper.domain.flow import Flow + + +class TestFlowRepr: + """Test Flow __repr__ method.""" + + def test_repr_basic_flow(self): + """Test Flow __repr__ with only required fields.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + result = repr(flow) + assert "Flow(" in result, "Expected 'Flow(' in repr" + assert "name=" in result, "Expected 'name=' in repr" + assert "unit=" in result, "Expected 'unit=' in repr" + assert "context=" in result, "Expected 'context=' in repr" + assert ( + "Carbon dioxide" in result or "carbon dioxide" in result + ), "Expected name in repr" + + def test_repr_with_identifier(self): + """Test Flow __repr__ with identifier.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "test-id-123", + } + ) + result = repr(flow) + assert "identifier=" in result, "Expected 'identifier=' in repr" + assert "test-id-123" in result, "Expected identifier value in repr" + + def test_repr_with_location(self): + """Test Flow __repr__ with location.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg", "location": "US"} + ) + result = repr(flow) + assert "location=" in result, "Expected 'location=' in repr" + assert "US" in result, "Expected location value in repr" + + def test_repr_with_cas_number(self): + """Test Flow __repr__ with CAS number.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "cas_number": "000124-38-9", + } + ) + result = repr(flow) + assert "cas_number=" in result, "Expected 'cas_number=' in repr" + # CAS number is normalized, so check for normalized format + assert ( + "124-38-9" in result or "000124-38-9" in result + ), "Expected CAS number in repr" + + def test_repr_with_synonyms(self): + """Test Flow __repr__ with synonyms.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "synonyms": ["CO2", "carbon dioxide"], + } + ) + result = repr(flow) + assert "synonyms=" in result, "Expected 'synonyms=' in repr" + assert "CO2" in result, "Expected synonym in repr" + + def test_repr_with_all_fields(self): + 
"""Test Flow __repr__ with all optional fields.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "test-id", + "location": "US", + "cas_number": "000124-38-9", + "synonyms": ["CO2"], + "conversion_factor": 2.5, + } + ) + result = repr(flow) + assert "name=" in result, "Expected 'name=' in repr" + assert "unit=" in result, "Expected 'unit=' in repr" + assert "context=" in result, "Expected 'context=' in repr" + assert "identifier=" in result, "Expected 'identifier=' in repr" + assert "location=" in result, "Expected 'location=' in repr" + assert "cas_number=" in result, "Expected 'cas_number=' in repr" + assert "synonyms=" in result, "Expected 'synonyms=' in repr" + assert "conversion_factor=" in result, "Expected 'conversion_factor=' in repr" + + def test_repr_without_optional_fields(self): + """Test Flow __repr__ without optional fields (should not include them).""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + result = repr(flow) + assert ( + "identifier=" not in result + ), "Expected 'identifier=' not in repr when None" + assert "location=" not in result, "Expected 'location=' not in repr when None" + assert ( + "cas_number=" not in result + ), "Expected 'cas_number=' not in repr when None" + assert "synonyms=" not in result, "Expected 'synonyms=' not in repr when empty" + assert ( + "conversion_factor=" not in result + ), "Expected 'conversion_factor=' not in repr when None" + + def test_repr_with_empty_synonyms(self): + """Test Flow __repr__ with empty synonyms list (should not include).""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg", "synonyms": []} + ) + result = repr(flow) + assert ( + "synonyms=" not in result + ), "Expected 'synonyms=' not in repr when empty list" + + def test_repr_with_oxidation_state(self): + """Test Flow __repr__ with oxidation state.""" + flow = Flow.from_dict( + { + "name": "Iron(II) oxide", + "context": "air", + "unit": "kg", + } + ) + # Oxidation state is extracted during normalization, but we can set it directly + + # Create a flow with oxidation state + normalized = flow.normalize() + result = repr(normalized) + # Oxidation state might be extracted from name, check if it's in repr + # The repr will show it if it's not None + if normalized.oxidation_state is not None: + assert "oxidation_state=" in result, "Expected 'oxidation_state=' in repr" + + +class TestFlowCopyWithNewLocation: + """Test Flow copy_with_new_location method.""" + + def test_copy_with_new_location_basic(self): + """Test copy_with_new_location with simple location replacement.""" + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert new_flow.name.data == "Ammonia, DE", "Expected name to have new location" + assert new_flow.context == flow.context, "Expected context to be preserved" + assert new_flow.unit == flow.unit, "Expected unit to be preserved" + assert new_flow._id != flow._id, "Expected new Flow instance with different _id" + + def test_copy_with_new_location_preserves_attributes(self): + """Test copy_with_new_location preserves all other attributes except identifier.""" + flow = Flow.from_dict( + { + "name": "Ammonia, NL", + "context": "air", + "unit": "kg", + "identifier": "test-id-123", + "location": "US", + "cas_number": "0007664-41-7", + "synonyms": ["NH3"], + } + ) + new_flow = flow.copy_with_new_location("DE") + + assert ( + new_flow.identifier 
!= flow.identifier + ), "Expected identifier to be a new UUID, not preserved" + assert new_flow.identifier is not None, "Expected identifier to be set" + assert ( + new_flow.cas_number == flow.cas_number + ), "Expected cas_number to be preserved" + assert new_flow.synonyms == flow.synonyms, "Expected synonyms to be preserved" + assert new_flow.context == flow.context, "Expected context to be preserved" + assert new_flow.unit == flow.unit, "Expected unit to be preserved" + + def test_copy_with_new_location_multiple_commas(self): + """Test copy_with_new_location with multiple commas in name.""" + flow = Flow.from_dict( + {"name": "Ammonia, pure, NL", "context": "air", "unit": "kg"} + ) + new_flow = flow.copy_with_new_location("FR") + + assert ( + new_flow.name.data == "Ammonia, pure, FR" + ), "Expected location at end to be replaced" + + def test_copy_with_new_location_complex_location(self): + """Test copy_with_new_location with complex location codes.""" + flow = Flow.from_dict( + {"name": "Ammonia, RER w/o DE+NL+NO", "context": "air", "unit": "kg"} + ) + new_flow = flow.copy_with_new_location("GLO") + + assert ( + new_flow.name.data == "Ammonia, GLO" + ), "Expected complex location to be replaced with simple one" + + def test_copy_with_new_location_simple_to_complex(self): + """Test copy_with_new_location replacing simple location with complex one.""" + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("RER w/o DE+NL+NO") + + assert ( + new_flow.name.data == "Ammonia, RER w/o DE+NL+NO" + ), "Expected simple location to be replaced with complex one" + + def test_copy_with_new_location_appends_when_no_location_suffix(self): + """Test copy_with_new_location appends location when no location suffix exists.""" + flow = Flow.from_dict({"name": "Ammonia", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert new_flow.name.data == "Ammonia, DE", "Expected location to be appended" + assert new_flow.identifier != flow.identifier, "Expected new identifier" + + def test_copy_with_new_location_appends_with_dash_location(self): + """Test copy_with_new_location appends location when dash-separated location exists.""" + flow = Flow.from_dict({"name": "Ammonia-NL", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert ( + new_flow.name.data == "Ammonia-NL, DE" + ), "Expected location to be appended" + assert new_flow.identifier != flow.identifier, "Expected new identifier" + + def test_copy_with_new_location_appends_when_location_in_middle(self): + """Test copy_with_new_location appends location when location not at end.""" + flow = Flow.from_dict( + {"name": "Ammonia, NL, pure", "context": "air", "unit": "kg"} + ) + new_flow = flow.copy_with_new_location("DE") + + assert ( + new_flow.name.data == "Ammonia, NL, pure, DE" + ), "Expected location to be appended" + assert new_flow.identifier != flow.identifier, "Expected new identifier" + + def test_copy_with_new_location_various_locations(self): + """Test copy_with_new_location with various location codes.""" + test_cases = [ + ("Water, DE", "FR", "Water, FR"), + ("Water, FR", "US", "Water, US"), + ("Water, US", "GLO", "Water, GLO"), + ("Water, GLO", "DE", "Water, DE"), + ] + + for name, new_location, expected_name in test_cases: + flow = Flow.from_dict({"name": name, "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location(new_location) + assert ( + new_flow.name.data == expected_name + ), 
f"Expected '{expected_name}' for '{name}' -> '{new_location}', but got {new_flow.name.data!r}" + + def test_copy_with_new_location_only_location_code(self): + """Test copy_with_new_location with only location code in name.""" + flow = Flow.from_dict({"name": ", NL", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert new_flow.name.data == ", DE", "Expected location to be replaced" + + def test_copy_with_new_location_with_trailing_whitespace(self): + """Test copy_with_new_location preserves trailing whitespace.""" + flow = Flow.from_dict({"name": "Ammonia, NL ", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert ( + new_flow.name.data == "Ammonia, DE " + ), "Expected trailing whitespace to be preserved" + + def test_copy_with_new_location_creates_new_instance(self): + """Test copy_with_new_location creates a new Flow instance.""" + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + new_flow = flow.copy_with_new_location("DE") + + assert new_flow is not flow, "Expected new Flow instance" + assert new_flow._id != flow._id, "Expected different _id" + + def test_copy_with_new_location_original_unchanged(self): + """Test copy_with_new_location does not modify original flow.""" + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + original_name = flow.name.data + + new_flow = flow.copy_with_new_location("DE") + + assert ( + flow.name.data == original_name + ), "Expected original flow name to be unchanged" + assert ( + new_flow.name.data != original_name + ), "Expected new flow name to be different" + + def test_copy_with_new_location_with_all_fields(self): + """Test copy_with_new_location with flow containing all fields.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide, NL", + "context": ("Raw", "(unspecified)"), + "unit": "kg", + "identifier": "test-id-123", + "cas_number": "000124-38-9", + "synonyms": ["CO2"], + } + ) + new_flow = flow.copy_with_new_location("DE") + + # Check name is updated + assert ( + new_flow.name.data == "Carbon dioxide, DE" + ), "Expected name to have new location" + # Check all other fields are preserved except identifier + assert ( + new_flow.identifier != flow.identifier + ), "Expected identifier to be a new UUID, not preserved" + assert new_flow.identifier is not None, "Expected identifier to be set" + assert new_flow.context == flow.context, "Expected context preserved" + assert new_flow.unit == flow.unit, "Expected unit preserved" + assert new_flow.cas_number == flow.cas_number, "Expected cas_number preserved" + assert new_flow.synonyms == flow.synonyms, "Expected synonyms preserved" + + def test_copy_with_new_location_raises_value_error_empty_location(self): + """Test copy_with_new_location raises ValueError when location parameter is empty.""" + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + + with pytest.raises(ValueError, match="No location parameter given"): + flow.copy_with_new_location("") + + with pytest.raises(ValueError, match="No location parameter given"): + flow.copy_with_new_location(None) + + def test_copy_with_new_location_sets_new_identifier(self): + """Test copy_with_new_location sets a new UUID identifier.""" + import uuid + + flow = Flow.from_dict( + { + "name": "Ammonia, NL", + "context": "air", + "unit": "kg", + "identifier": "test-id-123", + } + ) + new_flow = flow.copy_with_new_location("DE") + + # Verify identifier is different + assert ( + new_flow.identifier != 
flow.identifier + ), "Expected identifier to be different from original" + assert new_flow.identifier is not None, "Expected identifier to be set" + # Verify it's a valid UUID format + try: + uuid.UUID(new_flow.identifier) + except ValueError: + pytest.fail( + f"Expected identifier to be a valid UUID, but got {new_flow.identifier!r}" + ) + + def test_copy_with_new_location_identifier_when_none(self): + """Test copy_with_new_location sets identifier even when original is None.""" + import uuid + + flow = Flow.from_dict({"name": "Ammonia, NL", "context": "air", "unit": "kg"}) + assert flow.identifier is None, "Expected original identifier to be None" + + new_flow = flow.copy_with_new_location("DE") + + # Verify identifier is set even when original was None + assert ( + new_flow.identifier is not None + ), "Expected identifier to be set even when original was None" + # Verify it's a valid UUID format + try: + uuid.UUID(new_flow.identifier) + except ValueError: + pytest.fail( + f"Expected identifier to be a valid UUID, but got {new_flow.identifier!r}" + ) + + +class TestFlowToDict: + """Test Flow to_dict method.""" + + def test_to_dict_with_all_fields(self): + """Test to_dict with all fields populated.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "test-id-123", + "location": "NL", + "cas_number": "000124-38-9", + "synonyms": ["CO2", "Carbon dioxide"], + "conversion_factor": 2.5, + } + ) + result = flow.to_dict() + + assert result["name"] == "Carbon dioxide", "Expected name in dict" + assert result["unit"] == "kg", "Expected unit in dict" + # Context as_tuple() returns string if value is string, tuple if list/tuple + assert result["context"] == "air", "Expected context as string (from as_tuple)" + assert result["identifier"] == "test-id-123", "Expected identifier in dict" + assert result["location"] == "NL", "Expected location in dict" + assert result["cas_number"] == flow.cas_number, "Expected cas_number in dict" + assert result["synonyms"] == [ + "CO2", + "Carbon dioxide", + ], "Expected synonyms in dict" + assert result["conversion_factor"] == 2.5, "Expected conversion_factor in dict" + + def test_to_dict_with_only_required_fields(self): + """Test to_dict with only required fields.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + result = flow.to_dict() + + assert result["name"] == "Carbon dioxide", "Expected name in dict" + assert result["unit"] == "kg", "Expected unit in dict" + # Context as_tuple() returns string if value is string + assert result["context"] == "air", "Expected context as string (from as_tuple)" + assert result["identifier"] is None, "Expected identifier to be None" + assert "location" not in result, "Expected location not in dict when None" + assert "cas_number" not in result, "Expected cas_number not in dict when None" + assert "synonyms" not in result, "Expected synonyms not in dict when empty" + assert ( + "conversion_factor" not in result + ), "Expected conversion_factor not in dict when None" + + def test_to_dict_excludes_none_optional_fields(self): + """Test to_dict excludes None optional fields.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": None, + } + ) + result = flow.to_dict() + + assert "location" not in result, "Expected location not in dict when None" + assert ( + "oxidation_state" not in result + ), "Expected oxidation_state not in dict when None" + assert "cas_number" not in 
result, "Expected cas_number not in dict when None" + assert "synonyms" not in result, "Expected synonyms not in dict when empty" + assert ( + "conversion_factor" not in result + ), "Expected conversion_factor not in dict when None" + + def test_to_dict_excludes_empty_synonyms(self): + """Test to_dict excludes empty synonyms list.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "synonyms": [], + } + ) + result = flow.to_dict() + + assert "synonyms" not in result, "Expected empty synonyms not in dict" + + def test_to_dict_context_as_tuple(self): + """Test to_dict converts context to tuple format.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": ["Raw", "(unspecified)"], + "unit": "kg", + } + ) + result = flow.to_dict() + + # When context is a list, as_tuple() returns a tuple (not normalized) + assert isinstance(result["context"], tuple), "Expected context to be tuple" + assert result["context"] == ( + "Raw", + "(unspecified)", + ), "Expected context tuple (not normalized in to_dict)" + + +class TestFlowRandonneurMapping: + """Test Flow randonneur_mapping static method.""" + + def test_randonneur_mapping_returns_dict(self): + """Test randonneur_mapping returns dictionary structure.""" + result = Flow.randonneur_mapping() + + assert isinstance(result, dict), "Expected dict return type" + assert "expression language" in result, "Expected expression language key" + assert "labels" in result, "Expected labels key" + + def test_randonneur_mapping_expression_language(self): + """Test randonneur_mapping has correct expression language.""" + result = Flow.randonneur_mapping() + + assert ( + result["expression language"] == "JSONPath" + ), "Expected JSONPath expression language" + + def test_randonneur_mapping_all_attributes_mapped(self): + """Test randonneur_mapping includes all Flow attributes.""" + result = Flow.randonneur_mapping() + labels = result["labels"] + + assert "unit" in labels, "Expected unit mapping" + assert "name" in labels, "Expected name mapping" + assert "context" in labels, "Expected context mapping" + assert "identifier" in labels, "Expected identifier mapping" + assert "location" in labels, "Expected location mapping" + assert "cas_number" in labels, "Expected cas_number mapping" + assert "synonyms" in labels, "Expected synonyms mapping" + + def test_randonneur_mapping_jsonpath_expressions(self): + """Test randonneur_mapping has correct JSONPath expressions.""" + result = Flow.randonneur_mapping() + labels = result["labels"] + + assert labels["unit"] == "$.unit", "Expected unit JSONPath" + assert labels["name"] == "$.name", "Expected name JSONPath" + assert labels["context"] == "$.context", "Expected context JSONPath" + assert labels["identifier"] == "$.identifier", "Expected identifier JSONPath" + assert labels["location"] == "$.location", "Expected location JSONPath" + assert labels["cas_number"] == "$.cas_number", "Expected cas_number JSONPath" + assert labels["synonyms"] == "$.synonyms", "Expected synonyms JSONPath" + assert "conversion_factor" in labels, "Expected conversion_factor mapping" + assert ( + labels["conversion_factor"] == "$.conversion_factor" + ), "Expected conversion_factor JSONPath" + + +class TestFlowEquality: + """Test Flow __eq__ method.""" + + def test_eq_same_instance(self): + """Test equality with same instance.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert flow == flow, "Expected flow to equal itself" + + def 
test_eq_different_instances_same_data(self): + """Test different flows with same data are not equal (different _id).""" + flow1 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + flow2 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert flow1 != flow2, "Expected flows with different _id to not be equal" + + def test_eq_different_objects(self): + """Test equality with non-Flow objects returns False.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert flow != "not a flow", "Expected flow to not equal string" + assert flow != 123, "Expected flow to not equal number" + assert flow != None, "Expected flow to not equal None" # noqa: E711 + + +class TestFlowComparison: + """Test Flow __lt__ method.""" + + def test_lt_sorts_by_name(self): + """Test sorting by name.""" + flow1 = Flow.from_dict({"name": "Ammonia", "context": "air", "unit": "kg"}) + flow2 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert flow1 < flow2, "Expected Ammonia < Carbon dioxide" + assert not (flow2 < flow1), "Expected Carbon dioxide not < Ammonia" + + def test_lt_sorts_by_unit_when_names_equal(self): + """Test sorting by unit when names are equal.""" + flow1 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "g"} + ) + flow2 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert flow1 < flow2, "Expected g < kg when names are equal" + + def test_lt_sorts_by_context_when_name_and_unit_equal(self): + """Test sorting by context when name and unit are equal.""" + flow1 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + flow2 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "water", "unit": "kg"} + ) + + assert flow1 < flow2, "Expected air < water when name and unit are equal" + + def test_lt_sorts_by_identifier_when_other_fields_equal(self): + """Test sorting by identifier when other fields are equal.""" + flow1 = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "id1", + } + ) + flow2 = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "id2", + } + ) + + assert flow1 < flow2, "Expected id1 < id2 when other fields are equal" + + def test_lt_with_non_flow_object(self): + """Test comparison with non-Flow objects.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + # __lt__ should return False for non-Flow objects + result = flow < "not a flow" + assert result is False, "Expected __lt__ to return False for non-Flow objects" + + +class TestFlowConversionFactor: + """Test Flow conversion_factor attribute.""" + + def test_conversion_factor_from_dict(self): + """Test conversion_factor can be set via from_dict.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 2.5, + } + ) + assert flow.conversion_factor == 2.5, "Expected conversion_factor to be set" + + def test_conversion_factor_none_by_default(self): + """Test conversion_factor is None by default.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + assert ( + flow.conversion_factor is None + ), "Expected conversion_factor to be None by default" + + def test_conversion_factor_preserved_in_normalize(self): + """Test conversion_factor is preserved during 
normalization.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + "conversion_factor": 3.0, + } + ) + normalized = flow.normalize() + assert ( + normalized.conversion_factor == 3.0 + ), "Expected conversion_factor to be preserved in normalize" + + def test_conversion_factor_in_to_dict_when_present(self): + """Test conversion_factor included in to_dict when present.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 1.5, + } + ) + result = flow.to_dict() + assert "conversion_factor" in result, "Expected conversion_factor in dict" + assert ( + result["conversion_factor"] == 1.5 + ), "Expected conversion_factor value in dict" + + def test_conversion_factor_not_in_to_dict_when_none(self): + """Test conversion_factor excluded from to_dict when None.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + result = flow.to_dict() + assert ( + "conversion_factor" not in result + ), "Expected conversion_factor not in dict when None" + + def test_conversion_factor_in_repr_when_present(self): + """Test conversion_factor included in __repr__ when present.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 2.0, + } + ) + result = repr(flow) + assert "conversion_factor=" in result, "Expected conversion_factor in repr" + assert "2.0" in result, "Expected conversion_factor value in repr" + + def test_conversion_factor_not_in_repr_when_none(self): + """Test conversion_factor excluded from __repr__ when None.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + result = repr(flow) + assert ( + "conversion_factor=" not in result + ), "Expected conversion_factor not in repr when None" diff --git a/tests/unit/domain/test_match.py b/tests/unit/domain/test_match.py new file mode 100644 index 0000000..d7071dd --- /dev/null +++ b/tests/unit/domain/test_match.py @@ -0,0 +1,452 @@ +"""Unit tests for Match class.""" + +from copy import copy + +import pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match import Match +from flowmapper.domain.match_condition import MatchCondition + + +class TestMatchInitialization: + """Test Match class initialization.""" + + def test_match_initialization_with_required_fields(self): + """Test Match initialization with only required fields.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + assert match.source == source_flow, "Expected source to match" + assert match.target == target_flow, "Expected target to match" + assert match.function_name == "test_function", "Expected function_name to match" + assert match.condition == MatchCondition.exact, "Expected condition to match" + assert match.conversion_factor == 1.0, "Expected default conversion_factor" + assert match.comment == "", "Expected default empty comment" + assert ( + match.new_target_flow is False + ), "Expected default new_target_flow to be False" + + def test_match_initialization_with_all_fields(self): + """Test Match initialization with all fields including new_target_flow.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", 
"unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.related, + conversion_factor=2.5, + comment="Test comment", + new_target_flow=True, + ) + + assert match.source == source_flow, "Expected source to match" + assert match.target == target_flow, "Expected target to match" + assert match.function_name == "test_function", "Expected function_name to match" + assert match.condition == MatchCondition.related, "Expected condition to match" + assert match.conversion_factor == 2.5, "Expected conversion_factor to match" + assert match.comment == "Test comment", "Expected comment to match" + assert match.new_target_flow is True, "Expected new_target_flow to be True" + + def test_match_initialization_with_new_target_flow_false(self): + """Test Match initialization with new_target_flow explicitly set to False.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + new_target_flow=False, + ) + + assert match.new_target_flow is False, "Expected new_target_flow to be False" + + def test_match_initialization_with_different_conditions(self): + """Test Match initialization with different MatchCondition values.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + for condition in MatchCondition: + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=condition, + new_target_flow=True, + ) + assert match.condition == condition, f"Expected condition to be {condition}" + assert match.new_target_flow is True, "Expected new_target_flow to be True" + + +class TestMatchExport: + """Test Match export method.""" + + def test_export_basic(self): + """Test basic export without metadata.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + new_target_flow=True, + ) + + exported = match.export() + + assert "source" in exported, "Expected source in exported data" + assert "target" in exported, "Expected target in exported data" + # Export uses the original flow data (not normalized) + assert ( + exported["source"]["name"] == "Carbon dioxide" + ), "Expected source name in export" + assert ( + exported["target"]["name"] == "Carbon dioxide" + ), "Expected target name in export" + # Condition is exported as SKOS URI via as_glad() method + assert ( + exported["condition"] == "http://www.w3.org/2004/02/skos/core#exactMatch" + ), "Expected condition as SKOS URI" + assert exported["conversion_factor"] == 1.0, "Expected conversion_factor" + assert exported["comment"] == "", "Expected comment" + assert exported["new_target_flow"] is True, "Expected new_target_flow in export" + assert "function_name" not in exported, "Expected function_name to be removed" + + def test_export_with_metadata(self): + """Test export with 
flowmapper_metadata enabled.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.close, + new_target_flow=False, + ) + + exported = match.export(flowmapper_metadata=True) + + assert ( + "flowmapper_metadata" in exported + ), "Expected flowmapper_metadata in export" + assert exported["flowmapper_metadata"]["function_name"] == "test_function" + assert "version" in exported["flowmapper_metadata"] + assert ( + exported["new_target_flow"] is False + ), "Expected new_target_flow in export" + + def test_export_with_new_target_flow(self): + """Test export includes new_target_flow attribute.""" + source_flow = Flow.from_dict( + {"name": "Water", "context": "water", "unit": "kg"} + ) + target_flow = Flow.from_dict( + {"name": "Water", "context": "water", "unit": "kg"} + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.related, + new_target_flow=True, + comment="New target flow", + ) + + exported = match.export() + + assert ( + exported["new_target_flow"] is True + ), "Expected new_target_flow to be True in export" + assert ( + exported["comment"] == "New target flow" + ), "Expected comment to be preserved" + + +class TestMatchComparison: + """Test Match comparison methods.""" + + def test_match_less_than_comparison(self): + """Test Match __lt__ method for sorting.""" + source1 = Flow.from_dict({"name": "A", "context": "air", "unit": "kg"}) + target1 = Flow.from_dict({"name": "B", "context": "air", "unit": "kg"}) + source2 = Flow.from_dict({"name": "C", "context": "air", "unit": "kg"}) + target2 = Flow.from_dict({"name": "D", "context": "air", "unit": "kg"}) + + match1 = Match( + source=source1, + target=target1, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=True, + ) + match2 = Match( + source=source2, + target=target2, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=False, + ) + + assert match1 < match2, "Expected match1 to be less than match2" + assert not (match2 < match1), "Expected match2 not to be less than match1" + + def test_match_comparison_with_same_source_different_target(self): + """Test Match comparison with same source but different target.""" + source = Flow.from_dict({"name": "A", "context": "air", "unit": "kg"}) + target1 = Flow.from_dict({"name": "B", "context": "air", "unit": "kg"}) + target2 = Flow.from_dict({"name": "C", "context": "air", "unit": "kg"}) + + match1 = Match( + source=source, + target=target1, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=True, + ) + match2 = Match( + source=source, + target=target2, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=False, + ) + + assert ( + match1 < match2 + ), "Expected match1 to be less than match2 based on target name" + + def test_match_comparison_new_target_flow_does_not_affect_sorting(self): + """Test that new_target_flow does not affect comparison.""" + source1 = Flow.from_dict({"name": "A", "context": "air", "unit": "kg"}) + target1 = Flow.from_dict({"name": "B", "context": "air", "unit": "kg"}) + source2 = Flow.from_dict({"name": "C", "context": "air", "unit": "kg"}) + target2 = Flow.from_dict({"name": "D", "context": "air", "unit": "kg"}) + + match1 = Match( + 
source=source1, + target=target1, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=True, + ) + match2 = Match( + source=source2, + target=target2, + function_name="test", + condition=MatchCondition.exact, + new_target_flow=False, + ) + + # Comparison should be based on source/target names, not new_target_flow + assert ( + match1 < match2 + ), "Expected comparison based on names, not new_target_flow" + + +class TestMatchWithComplexFlows: + """Test Match with complex flow data.""" + + def test_match_with_all_flow_fields(self): + """Test Match with flows containing all possible fields.""" + source_flow = Flow.from_dict( + { + "name": "Carbon dioxide, in air", + "context": ["Emissions", "to air"], + "unit": "kg", + "identifier": "source-id", + "location": "US", + "cas_number": "000124-38-9", + "synonyms": ["CO2"], + } + ) + target_flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": ["Emissions", "to air"], + "unit": "kg", + "identifier": "target-id", + "location": "CA", + "cas_number": "124-38-9", + } + ) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.close, + conversion_factor=1.5, + comment="Complex match", + new_target_flow=True, + ) + + assert match.source == source_flow + assert match.target == target_flow + assert match.new_target_flow is True + + exported = match.export() + assert exported["new_target_flow"] is True + assert exported["conversion_factor"] == 1.5 + assert exported["comment"] == "Complex match" + + +class TestMatchExportEdgeCases: + """Test Match export edge cases.""" + + def test_export_excludes_private_attributes(self): + """Test export excludes _id and other private attributes.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + exported = match.export() + + # Check source and target don't have _id + assert "_id" not in exported["source"], "Expected _id not in exported source" + assert "_id" not in exported["target"], "Expected _id not in exported target" + + def test_export_with_flowmapper_metadata_true(self): + """Test export with flowmapper_metadata=True includes version.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + exported = match.export(flowmapper_metadata=True) + + assert ( + "flowmapper_metadata" in exported + ), "Expected flowmapper_metadata in export" + assert ( + "version" in exported["flowmapper_metadata"] + ), "Expected version in metadata" + assert ( + "function_name" in exported["flowmapper_metadata"] + ), "Expected function_name in metadata" + assert ( + exported["flowmapper_metadata"]["function_name"] == "test_function" + ), "Expected function_name to match" + + def test_export_with_flowmapper_metadata_false(self): + """Test export with flowmapper_metadata=False excludes metadata.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + + match = Match( + source=source_flow, + 
target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + exported = match.export(flowmapper_metadata=False) + + assert ( + "flowmapper_metadata" not in exported + ), "Expected flowmapper_metadata not in export" + + def test_export_serializes_userstring_objects(self): + """Test export serializes UserString objects in source/target.""" + from flowmapper.fields import StringField + + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + exported = match.export() + + # StringField is a UserString subclass, should be serialized to string + assert isinstance( + exported["source"]["name"], str + ), "Expected name to be string, not UserString" + assert isinstance( + exported["target"]["name"], str + ), "Expected name to be string, not UserString" + + def test_export_serializes_contextfield_objects(self): + """Test export serializes ContextField objects.""" + source_flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": ["air", "unspecified"], "unit": "kg"} + ) + target_flow = Flow.from_dict({"name": "CO2", "context": "air", "unit": "kg"}) + + match = Match( + source=source_flow, + target=target_flow, + function_name="test_function", + condition=MatchCondition.exact, + ) + + exported = match.export() + + # ContextField should be serialized to its value + assert isinstance( + exported["source"]["context"], (str, tuple, list) + ), "Expected context to be serialized" + assert not hasattr( + exported["source"]["context"], "value" + ), "Expected context not to be ContextField object" diff --git a/tests/unit/domain/test_match_condition.py b/tests/unit/domain/test_match_condition.py new file mode 100644 index 0000000..cde514b --- /dev/null +++ b/tests/unit/domain/test_match_condition.py @@ -0,0 +1,81 @@ +"""Unit tests for MatchCondition enum.""" + +import pytest + +from flowmapper.domain.match_condition import MatchCondition + + +class TestMatchConditionAsGlad: + """Test MatchCondition as_glad method.""" + + def test_exact_match_returns_equals(self): + """Test exact match returns '='.""" + assert ( + MatchCondition.exact.as_glad() == "=" + ), "Expected exact match to return '='" + + def test_close_match_returns_tilde(self): + """Test close match returns '~'.""" + assert ( + MatchCondition.close.as_glad() == "~" + ), "Expected close match to return '~'" + + def test_related_match_returns_tilde(self): + """Test related match returns '~'.""" + assert ( + MatchCondition.related.as_glad() == "~" + ), "Expected related match to return '~'" + + def test_narrow_match_returns_greater_than(self): + """Test narrow match returns '>'.""" + assert ( + MatchCondition.narrow.as_glad() == ">" + ), "Expected narrow match to return '>'" + + def test_broad_match_returns_less_than(self): + """Test broad match returns '<'.""" + assert ( + MatchCondition.broad.as_glad() == "<" + ), "Expected broad match to return '<'" + + def test_all_enum_values_have_glad_symbols(self): + """Test all enum values have corresponding GLAD symbols.""" + glad_symbols = {condition.as_glad() for condition in MatchCondition} + + assert "=" in glad_symbols, "Expected '=' symbol for exact match" + assert "~" in glad_symbols, "Expected '~' symbol for close/related match" + assert ">" in glad_symbols, "Expected '>' symbol for narrow match" + assert "<" in 
glad_symbols, "Expected '<' symbol for broad match" + + +class TestMatchConditionEnumValues: + """Test MatchCondition enum values.""" + + def test_all_values_are_valid_skos_uris(self): + """Test all enum values are valid SKOS URIs.""" + skos_base = "http://www.w3.org/2004/02/skos/core#" + + for condition in MatchCondition: + assert condition.value.startswith( + skos_base + ), f"Expected {condition.name} to be SKOS URI" + assert "#" in condition.value, f"Expected {condition.value} to contain '#'" + + def test_enum_can_be_used_in_comparisons(self): + """Test enum can be used in comparisons.""" + assert MatchCondition.exact == MatchCondition.exact, "Expected exact == exact" + assert MatchCondition.exact != MatchCondition.close, "Expected exact != close" + assert MatchCondition.exact in [ + MatchCondition.exact, + MatchCondition.close, + ], "Expected exact in list" + + def test_enum_string_representation(self): + """Test enum string representation.""" + assert ( + str(MatchCondition.exact) == MatchCondition.exact.value + ), "Expected str() to return value" + assert ( + repr(MatchCondition.exact) + == f"" + ), "Expected repr() to show enum name and value" diff --git a/tests/unit/domain/test_normalized_flow.py b/tests/unit/domain/test_normalized_flow.py new file mode 100644 index 0000000..83df1b8 --- /dev/null +++ b/tests/unit/domain/test_normalized_flow.py @@ -0,0 +1,987 @@ +"""Unit tests for NormalizedFlow class.""" + +from copy import copy + +import pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.normalized_flow import NormalizedFlow + + +class TestNormalizedFlowResetCurrent: + """Test NormalizedFlow reset_current method.""" + + def test_reset_current_resets_to_normalized(self): + """Test reset_current resets current to normalized flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify current + nf.update_current(name="Modified name") + assert ( + nf.current.name.data != normalized.name.data + ), "Expected current to be different from normalized after update" + + # Reset + nf.reset_current() + assert ( + nf.current.name.data == normalized.name.data + ), f"Expected current.name to equal normalized.name after reset, but got {nf.current.name.data!r} != {normalized.name.data!r}" + assert ( + nf.current.unit.data == normalized.unit.data + ), f"Expected current.unit to equal normalized.unit after reset, but got {nf.current.unit.data!r} != {normalized.unit.data!r}" + assert ( + nf.current.context.value == normalized.context.value + ), f"Expected current.context to equal normalized.context after reset, but got {nf.current.context.value!r} != {normalized.context.value!r}" + + def test_reset_current_creates_new_instance(self): + """Test reset_current creates a new Flow instance.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify current + nf.update_current(name="Modified name") + old_current_id = nf.current._id + + # Reset + nf.reset_current() + assert ( + nf.current._id != old_current_id + ), "Expected reset_current to create a new Flow instance with different _id" + assert ( + nf.current is not normalized + ), "Expected reset_current to create a copy, not 
reference to normalized" + + def test_reset_current_preserves_normalized(self): + """Test reset_current does not modify normalized flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify current multiple times + nf.update_current(name="First modification") + nf.update_current(name="Second modification") + nf.update_current(unit="g") + + # Reset + nf.reset_current() + + # Check normalized is unchanged + assert ( + normalized.name.data == "carbon dioxide" + ), f"Expected normalized.name to be unchanged, but got {normalized.name.data!r}" + # Unit is normalized (kg -> kilogram), so check normalized value + assert ( + normalized.unit.data == "kilogram" + ), f"Expected normalized.unit to be unchanged, but got {normalized.unit.data!r}" + + def test_reset_current_with_complex_flow(self): + """Test reset_current with flow containing all fields.""" + data = { + "name": "Carbon dioxide, in air", + "context": ["Raw", "(unspecified)"], + "unit": "kg", + "identifier": "test-id-123", + "location": "US", + "cas_number": "000124-38-9", + "synonyms": ["CO2"], + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify multiple fields + nf.update_current(name="Modified", unit="g", location="CA") + + # Reset + nf.reset_current() + + # Verify all fields are reset + assert ( + nf.current.name.data == normalized.name.data + ), "Expected name to be reset to normalized" + assert ( + nf.current.unit.data == normalized.unit.data + ), "Expected unit to be reset to normalized" + assert ( + nf.current.location == normalized.location + ), "Expected location to be reset to normalized" + assert ( + nf.current.identifier == normalized.identifier + ), "Expected identifier to be reset to normalized" + assert ( + nf.current.cas_number == normalized.cas_number + ), "Expected cas_number to be reset to normalized" + + +class TestNormalizedFlowUpdateCurrent: + """Test NormalizedFlow update_current method.""" + + def test_update_current_with_name(self): + """Test update_current with name parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(name="Updated name") + assert ( + nf.current.name.data == "Updated name" + ), f"Expected current.name to be 'Updated name', but got {nf.current.name.data!r}" + assert ( + nf.current.unit.data == normalized.unit.data + ), "Expected unit to remain unchanged" + assert ( + nf.current.context.value == normalized.context.value + ), "Expected context to remain unchanged" + + def test_update_current_with_unit(self): + """Test update_current with unit parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(unit="g") + assert ( + nf.current.unit.data == "g" + ), f"Expected current.unit to be 'g', but got {nf.current.unit.data!r}" + assert ( + nf.current.name.data == normalized.name.data + ), "Expected name to remain unchanged" + + 
def test_update_current_with_context(self): + """Test update_current with context parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(context=["water", "unspecified"]) + assert nf.current.context.value == [ + "water", + "unspecified", + ], f"Expected current.context to be ['water', 'unspecified'], but got {nf.current.context.value!r}" + + def test_update_current_with_multiple_fields(self): + """Test update_current with multiple fields.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(name="Updated name", unit="g", context="water") + assert nf.current.name.data == "Updated name", "Expected name to be updated" + assert nf.current.unit.data == "g", "Expected unit to be updated" + assert nf.current.context.value == "water", "Expected context to be updated" + + def test_update_current_with_location(self): + """Test update_current with location parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "US", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(location="CA") + assert ( + nf.current.location == "CA" + ), f"Expected current.location to be 'CA', but got {nf.current.location!r}" + + def test_update_current_with_identifier(self): + """Test update_current with identifier parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "original-id", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(identifier="new-id") + assert ( + nf.current.identifier == "new-id" + ), f"Expected current.identifier to be 'new-id', but got {nf.current.identifier!r}" + + def test_update_current_with_cas_number(self): + """Test update_current with cas_number parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "cas_number": "000124-38-9", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(cas_number="000078-79-5") + # CAS numbers are normalized (leading zeros removed) when passed through from_string + assert ( + nf.current.cas_number.data == "78-79-5" + ), f"Expected current.cas_number to be '78-79-5' (normalized), but got {nf.current.cas_number.data!r}" + + def test_update_current_with_synonyms(self): + """Test update_current with synonyms parameter.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "synonyms": ["CO2"], + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(synonyms=["CO2", "carbon dioxide"]) + assert nf.current.synonyms == [ + "CO2", + "carbon dioxide", + ], f"Expected current.synonyms to be ['CO2', 'carbon dioxide'], but got 
{nf.current.synonyms!r}" + + def test_update_current_creates_new_instance(self): + """Test update_current creates a new Flow instance.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + old_current_id = nf.current._id + nf.update_current(name="Updated") + assert ( + nf.current._id != old_current_id + ), "Expected update_current to create a new Flow instance with different _id" + + def test_update_current_preserves_normalized(self): + """Test update_current does not modify normalized flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(name="Updated", unit="g") + assert ( + normalized.name.data == "carbon dioxide" + ), "Expected normalized.name to be unchanged" + # Unit is normalized (kg -> kilogram), so check normalized value + assert ( + normalized.unit.data == "kilogram" + ), f"Expected normalized.unit to be unchanged, but got {normalized.unit.data!r}" + + def test_update_current_based_on_normalized(self): + """Test update_current uses normalized as base, not current.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # First update + nf.update_current(name="First update") + assert nf.current.name.data == "First update", "Expected first update to work" + + # Second update - should be based on normalized, not "First update" + nf.update_current(unit="g") + assert ( + nf.current.name.data == normalized.name.data + ), "Expected name to revert to normalized value when not specified in update" + assert nf.current.unit.data == "g", "Expected unit to be updated" + + def test_update_current_with_empty_synonyms(self): + """Test update_current with empty synonyms list.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "synonyms": ["CO2"], + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(synonyms=[]) + assert ( + nf.current.synonyms == [] + ), f"Expected current.synonyms to be empty list, but got {nf.current.synonyms!r}" + + def test_update_current_with_none_location(self): + """Test update_current with None location.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "US", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + nf.update_current(location=None) + assert ( + nf.current.location is None + ), f"Expected current.location to be None, but got {nf.current.location!r}" + + def test_update_current_with_oxidation_state(self): + """Test update_current with oxidation_state parameter.""" + data = { + "name": "Iron(II) oxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Note: oxidation_state 
is extracted from name during normalization + # This test verifies we can update it if needed + from flowmapper.fields import OxidationState + + nf.update_current(oxidation_state=3) + assert ( + nf.current.oxidation_state.value == 3 + ), f"Expected current.oxidation_state to be 3, but got {nf.current.oxidation_state.value if nf.current.oxidation_state else None!r}" + + +class TestNormalizedFlowRepr: + """Test NormalizedFlow __repr__ method.""" + + def test_repr_basic_normalized_flow(self): + """Test NormalizedFlow __repr__ with basic flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + result = repr(nf) + assert "NormalizedFlow(" in result, "Expected 'NormalizedFlow(' in repr" + assert "original=" in result, "Expected 'original=' in repr" + assert "current=" in result, "Expected 'current=' in repr" + assert "matched=" in result, "Expected 'matched=' in repr" + + def test_repr_shows_original_and_current(self): + """Test NormalizedFlow __repr__ shows both original and current flows.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + result = repr(nf) + # Check that original Flow repr is included + assert "Flow(" in result, "Expected 'Flow(' in repr (from original or current)" + # Check that both original and current are represented + assert result.count("Flow(") >= 2, "Expected at least 2 Flow() representations" + + def test_repr_with_matched_true(self): + """Test NormalizedFlow __repr__ with matched=True.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, + normalized=normalized, + current=copy(normalized), + matched=True, + ) + + result = repr(nf) + assert "matched=True" in result, "Expected 'matched=True' in repr" + + def test_repr_with_matched_false(self): + """Test NormalizedFlow __repr__ with matched=False.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, + normalized=normalized, + current=copy(normalized), + matched=False, + ) + + result = repr(nf) + assert "matched=False" in result, "Expected 'matched=False' in repr" + + def test_repr_with_modified_current(self): + """Test NormalizedFlow __repr__ shows modified current flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify current + nf.update_current(name="Modified name") + + result = repr(nf) + assert ( + "Modified name" in result or "modified name" in result + ), "Expected modified name in repr" + # Original should still be in repr + assert ( + "Carbon dioxide" in result or "carbon dioxide" in result + ), "Expected original name in repr" + + def test_repr_with_all_fields(self): + """Test NormalizedFlow __repr__ with flows containing all fields.""" + data = { + "name": "Carbon dioxide, in air", + "context": ["Raw", "(unspecified)"], + 
"unit": "kg", + "identifier": "test-id-123", + "location": "US", + "cas_number": "000124-38-9", + "synonyms": ["CO2"], + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + result = repr(nf) + # Should include information from both original and current + assert "original=" in result, "Expected 'original=' in repr" + assert "current=" in result, "Expected 'current=' in repr" + # The Flow reprs should include their fields + assert ( + "identifier=" in result or "test-id-123" in result + ), "Expected identifier in repr" + + def test_repr_multiline_format(self): + """Test NormalizedFlow __repr__ uses multiline format.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + result = repr(nf) + # Should be multiline (contains newlines) + assert "\n" in result, "Expected multiline repr format" + assert result.count("\n") >= 2, "Expected at least 2 newlines in repr" + + def test_repr_original_and_current_different(self): + """Test NormalizedFlow __repr__ when original and current differ.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + original = Flow.from_dict(data) + normalized = original.normalize() + nf = NormalizedFlow( + original=original, normalized=normalized, current=copy(normalized) + ) + + # Modify current significantly + nf.update_current(name="Water", unit="g", location="US") + + result = repr(nf) + # Both should be represented + assert "original=" in result, "Expected 'original=' in repr" + assert "current=" in result, "Expected 'current=' in repr" + # Original name should be present + assert ( + "Carbon dioxide" in result or "carbon dioxide" in result + ), "Expected original name in repr" + # Modified name should be present + assert "Water" in result or "water" in result, "Expected modified name in repr" + + +class TestNormalizedFlowFromDict: + """Test NormalizedFlow from_dict static method.""" + + def test_from_dict_creates_normalized_flow(self): + """Test from_dict creates NormalizedFlow from dictionary.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + nf = NormalizedFlow.from_dict(data) + + assert isinstance(nf, NormalizedFlow), "Expected NormalizedFlow instance" + assert nf.original.name.data == "Carbon dioxide", "Expected original name" + assert nf.normalized.name.data == "carbon dioxide", "Expected normalized name" + assert nf.current.name.data == "carbon dioxide", "Expected current name" + + def test_from_dict_sets_original_correctly(self): + """Test from_dict sets original flow correctly.""" + data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + "location": "US", + } + nf = NormalizedFlow.from_dict(data) + + assert ( + nf.original.name.data == "Carbon dioxide, NL" + ), "Expected original name preserved" + assert nf.original.location == "US", "Expected original location preserved" + + def test_from_dict_sets_normalized_correctly(self): + """Test from_dict sets normalized flow correctly.""" + data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + nf = NormalizedFlow.from_dict(data) + + # Normalized should extract location from name + assert ( + nf.normalized.location == "NL" + ), "Expected normalized location extracted from name" + assert ( + 
nf.normalized.name.data == "carbon dioxide" + ), "Expected normalized name without location" + + def test_from_dict_sets_current_as_copy_of_normalized(self): + """Test from_dict sets current as copy of normalized.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + nf = NormalizedFlow.from_dict(data) + + assert ( + nf.current.name.data == nf.normalized.name.data + ), "Expected current equals normalized" + assert ( + nf.current is not nf.normalized + ), "Expected current is a copy, not same object" + + +class TestNormalizedFlowUnitCompatible: + """Test NormalizedFlow unit_compatible method.""" + + def test_unit_compatible_same_units(self): + """Test unit_compatible with same units.""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Methane", "context": "air", "unit": "kg"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + assert nf1.unit_compatible(nf2) is True, "Expected same units to be compatible" + + def test_unit_compatible_different_compatible_units(self): + """Test unit_compatible with different but compatible units.""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Methane", "context": "air", "unit": "g"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + assert nf1.unit_compatible(nf2) is True, "Expected kg and g to be compatible" + + def test_unit_compatible_incompatible_units(self): + """Test unit_compatible with incompatible units.""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Water", "context": "water", "unit": "m3"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + assert ( + nf1.unit_compatible(nf2) is False + ), "Expected kg and m3 to be incompatible" + + +class TestNormalizedFlowConversionFactor: + """Test NormalizedFlow conversion_factor method.""" + + def test_conversion_factor_same_units(self): + """Test conversion_factor for same units (should be 1.0).""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Methane", "context": "air", "unit": "kg"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + assert result == 1.0, f"Expected conversion_factor to be 1.0, but got {result}" + + def test_conversion_factor_compatible_units(self): + """Test conversion_factor for compatible units.""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Methane", "context": "air", "unit": "g"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + assert ( + result == 1000.0 + ), f"Expected conversion_factor to be 1000.0 (kg to g), but got {result}" + + def test_conversion_factor_reverse_direction(self): + """Test conversion_factor in reverse direction.""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "g"} + data2 = {"name": "Methane", "context": "air", "unit": "kg"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + assert ( + result == 0.001 + ), f"Expected conversion_factor to be 0.001 (g to kg), but got {result}" + + def test_conversion_factor_incompatible_units(self): + """Test conversion_factor with incompatible units returns NaN.""" + import math + + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + 
data2 = {"name": "Water", "context": "water", "unit": "m3"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + assert math.isnan( + result + ), f"Expected conversion_factor to be NaN for incompatible units, but got {result}" + + def test_conversion_factor_with_transformation_factor(self): + """Test conversion_factor multiplies transformation factor by unit conversion.""" + data1 = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 2.5, + } + data2 = {"name": "Methane", "context": "air", "unit": "g"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + # transformation_factor (2.5) * unit_conversion (1000.0 kg to g) = 2500.0 + assert ( + result == 2500.0 + ), f"Expected conversion_factor to be 2500.0 (2.5 * 1000.0), but got {result}" + + def test_conversion_factor_with_transformation_factor_reverse(self): + """Test conversion_factor with transformation factor in reverse direction.""" + data1 = { + "name": "Carbon dioxide", + "context": "air", + "unit": "g", + "conversion_factor": 0.5, + } + data2 = {"name": "Methane", "context": "air", "unit": "kg"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + # transformation_factor (0.5) * unit_conversion (0.001 g to kg) = 0.0005 + assert ( + result == 0.0005 + ), f"Expected conversion_factor to be 0.0005 (0.5 * 0.001), but got {result}" + + def test_conversion_factor_with_transformation_factor_same_units(self): + """Test conversion_factor with transformation factor but same units.""" + data1 = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 3.0, + } + data2 = {"name": "Methane", "context": "air", "unit": "kg"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + # transformation_factor (3.0) * unit_conversion (1.0 same units) = 3.0 + assert ( + result == 3.0 + ), f"Expected conversion_factor to be 3.0 (3.0 * 1.0), but got {result}" + + def test_conversion_factor_with_none_transformation_factor(self): + """Test conversion_factor when transformation_factor is None (defaults to 1.0).""" + data1 = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + data2 = {"name": "Methane", "context": "air", "unit": "g"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + # Ensure conversion_factor is None + assert ( + nf1.current.conversion_factor is None + ), "Expected conversion_factor to be None" + + result = nf1.conversion_factor(nf2) + # None defaults to 1.0, so 1.0 * 1000.0 = 1000.0 + assert ( + result == 1000.0 + ), f"Expected conversion_factor to be 1000.0 (1.0 * 1000.0), but got {result}" + + def test_conversion_factor_with_transformation_factor_zero(self): + """Test conversion_factor with transformation_factor of 0.0. + + Note: Due to Python's 'or' operator behavior, 0.0 is treated as falsy + and defaults to 1.0, so the result is 1.0 * unit_conversion. 
+ """ + data1 = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "conversion_factor": 0.0, + } + data2 = {"name": "Methane", "context": "air", "unit": "g"} + + nf1 = NormalizedFlow.from_dict(data1) + nf2 = NormalizedFlow.from_dict(data2) + + result = nf1.conversion_factor(nf2) + # Due to 'or 1.0', 0.0 is treated as falsy and defaults to 1.0 + # So: 1.0 * unit_conversion (1000.0) = 1000.0 + assert ( + result == 1000.0 + ), f"Expected conversion_factor to be 1000.0 (1.0 * 1000.0 due to 'or' behavior), but got {result}" + + +class TestNormalizedFlowExport: + """Test NormalizedFlow export method.""" + + def test_export_exports_original_flow_data(self): + """Test export exports original flow data.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + nf = NormalizedFlow.from_dict(data) + result = nf.export() + + assert result["name"] == "Carbon dioxide", "Expected original name in export" + assert result["unit"] == "kg", "Expected original unit in export" + # Context.value returns the original value (string in this case) + assert result["context"] == "air", "Expected original context in export" + + def test_export_only_non_none_values(self): + """Test export only includes non-None values.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + nf = NormalizedFlow.from_dict(data) + result = nf.export() + + assert "identifier" not in result, "Expected identifier not in export when None" + assert "location" not in result, "Expected location not in export when None" + + def test_export_includes_location_when_present(self): + """Test export includes location when present.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "NL", + } + nf = NormalizedFlow.from_dict(data) + result = nf.export() + + assert "location" in result, "Expected location in export when present" + assert result["location"] == "NL", "Expected location value in export" + + def test_export_includes_identifier_when_present(self): + """Test export includes identifier when present.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "test-id-123", + } + nf = NormalizedFlow.from_dict(data) + result = nf.export() + + assert "identifier" in result, "Expected identifier in export when present" + assert ( + result["identifier"] == "test-id-123" + ), "Expected identifier value in export" + + def test_export_cas_number_correctly(self): + """Test CAS number is exported correctly.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "cas_number": "000124-38-9", + } + nf = NormalizedFlow.from_dict(data) + result = nf.export() + + assert "cas_number" in result, "Expected cas_number in export when present" + # CAS number is exported from normalized flow + assert isinstance(result["cas_number"], str), "Expected cas_number to be string" + + +class TestNormalizedFlowProperties: + """Test NormalizedFlow property accessors.""" + + def test_properties_return_current_flow_values(self): + """Test properties return correct value from current flow.""" + data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "NL", + "identifier": "test-id", + } + nf = NormalizedFlow.from_dict(data) + + assert nf.name == "carbon dioxide", "Expected name property from current" + # Unit is normalized, so "kg" becomes "kilogram" + assert nf.unit == "kilogram", "Expected unit property from current (normalized)" + assert nf.context == ("air",), "Expected context 
property from current" + assert nf.location == "NL", "Expected location property from current" + assert nf.identifier == "test-id", "Expected identifier property from current" + + def test_properties_reflect_update_current(self): + """Test properties reflect changes after update_current().""" + data = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + nf = NormalizedFlow.from_dict(data) + + original_name = nf.name + nf.update_current(name="Modified name", unit="g") + + # Name is not normalized when passed to update_current via Flow.from_dict + assert nf.name == "Modified name", "Expected name property to reflect update" + # Unit is not normalized when passed to update_current via Flow.from_dict + assert nf.unit == "g", "Expected unit property to reflect update" + assert nf.name != original_name, "Expected name to change after update" + + def test_properties_reflect_reset_current(self): + """Test properties reflect reset after reset_current().""" + data = {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + nf = NormalizedFlow.from_dict(data) + + normalized_name = nf.name + nf.update_current(name="Modified name") + assert nf.name != normalized_name, "Expected name to change after update" + + nf.reset_current() + assert nf.name == normalized_name, "Expected name to reset after reset_current" diff --git a/tests/unit/matching/test_equivalent_names.py b/tests/unit/matching/test_equivalent_names.py new file mode 100644 index 0000000..213531d --- /dev/null +++ b/tests/unit/matching/test_equivalent_names.py @@ -0,0 +1,121 @@ +"""Unit tests for equivalent_names function.""" + +import pytest + +from flowmapper.matching.specialized import equivalent_names + + +class TestEquivalentNames: + """Test equivalent_names function.""" + + def test_equivalent_with_in_ground_suffix(self): + """Test that names with ', in ground' suffix are equivalent.""" + assert equivalent_names("Carbon dioxide, in ground", "Carbon dioxide") is True + assert equivalent_names("Carbon dioxide", "Carbon dioxide, in ground") is True + + def test_equivalent_with_ion_suffix(self): + """Test that names with ', ion' suffix are equivalent.""" + assert equivalent_names("Carbon dioxide, ion", "Carbon dioxide") is True + assert equivalent_names("Carbon dioxide", "Carbon dioxide, ion") is True + + def test_equivalent_with_in_air_suffix(self): + """Test that names with ', in air' suffix are equivalent.""" + assert equivalent_names("Carbon dioxide, in air", "Carbon dioxide") is True + assert equivalent_names("Carbon dioxide", "Carbon dioxide, in air") is True + + def test_equivalent_with_in_water_suffix(self): + """Test that names with ', in water' suffix are equivalent.""" + assert equivalent_names("Carbon dioxide, in water", "Carbon dioxide") is True + assert equivalent_names("Carbon dioxide", "Carbon dioxide, in water") is True + + def test_equivalent_with_unspecified_origin_suffix(self): + """Test that names with ', unspecified origin' suffix are equivalent.""" + assert ( + equivalent_names("Carbon dioxide, unspecified origin", "Carbon dioxide") + is True + ) + assert ( + equivalent_names("Carbon dioxide", "Carbon dioxide, unspecified origin") + is True + ) + + def test_not_equivalent_different_suffixes(self): + """Test that names with different suffixes are not equivalent.""" + assert ( + equivalent_names("Carbon dioxide, in ground", "Carbon dioxide, in air") + is False + ) + assert ( + equivalent_names("Carbon dioxide, in air", "Carbon dioxide, in water") + is False + ) + + def 
test_equivalent_biogenic_and_non_fossil(self): + """Test that biogenic and non-fossil names are equivalent.""" + assert equivalent_names("Methane, biogenic", "Methane, non-fossil") is True + assert equivalent_names("Methane, non-fossil", "Methane, biogenic") is True + + def test_biogenic_non_fossil_with_matching_base(self): + """Test biogenic/non-fossil equivalence with matching base names.""" + assert ( + equivalent_names("Carbon dioxide, biogenic", "Carbon dioxide, non-fossil") + is True + ) + assert equivalent_names("Water, biogenic", "Water, non-fossil") is True + + def test_biogenic_non_fossil_with_different_base(self): + """Test that biogenic/non-fossil with different base names are not equivalent.""" + assert equivalent_names("Methane, biogenic", "Ethane, non-fossil") is False + + def test_not_equivalent_different_base_names(self): + """Test that names with different base names are not equivalent.""" + assert equivalent_names("Carbon dioxide", "Carbon monoxide") is False + assert equivalent_names("Methane", "Ethane") is False + + def test_not_equivalent_same_suffix_both_sides(self): + """Test that names with same suffix on both sides are not equivalent.""" + # Both have the same suffix, so they're not equivalent (base names differ) + assert equivalent_names("Carbon dioxide, in air", "Methane, in air") is False + + def test_case_sensitive_base_name(self): + """Test that base name comparison is case-sensitive.""" + assert equivalent_names("Carbon dioxide, in air", "carbon dioxide") is False + assert equivalent_names("carbon dioxide, in air", "Carbon dioxide") is False + + def test_empty_strings(self): + """Test that empty strings are not equivalent.""" + assert equivalent_names("", "") is False + assert equivalent_names("Carbon dioxide", "") is False + assert equivalent_names("", "Carbon dioxide") is False + + def test_suffix_only(self): + """Test that suffix-only strings are handled correctly.""" + # When one string is just the suffix and the other is empty, + # removing the suffix from the first gives an empty string, + # which matches the second empty string, so they're equivalent + assert equivalent_names(", in air", "") is True + assert equivalent_names("", ", in air") is True + + def test_multiple_suffixes_not_supported(self): + """Test that names with multiple supported suffixes are not equivalent.""" + # Note: This tests the current behavior - names with multiple suffixes + # are not handled by the function + assert ( + equivalent_names("Carbon dioxide, in air, ion", "Carbon dioxide") is False + ) + + def test_biogenic_with_other_suffix(self): + """Test that biogenic with other suffix is not equivalent to base.""" + # "Carbon dioxide, biogenic" should not match "Carbon dioxide, in air" + # because biogenic is only equivalent to non-fossil + assert ( + equivalent_names("Carbon dioxide, biogenic", "Carbon dioxide, in air") + is False + ) + + def test_non_fossil_with_other_suffix(self): + """Test that non-fossil with other suffix is not equivalent to base.""" + assert ( + equivalent_names("Carbon dioxide, non-fossil", "Carbon dioxide, in air") + is False + ) diff --git a/tests/unit/matching/test_match_identical_names_target_uuid_identifier.py b/tests/unit/matching/test_match_identical_names_target_uuid_identifier.py new file mode 100644 index 0000000..51a79a2 --- /dev/null +++ b/tests/unit/matching/test_match_identical_names_target_uuid_identifier.py @@ -0,0 +1,624 @@ +"""Unit tests for match_identical_names_target_uuid_identifier function.""" + +from copy import copy + +import 
pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching.basic import match_identical_names_target_uuid_identifier + + +class TestMatchIdenticalNamesTargetUuidIdentifier: + """Test match_identical_names_target_uuid_identifier function.""" + + def test_basic_matching_with_uuid_identifier(self): + """Test basic matching when target has valid UUID identifier.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", # Valid UUID + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 1, "Expected one match" + assert matches[0].source == source_flow, "Expected source to match" + assert matches[0].target == target_flow, "Expected target to match" + assert ( + matches[0].condition == MatchCondition.exact + ), "Expected condition to be exact" + assert ( + matches[0].function_name == "match_identical_names_target_uuid_identifier" + ), "Expected correct function name" + + def test_no_match_when_target_has_no_identifier(self): + """Test that no match occurs when target has no identifier.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + # No identifier + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no match when target has no identifier" + + def test_no_match_when_target_identifier_not_uuid(self): + """Test that no match occurs when target identifier is not a UUID.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "not-a-uuid", # Not a valid UUID + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) 
+ + assert ( + len(matches) == 0 + ), "Expected no match when target identifier is not a UUID" + + def test_no_match_when_names_differ(self): + """Test that no match occurs when names differ.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Methane", # Different name + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no match when names differ" + + def test_no_match_when_contexts_differ(self): + """Test that no match occurs when contexts differ.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "water", # Different context + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no match when contexts differ" + + def test_no_match_when_locations_differ(self): + """Test that no match occurs when locations differ.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide, DE", # Different location + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no match when locations differ" + + def test_no_match_when_oxidation_states_differ(self): + """Test that no match occurs when oxidation states differ.""" + source_data = { + "name": "Iron(II) oxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Iron(III) oxide", # Different oxidation state + "context": "air", + "unit": "kg", + 
"identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no match when oxidation states differ" + + def test_matches_with_custom_function_name(self): + """Test that custom function_name parameter is used.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], + target_flows=[target_nf], + function_name="custom_function", + ) + + assert len(matches) == 1, "Expected one match" + assert ( + matches[0].function_name == "custom_function" + ), "Expected custom function name" + + def test_matches_with_custom_comment(self): + """Test that custom comment parameter is used.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], + target_flows=[target_nf], + comment="Custom comment", + ) + + assert len(matches) == 1, "Expected one match" + assert matches[0].comment == "Custom comment", "Expected custom comment" + + def test_matches_with_custom_match_condition(self): + """Test that custom match_condition parameter is used.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], + target_flows=[target_nf], + match_condition=MatchCondition.related, + ) + + assert len(matches) == 1, "Expected 
one match" + assert ( + matches[0].condition == MatchCondition.related + ), "Expected custom match condition" + + def test_multiple_source_flows_same_group(self): + """Test matching multiple source flows in the same group.""" + source_flows = [] + for i in range(3): + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + source_flows.append(source_nf) + + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=source_flows, target_flows=[target_nf] + ) + + assert len(matches) == 3, "Expected three matches for three source flows" + + def test_filters_targets_without_uuid(self): + """Test that only targets with UUID identifiers are considered.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target with UUID - should match + target1_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target1_flow = Flow.from_dict(target1_data) + target1_normalized = target1_flow.normalize() + target1_nf = NormalizedFlow( + original=target1_flow, + normalized=target1_normalized, + current=copy(target1_normalized), + ) + + # Target without identifier - should not match + target2_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target2_flow = Flow.from_dict(target2_data) + target2_normalized = target2_flow.normalize() + target2_nf = NormalizedFlow( + original=target2_flow, + normalized=target2_normalized, + current=copy(target2_normalized), + ) + + # Target with non-UUID identifier - should not match + target3_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "not-a-uuid", + } + target3_flow = Flow.from_dict(target3_data) + target3_normalized = target3_flow.normalize() + target3_nf = NormalizedFlow( + original=target3_flow, + normalized=target3_normalized, + current=copy(target3_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], + target_flows=[target1_nf, target2_nf, target3_nf], + ) + + assert len(matches) == 1, "Expected one match (only target with UUID)" + assert matches[0].target == target1_flow, "Expected match with UUID target" + + def test_uuid_format_validation(self): + """Test that UUID format is strictly validated.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Invalid UUID formats that should not match + invalid_identifiers = [ + "550e8400-e29b-41d4-a716", # Too short + 
"550e8400-e29b-41d4-a716-446655440000-extra", # Too long + "550e8400e29b41d4a716446655440000", # Missing hyphens + "550e8400-e29b-41d4-a716-44665544000g", # Invalid character + "550E8400-E29B-41D4-A716-446655440000", # Uppercase (should work but let's test) + ] + + for invalid_id in invalid_identifiers: + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": invalid_id, + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + # Note: Uppercase UUIDs should actually match (regex allows A-F) + if invalid_id == "550E8400-E29B-41D4-A716-446655440000": + assert ( + len(matches) == 1 + ), f"Expected match for uppercase UUID: {invalid_id}" + else: + assert ( + len(matches) == 0 + ), f"Expected no match for invalid UUID format: {invalid_id}" + + def test_unit_compatibility_required(self): + """Test that only unit-compatible flows are matched.""" + source_data = { + "name": "Water", + "context": "water", + "unit": "m3", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_data = { + "name": "Water", + "context": "water", + "unit": "kg", # Incompatible unit + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[target_nf] + ) + + # get_matches filters by unit compatibility + assert len(matches) == 0, "Expected no match for incompatible units" + + def test_empty_source_flows(self): + """Test with empty source flows list.""" + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "identifier": "550e8400-e29b-41d4-a716-446655440000", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[], target_flows=[target_nf] + ) + + assert len(matches) == 0, "Expected no matches with empty source flows" + + def test_empty_target_flows(self): + """Test with empty target flows list.""" + source_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + matches = match_identical_names_target_uuid_identifier( + source_flows=[source_nf], target_flows=[] + ) + + assert len(matches) == 0, "Expected no matches with empty target flows" diff --git a/tests/unit/matching/test_match_names_with_suffix_removal.py b/tests/unit/matching/test_match_names_with_suffix_removal.py new file mode 100644 index 0000000..e6dd9a1 --- /dev/null +++ b/tests/unit/matching/test_match_names_with_suffix_removal.py @@ -0,0 +1,413 @@ +"""Unit 
tests for match_names_with_suffix_removal function.""" + +from copy import copy + +import pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching.specialized import match_names_with_suffix_removal + + +class TestMatchNamesWithSuffixRemoval: + """Test match_names_with_suffix_removal function.""" + + def test_matches_with_in_air_suffix(self): + """Test matching flows where one has ', in air' suffix.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + assert matches[0].source == source.original + assert matches[0].target == target.original + assert matches[0].condition == MatchCondition.close + assert matches[0].function_name == "match_names_with_suffix_removal" + + def test_matches_with_in_ground_suffix(self): + """Test matching flows where one has ', in ground' suffix.""" + source = NormalizedFlow.from_dict( + {"name": "Methane, in ground", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Methane", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_matches_with_ion_suffix(self): + """Test matching flows where one has ', ion' suffix.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, ion", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_matches_biogenic_to_non_fossil(self): + """Test matching biogenic to non-fossil flows.""" + source = NormalizedFlow.from_dict( + {"name": "Methane, biogenic", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Methane, non-fossil", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_requires_matching_context(self): + """Test that flows must have matching context.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide", + "context": "water", # Different context + "unit": "kg", + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 0 + + def test_requires_matching_oxidation_state(self): + """Test that flows must have matching oxidation state.""" + # Create flows with different oxidation states by using names that + # will be parsed differently + source = NormalizedFlow.from_dict( + {"name": "Iron(II), in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + { + "name": "Iron(III)", # Different oxidation state (III vs II) + "context": "air", + "unit": "kg", + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 0 + + def test_requires_matching_location(self): + """Test that 
flows must have matching location.""" + source = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide, in air", + "context": "air", + "unit": "kg", + "location": "NL", + } + ) + target = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "DE", # Different location + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 0 + + def test_matches_with_matching_location(self): + """Test that flows with matching location are matched.""" + source = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide, in air", + "context": "air", + "unit": "kg", + "location": "NL", + } + ) + target = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "NL", # Same location + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_matches_with_none_location(self): + """Test that flows with None location match.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_requires_unit_compatibility(self): + """Test that flows must be unit-compatible.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "m3", # Incompatible unit + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 0 + + def test_matches_multiple_sources_same_group(self): + """Test matching multiple source flows in the same group.""" + source1 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + source2 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source1, source2], target_flows=[target] + ) + + assert len(matches) == 2 + + def test_matches_multiple_targets(self): + """Test matching when multiple target flows match.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target1 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target2 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target1, target2] + ) + + # get_matches only creates matches when exactly one target remains + # after filtering. 
If multiple targets match and have the same context, + # no match is created (to avoid ambiguity) + # In this case, both targets have the same context, so no match is created + assert len(matches) == 0 + + def test_case_insensitive_name_matching(self): + """Test that name matching is case-insensitive.""" + source = NormalizedFlow.from_dict( + { + "name": "Carbon Dioxide, in air", # Mixed case + "context": "air", + "unit": "kg", + } + ) + target = NormalizedFlow.from_dict( + {"name": "carbon dioxide", "context": "air", "unit": "kg"} # Lowercase + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_custom_function_name(self): + """Test that custom function_name is used.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], + target_flows=[target], + function_name="custom_match_function", + ) + + assert matches[0].function_name == "custom_match_function" + + def test_custom_comment(self): + """Test that custom comment is used.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target], comment="Custom comment" + ) + + assert matches[0].comment == "Custom comment" + + def test_custom_match_condition(self): + """Test that custom match_condition is used.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], + target_flows=[target], + match_condition=MatchCondition.exact, + ) + + assert matches[0].condition == MatchCondition.exact + + def test_no_match_when_names_not_equivalent(self): + """Test that flows with non-equivalent names don't match.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + { + "name": "Carbon monoxide", # Different base name + "context": "air", + "unit": "kg", + } + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 0 + + def test_matches_with_unspecified_origin_suffix(self): + """Test matching flows with ', unspecified origin' suffix.""" + source = NormalizedFlow.from_dict( + {"name": "Methane, unspecified origin", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Methane", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_matches_with_in_water_suffix(self): + """Test matching flows with ', in water' suffix.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in water", "context": "water", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "water", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + 
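+    # Illustrative sketch (hypothetical test name, not from the original suite):
+    # the ambiguity rule noted in test_matches_multiple_targets above only applies
+    # while several candidate targets survive filtering. When all but one target
+    # is filtered out (here, by context), the survivor is matched as usual.
+    def test_single_surviving_candidate_is_matched(self):
+        """Sketch: a lone surviving candidate after filtering is matched."""
+        source = NormalizedFlow.from_dict(
+            {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"}
+        )
+        target_air = NormalizedFlow.from_dict(
+            {"name": "Carbon dioxide", "context": "air", "unit": "kg"}
+        )
+        target_water = NormalizedFlow.from_dict(
+            {"name": "Carbon dioxide", "context": "water", "unit": "kg"}
+        )
+
+        matches = match_names_with_suffix_removal(
+            source_flows=[source], target_flows=[target_air, target_water]
+        )
+
+        assert len(matches) == 1
+        assert matches[0].target == target_air.original
+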
def test_matches_reverse_direction(self): + """Test matching when target has suffix and source doesn't.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + + def test_matches_multiple_different_groups(self): + """Test matching multiple groups of flows.""" + source1 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + source2 = NormalizedFlow.from_dict( + {"name": "Methane, in ground", "context": "air", "unit": "kg"} + ) + target1 = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target2 = NormalizedFlow.from_dict( + {"name": "Methane", "context": "air", "unit": "kg"} + ) + + matches = match_names_with_suffix_removal( + source_flows=[source1, source2], target_flows=[target1, target2] + ) + + assert len(matches) == 2 + + def test_marked_as_matched(self): + """Test that matched source flows are marked as matched.""" + source = NormalizedFlow.from_dict( + {"name": "Carbon dioxide, in air", "context": "air", "unit": "kg"} + ) + target = NormalizedFlow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + assert source.matched is False + + matches = match_names_with_suffix_removal( + source_flows=[source], target_flows=[target] + ) + + assert len(matches) == 1 + assert source.matched is True diff --git a/tests/unit/test_add_missing_regionalized_flows.py b/tests/unit/test_add_missing_regionalized_flows.py new file mode 100644 index 0000000..36ee3e9 --- /dev/null +++ b/tests/unit/test_add_missing_regionalized_flows.py @@ -0,0 +1,698 @@ +"""Unit tests for add_missing_regionalized_flows function.""" + +from copy import copy + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.matching import add_missing_regionalized_flows + + +class TestAddMissingRegionalizedFlows: + """Test add_missing_regionalized_flows function.""" + + def test_basic_functionality_with_enough_regions(self): + """Test basic functionality when there are enough regions in target.""" + # Source flow with location + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + assert source_nf.location == "NL" + assert source_nf.name == "carbon dioxide" + + # Target flows with different locations (other_regions) + target_flows = [] + for location in ["DE", "FR", "US", "CA"]: + target_data = { + "name": f"Carbon dioxide, {location}", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + assert target_nf.name == "carbon dioxide" + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + assert len(matches) == 1, "Expected one match" + assert matches[0].new_target_flow is True, 
"Expected new_target_flow to be True" + assert ( + matches[0].function_name == "add_missing_regionalized_flows" + ), "Expected correct function name" + assert ( + matches[0].condition == MatchCondition.related + ), "Expected condition to be related" + assert matches[0].source == source_flow, "Expected source to match" + # Target should have the source's location in the name + assert matches[0].target.name == "Carbon dioxide, NL" + # Note: location attribute is not set by copy_with_new_location, only name is updated + + def test_with_other_regions_exists(self): + """Test that matches are created when other regionalized flows exist.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # 2 target flows with different locations (other_regions) + target_flows = [] + for location in ["DE", "FR"]: + target_data = { + "name": f"Carbon dioxide, {location}", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + assert len(matches) == 1, "Expected one match when other_regions exist" + assert ( + matches[0].target.name == "Carbon dioxide, NL" + ), "Expected target name to have source location" + + def test_with_single_other_region(self): + """Test with single other regionalized flow.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # 1 target flow with different location + target_data = { + "name": "Carbon dioxide, DE", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=[target_nf] + ) + + assert len(matches) == 1, "Expected one match with single other region" + assert ( + matches[0].target.name == "Carbon dioxide, NL" + ), "Expected target name to have source location" + + def test_unit_compatibility_filtering(self): + """Test that only unit-compatible flows are matched.""" + source_data = { + "name": "Water, NL", + "context": "water", + "unit": "m3", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target flows with incompatible unit + target_flows = [] + for location in ["DE", "FR", "US"]: + target_data = { + "name": f"Water, {location}", + "context": "water", + "unit": "kg", # Different unit + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + 
current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + # m3 and kg should not be unit-compatible, so no matches are expected here; + # the looser isinstance check tolerates either outcome if conversion is supported + assert isinstance(matches, list), "Expected list of matches" + + def test_multiple_sources_same_group(self): + """Test with multiple source flows in the same group.""" + source_flows = [] + for _ in range(3): + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + source_flows.append(source_nf) + + # Target flows with different locations + target_flows = [] + for location in ["DE", "FR", "US", "CA"]: + target_data = { + "name": f"Carbon dioxide, {location}", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=source_flows, target_flows=target_flows + ) + + # Should create a match for each source flow + assert len(matches) == 3, "Expected three matches for three source flows" + + def test_filters_out_flows_without_location(self): + """Test that source flows without location are filtered out.""" + # Source flow with location + source_with_location = Flow.from_dict( + {"name": "Carbon dioxide, NL", "context": "air", "unit": "kg"} + ) + source_nf_with = NormalizedFlow( + original=source_with_location, + normalized=source_with_location.normalize(), + current=copy(source_with_location.normalize()), + ) + + # Source flow without location + source_without_location = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + source_nf_without = NormalizedFlow( + original=source_without_location, + normalized=source_without_location.normalize(), + current=copy(source_without_location.normalize()), + ) + + # Target flows + target_flows = [] + for location in ["DE", "FR", "US"]: + target_flow = Flow.from_dict( + {"name": f"Carbon dioxide, {location}", "context": "air", "unit": "kg"} + ) + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf_with, source_nf_without], + target_flows=target_flows, + ) + + # Should only match the flow with location + assert len(matches) == 1, "Expected one match (only for flow with location)" + assert ( + matches[0].source == source_with_location + ), "Expected match to be for flow with location" + + def test_different_oxidation_states_not_matched(self): + """Test that flows with different oxidation states are not matched.""" + # Source flow with oxidation state + source_data = { + "name": "Iron(II) oxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target flows with a different oxidation state + target_flows = [] + for location in 
["DE", "FR", "US"]: + target_data = { + "name": "Iron(III) oxide, " + location, # Different oxidation state + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + # Should not match if oxidation states differ + assert len(matches) == 0, "Expected no matches with different oxidation states" + + def test_different_contexts_not_matched(self): + """Test that flows with different contexts are not matched.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target flows with different context + target_flows = [] + for location in ["DE", "FR", "US"]: + target_data = { + "name": f"Carbon dioxide, {location}", + "context": "water", # Different context + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + assert len(matches) == 0, "Expected no matches with different contexts" + + def test_different_names_not_matched(self): + """Test that flows with different names are not matched.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target flows with different name + target_flows = [] + for location in ["DE", "FR", "US"]: + target_data = { + "name": f"Water, {location}", # Different name + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + assert len(matches) == 0, "Expected no matches with different names" + + def test_empty_source_flows(self): + """Test with empty source flows list.""" + target_flows = [] + for location in ["DE", "FR", "US"]: + target_flow = Flow.from_dict( + {"name": f"Carbon dioxide, {location}", "context": "air", "unit": "kg"} + ) + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[], target_flows=target_flows + ) + + assert len(matches) == 0, "Expected no matches with empty source flows" + + def test_empty_target_flows(self): + """Test with empty target flows list.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + 
source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=[] + ) + + assert len(matches) == 0, "Expected no matches with empty target flows" + + def test_conversion_factor_calculated(self): + """Test that conversion factor is calculated correctly.""" + source_data = { + "name": "Water, NL", + "context": "water", + "unit": "m3", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Target flows with compatible unit + target_flows = [] + for location in ["DE", "FR", "US"]: + target_data = { + "name": f"Water, {location}", + "context": "water", + "unit": "m3", # Same unit + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + if len(matches) > 0: + assert ( + matches[0].conversion_factor == 1.0 + ), "Expected conversion_factor to be calculated (1.0 for same unit)" + + def test_comment_includes_location(self): + """Test that comment includes the location information.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_flows = [] + for location in ["DE", "FR", "US", "CA"]: + target_flow = Flow.from_dict( + {"name": f"Carbon dioxide, {location}", "context": "air", "unit": "kg"} + ) + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + if len(matches) > 0: + assert ( + "location" in matches[0].comment.lower() + ), "Expected comment to mention location" + assert ( + "new target flow" in matches[0].comment.lower() + or "added" in matches[0].comment.lower() + ), "Expected comment to mention new target flow" + + def test_multiple_groups_processed(self): + """Test that multiple groups of source flows are processed.""" + source_flows = [] + # Group 1: Carbon dioxide, NL + source1 = Flow.from_dict( + {"name": "Carbon dioxide, NL", "context": "air", "unit": "kg"} + ) + source_nf1 = NormalizedFlow( + original=source1, + normalized=source1.normalize(), + current=copy(source1.normalize()), + ) + source_flows.append(source_nf1) + + # Group 2: Water, FR + source2 = Flow.from_dict( + {"name": "Water, FR", "context": "water", "unit": "kg"} + ) + source_nf2 = NormalizedFlow( + original=source2, + normalized=source2.normalize(), + current=copy(source2.normalize()), + ) + source_flows.append(source_nf2) + + # Target flows for both groups + target_flows = [] + # For carbon dioxide + for location in ["DE", "US", "CA"]: + target_flow = Flow.from_dict( + {"name": f"Carbon dioxide, {location}", "context": "air", "unit": "kg"} + ) + target_nf = NormalizedFlow( + 
original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + target_flows.append(target_nf) + + # For water + for location in ["DE", "US", "CA"]: + target_flow = Flow.from_dict( + {"name": f"Water, {location}", "context": "water", "unit": "kg"} + ) + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_flow.normalize(), + current=copy(target_flow.normalize()), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=source_flows, target_flows=target_flows + ) + + # Should create matches for both groups + assert len(matches) >= 2, "Expected matches for both groups" + + def test_target_without_location_not_considered(self): + """Test that target flows without location are not considered as other_regions.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + target_flows = [] + # One target with location + target1 = Flow.from_dict( + {"name": "Carbon dioxide, DE", "context": "air", "unit": "kg"} + ) + target_nf1 = NormalizedFlow( + original=target1, + normalized=target1.normalize(), + current=copy(target1.normalize()), + ) + target_flows.append(target_nf1) + + # One target without location (should not be counted) + target2 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + target_nf2 = NormalizedFlow( + original=target2, + normalized=target2.normalize(), + current=copy(target2.normalize()), + ) + target_flows.append(target_nf2) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + # Should have matches because other_regions exists (even if only 1) + assert ( + len(matches) == 1 + ), "Expected one match when other_regions exists (even if only 1)" + + def test_with_non_regionalized_target(self): + """Test that the non-regionalized target is used when exactly one exists and there are no other_regions.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # One non-regionalized target (no location) + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=[target_nf] + ) + + # Should match because exactly one non_regionalized exists + assert ( + len(matches) == 1 + ), "Expected one match when exactly one non_regionalized exists" + assert ( + matches[0].target.name == "Carbon dioxide, NL" + ), "Expected target name to have source location" + + def test_with_multiple_non_regionalized_targets(self): + """Test that no match is created when multiple non-regionalized targets exist.""" + source_data = { + "name": "Carbon dioxide, NL", + "context": "air", + "unit": "kg", + } + source_flow = Flow.from_dict(source_data) + source_normalized = source_flow.normalize() + source_nf = NormalizedFlow( + 
original=source_flow, + normalized=source_normalized, + current=copy(source_normalized), + ) + + # Two non-regionalized targets (should not match) + target_flows = [] + for _ in range(2): + target_data = { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + } + target_flow = Flow.from_dict(target_data) + target_normalized = target_flow.normalize() + target_nf = NormalizedFlow( + original=target_flow, + normalized=target_normalized, + current=copy(target_normalized), + ) + target_flows.append(target_nf) + + matches = add_missing_regionalized_flows( + source_flows=[source_nf], target_flows=target_flows + ) + + # Should not match because more than one non_regionalized exists + assert ( + len(matches) == 0 + ), "Expected no match when multiple non_regionalized exist" diff --git a/tests/unit/test_cas.py b/tests/unit/test_cas.py new file mode 100644 index 0000000..8d33088 --- /dev/null +++ b/tests/unit/test_cas.py @@ -0,0 +1,410 @@ +"""Unit tests for CASField class.""" + +import pytest + +from flowmapper.fields import CASField + + +class TestCASFieldInitialization: + """Test CASField initialization.""" + + def test_init_with_valid_cas_string(self): + """Test initialization with valid CAS string.""" + cas = CASField("0000096-49-1") + assert ( + cas.data == "0000096-49-1" + ), f"Expected cas.data to be '0000096-49-1', but got {cas.data!r}" + from collections import UserString + + assert isinstance( + cas, UserString + ), f"Expected cas to be an instance of UserString, but got {type(cas)}" + + def test_init_with_empty_string_raises_error(self): + """Test initialization with empty string raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("") + + def test_init_with_none_raises_error(self): + """Test initialization with None raises TypeError.""" + with pytest.raises(TypeError, match="CASField takes only `str`, but got"): + CASField(None) # type: ignore[arg-type] + + def test_init_with_integer_raises_error(self): + """Test initialization with integer raises TypeError.""" + with pytest.raises(TypeError, match="CASField takes only `str`, but got"): + CASField(96491) # type: ignore[arg-type] + + def test_init_with_userstring(self): + """Test initialization with UserString works.""" + from collections import UserString + + us = UserString("7782-40-3") + # CASField converts UserString to string before regex search, so it works + cas = CASField(us) + assert ( + cas.data == "7782-40-3" + ), f"Expected cas.data to be '7782-40-3', but got {cas.data!r}" + assert isinstance( + cas, CASField + ), f"Expected cas to be an instance of CASField, but got {type(cas)}" + + def test_init_with_two_digits_in_front(self): + """Test initialization with two digits in the front section.""" + assert CASField( + "94-75-7" + ), "Initialization with two digits in front section failed" + + def test_init_with_whitespace(self): + """Test initialization with whitespace.""" + cas = CASField(" 7782-40-3 ") + assert ( + cas.data == " 7782-40-3 " + ), f"Expected cas.data to preserve whitespace, but got {cas.data!r}" + + def test_inherits_from_userstring(self): + """Test that CASField inherits from UserString.""" + cas = CASField("7782-40-3") + from collections import UserString + + assert isinstance( + cas, UserString + ), f"Expected cas to be an instance of UserString, but got {type(cas)}" + # UserString is not a subclass of str + assert not isinstance( + cas, str + ), f"Expected cas to not be an instance of str (UserString is not a subclass), but got {type(cas)}" + + def 
test_init_with_casfield(self): + """Test initialization with another CASField object.""" + cas1 = CASField("7782-40-3") + cas2 = CASField(cas1) + assert ( + cas2.data == "7782-40-3" + ), f"Expected cas2.data to be '7782-40-3', but got {cas2.data!r}" + assert ( + cas1 == cas2 + ), f"Expected cas1 to equal cas2, but they are not equal (cas1={cas1!r}, cas2={cas2!r})" + assert ( + cas1 is not cas2 + ), "Expected cas1 and cas2 to be different instances, but they are the same instance" + assert isinstance( + cas2, CASField + ), f"Expected cas2 to be an instance of CASField, but got {type(cas2)}" + + +class TestCASFieldDigits: + """Test CASField digits property.""" + + def test_digits_with_dashes(self): + """Test digits property with dashes.""" + cas = CASField("0000096-49-1") + assert cas.digits == [ + 0, + 0, + 0, + 0, + 0, + 9, + 6, + 4, + 9, + 1, + ], f"Expected cas.digits to be [0, 0, 0, 0, 0, 9, 6, 4, 9, 1], but got {cas.digits}" + + def test_digits_without_dashes_raises_error(self): + """Test digits property without dashes raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("0000096491") + + def test_digits_with_empty_string_raises_error(self): + """Test digits property with empty string raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("") + + +class TestCASFieldExport: + """Test CASField export method.""" + + def test_export_with_standard_format(self): + """Test export with standard CAS format.""" + cas = CASField("7782-40-3") + assert ( + cas.export() == "7782-40-3" + ), f"Expected cas.export() to be '7782-40-3', but got {cas.export()!r}" + + def test_export_without_dashes_raises_error(self): + """Test export without dashes raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("7782403") + + def test_export_with_leading_zeros(self): + """Test export with leading zeros.""" + cas = CASField("0007782-40-3") + # Export keeps leading zeros in the first part + assert ( + cas.export() == "0007782-40-3" + ), f"Expected cas.export() to be '0007782-40-3', but got {cas.export()!r}" + + def test_export_with_empty_string_raises_error(self): + """Test export with empty string raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("") + + def test_export_with_single_digit_raises_error(self): + """Test export with single digit raises ValueError.""" + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + CASField("1") + + +class TestCASFieldCheckDigitExpected: + """Test CASField check_digit_expected property.""" + + def test_check_digit_expected_valid_cas(self): + """Test check_digit_expected with CAS number.""" + cas = CASField("7732-18-5") + expected = cas.check_digit_expected + assert ( + expected == 5 + ), f"Expected check_digit_expected to be 5, but got {expected}" + + def test_check_digit_expected_invalid_cas(self): + """Test check_digit_expected with invalid CAS number.""" + cas = CASField("7782-40-2") + # Check digit is 2, but expected is 3 + expected = cas.check_digit_expected + assert ( + expected == 3 + ), f"Expected check_digit_expected to be 3, but got {expected}" + + +class TestCASFieldValid: + """Test CASField valid method.""" + + def test_valid_with_invalid_cas(self): + """Test valid with invalid CAS number.""" + cas = CASField("7782-40-2") + assert ( + not cas.valid() + ), f"Expected cas.valid() to be False, 
but got {cas.valid()}" + + def test_valid_with_leading_zeros(self): + """Test valid with leading zeros.""" + cas = CASField("0000096-49-1") + # Check digit calculation includes leading zeros + is_valid = cas.valid() + assert is_valid and isinstance( + is_valid, bool + ), f"Expected cas.valid() to return a bool, but got {type(is_valid)}" + + +class TestCASFieldFromString: + """Test CASField from_string method.""" + + def test_from_string_with_valid_cas(self): + """Test from_string with valid CAS number.""" + cas = CASField("7782-40-3") + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + cas.from_string("000009-49-1") + + def test_from_string_with_whitespace(self): + """Test from_string with whitespace.""" + cas = CASField("7782-40-3") + result = cas.from_string(" 7782-40-3 ") + # Testing actual behavior + assert result is None or isinstance( + result, CASField + ), f"Expected result to be None or CASField, but got {type(result)}" + + def test_from_string_with_invalid_cas(self): + """Test from_string with invalid CAS number.""" + cas = CASField("7782-40-3") + result = cas.from_string("7782-40-2") + # Invalid CAS should return None + assert ( + result is None + ), f"Expected from_string to return None for invalid CAS, but got {result}" + + def test_from_string_with_empty_string(self): + """Test from_string with empty string.""" + cas = CASField("7782-40-3") + # Empty string will fail validation in __init__ + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + cas.from_string("") + + def test_from_string_with_none(self): + """Test from_string with None.""" + cas = CASField("7782-40-3") + result = cas.from_string(None) + assert ( + result is None + ), f"Expected from_string to return None for None, but got {result}" + + def test_from_string_returns_new_instance(self): + """Test that from_string returns a new instance when valid.""" + cas = CASField("7782-40-3") + result = cas.from_string("7440-05-3") + if result is not None: + assert ( + result is not cas + ), "Expected from_string() to return a new instance, but it returned the same instance" + assert ( + cas.data == "7782-40-3" + ), f"Expected original cas.data to remain '7782-40-3', but got {cas.data!r}" + + +class TestCASFieldEquality: + """Test CASField equality comparison.""" + + def test_eq_with_same_casfield(self): + """Test equality with same CASField instance (exact data match).""" + cas1 = CASField("7440-05-3") + cas2 = CASField("7440-05-3") + # CASField to CASField comparison uses exact data comparison + assert ( + cas1 == cas2 + ), f"Expected cas1 to equal cas2, but they are not equal (cas1={cas1!r}, cas2={cas2!r})" + + def test_eq_with_different_casfield_data(self): + """Test equality with CASField having different data.""" + cas1 = CASField("7440-05-3") + cas2 = CASField("7782-40-3") + assert ( + cas1 != cas2 + ), f"Expected cas1 to not equal cas2, but they are equal (cas1={cas1!r}, cas2={cas2!r})" + + def test_eq_with_casfield_different_formatting(self): + """Test equality with CASField having same CAS but different formatting.""" + cas1 = CASField("7440-05-3") + cas2 = CASField("0007440-05-3") + # CASField to CASField uses exact data comparison, so formatting matters + assert ( + cas1 != cas2 + ), f"Expected cas1 to not equal cas2 (different formatting), but they are equal (cas1={cas1!r}, cas2={cas2!r})" + + def test_eq_with_string_exact_match(self): + """Test equality with string that exactly matches.""" + cas = CASField("7440-05-3") + assert ( + cas == 
"7440-05-3" + ), f"Expected cas to equal '7440-05-3', but they are not equal (cas={cas!r})" + + def test_eq_with_string_different_cas(self): + """Test equality with string containing different CAS number.""" + cas = CASField("7440-05-3") + assert ( + cas != "7782-40-3" + ), f"Expected cas to not equal '7782-40-3', but they are equal (cas={cas!r})" + + def test_eq_with_string_leading_zeros(self): + """Test equality with string containing leading zeros (should normalize).""" + cas = CASField("7440-05-3") + # String comparison uses from_string which normalizes (strips leading zeros) + assert ( + cas == "0007440-05-3" + ), f"Expected cas to equal '0007440-05-3' (normalized), but they are not equal (cas={cas!r})" + + def test_eq_with_string_whitespace(self): + """Test equality with string containing whitespace (should normalize).""" + cas = CASField("7440-05-3") + # String comparison uses from_string which normalizes (strips whitespace) + assert ( + cas == " 7440-05-3 " + ), f"Expected cas to equal ' 7440-05-3 ' (normalized), but they are not equal (cas={cas!r})" + + def test_eq_with_string_leading_zeros_and_whitespace(self): + """Test equality with string containing both leading zeros and whitespace.""" + cas = CASField("7440-05-3") + # String comparison normalizes both whitespace and leading zeros + assert ( + cas == " 0007440-05-3 " + ), f"Expected cas to equal ' 0007440-05-3 ' (normalized), but they are not equal (cas={cas!r})" + + def test_eq_with_string_invalid_cas(self): + """Test equality with string containing invalid CAS number.""" + cas = CASField("7440-05-3") + # Invalid CAS strings return None from from_string, so equality is False + assert ( + cas != "7440-05-2" + ), f"Expected cas to not equal '7440-05-2' (invalid check digit), but they are equal (cas={cas!r})" + + def test_eq_with_string_empty_string(self): + """Test equality with empty string raises ValueError.""" + cas = CASField("7440-05-3") + # Empty string is invalid CAS, so from_string raises ValueError when creating CASField + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + _ = cas == "" + + def test_eq_with_userstring(self): + """Test equality with UserString.""" + from collections import UserString + + cas = CASField("7440-05-3") + us = UserString("7440-05-3") + # UserString is handled like str in __eq__, so it should normalize + assert ( + cas == us + ), f"Expected cas to equal UserString('7440-05-3'), but they are not equal (cas={cas!r})" + + def test_eq_with_userstring_leading_zeros(self): + """Test equality with UserString containing leading zeros.""" + from collections import UserString + + cas = CASField("7440-05-3") + us = UserString("0007440-05-3") + # UserString should normalize like str + assert ( + cas == us + ), f"Expected cas to equal UserString('0007440-05-3') (normalized), but they are not equal (cas={cas!r})" + + def test_eq_with_other_types(self): + """Test equality with other types returns False.""" + cas = CASField("7440-05-3") + # Non-string, non-CASField types should return False + assert ( + cas != 744053 + ), f"Expected cas to not equal integer, but they are equal (cas={cas!r})" + assert ( + cas != None + ), f"Expected cas to not equal None, but they are equal (cas={cas!r})" + assert ( + cas != [] + ), f"Expected cas to not equal list, but they are equal (cas={cas!r})" + + def test_ne_with_different_casfield(self): + """Test inequality with different CASField.""" + cas1 = CASField("7440-05-3") + cas2 = CASField("7782-40-3") + assert ( + cas1 != cas2 + ), 
f"Expected cas1 to not equal cas2, but they are equal (cas1={cas1!r}, cas2={cas2!r})" + + def test_ne_with_string(self): + """Test inequality with different string.""" + cas = CASField("7440-05-3") + assert ( + cas != "7782-40-3" + ), f"Expected cas to not equal '7782-40-3', but they are equal (cas={cas!r})" + + +class TestCASFieldStringBehavior: + """Test CASField string behavior (inherited from UserString).""" + + def test_string_operations(self): + """Test that CASField behaves like a string.""" + cas = CASField("7782-40-3") + assert len(cas) == 9, f"Expected len(cas) to be 9, but got {len(cas)}" + assert ( + cas.upper() == "7782-40-3" + ), f"Expected cas.upper() to be '7782-40-3', but got {cas.upper()!r}" + assert cas.startswith( + "778" + ), f"Expected cas.startswith('778') to be True, but got {cas.startswith('778')}" + + def test_string_concatenation_raises_error(self): + """Test that CASField concatenation raises ValueError for invalid format.""" + cas1 = CASField("7782-40-3") + cas2 = CASField("7440-05-3") + # Concatenation creates a string that doesn't match CAS format, so __init__ raises ValueError + with pytest.raises(ValueError, match="Given input is not valid CAS formatting"): + _ = cas1 + " and " + cas2 diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py new file mode 100644 index 0000000..7d71f92 --- /dev/null +++ b/tests/unit/test_context.py @@ -0,0 +1,644 @@ +"""Unit tests for ContextField class.""" + +import pytest + +from flowmapper.fields import ContextField +from flowmapper.utils import MISSING_VALUES + + +class TestContextFieldInitialization: + """Test ContextField initialization.""" + + def test_init_with_string(self): + """Test initialization with string.""" + c = ContextField("Raw/(unspecified)") + assert ( + c.value == "Raw/(unspecified)" + ), f"Expected c.value to be 'Raw/(unspecified)', but got {c.value!r}" + assert isinstance( + c.value, str + ), f"Expected c.value to be a str, but got {type(c.value)}" + + def test_init_with_list(self): + """Test initialization with list.""" + c = ContextField(["Raw", "(unspecified)"]) + assert c.value == [ + "Raw", + "(unspecified)", + ], f"Expected c.value to be ['Raw', '(unspecified)'], but got {c.value!r}" + assert isinstance( + c.value, list + ), f"Expected c.value to be a list, but got {type(c.value)}" + + def test_init_with_tuple(self): + """Test initialization with tuple.""" + c = ContextField(("Raw",)) + assert c.value == ( + "Raw", + ), f"Expected c.value to be ('Raw',), but got {c.value!r}" + assert isinstance( + c.value, tuple + ), f"Expected c.value to be a tuple, but got {type(c.value)}" + + def test_init_with_empty_string(self): + """Test initialization with empty string.""" + c = ContextField("") + assert c.value == "", f"Expected c.value to be '', but got {c.value!r}" + + def test_init_with_empty_list(self): + """Test initialization with empty list.""" + c = ContextField([]) + assert c.value == [], f"Expected c.value to be [], but got {c.value!r}" + + def test_init_with_empty_tuple(self): + """Test initialization with empty tuple.""" + c = ContextField(tuple([])) + assert c.value == (), f"Expected c.value to be (), but got {c.value!r}" + + +class TestContextFieldNormalize: + """Test ContextField normalize method.""" + + def test_normalize_with_string(self): + """Test normalize with string value.""" + c = ContextField("A/B") + normalized = c.normalize() + assert normalized.value == ( + "a", + "b", + ), f"Expected normalized.value to be ('a', 'b'), but got {normalized.value!r}" + assert 
isinstance( + normalized.value, tuple + ), f"Expected normalized.value to be a tuple, but got {type(normalized.value)}" + assert ( + c.value == "A/B" + ), f"Expected original c.value to remain 'A/B', but got {c.value!r}" + + def test_normalize_with_string_no_slash(self): + """Test normalize with string without slash.""" + c = ContextField("A-B") + normalized = c.normalize() + assert normalized.value == ( + "a-b", + ), f"Expected normalized.value to be ('a-b',), but got {normalized.value!r}" + + def test_normalize_with_list(self): + """Test normalize with list value.""" + c = ContextField(["Raw", "(unspecified)"]) + normalized = c.normalize() + assert normalized.value == ( + "raw", + ), f"Expected normalized.value to be ('raw',), but got {normalized.value!r}" + + def test_normalize_with_only_unspecified(self): + """Test normalize with only unspecified value.""" + # When the only value is unspecified, it should be kept + c = ContextField(["unspecified"]) + normalized = c.normalize() + assert normalized.value == ( + "unspecified", + ), f"Expected normalized.value to be ('unspecified',), but got {normalized.value!r}" + + # Test with (unspecified) in parentheses + c2 = ContextField(["(unspecified)"]) + normalized2 = c2.normalize() + assert normalized2.value == ( + "(unspecified)", + ), f"Expected normalized2.value to be ('(unspecified)',), but got {normalized2.value!r}" + + # Test with string "unspecified" + c3 = ContextField("unspecified") + normalized3 = c3.normalize() + assert normalized3.value == ( + "unspecified", + ), f"Expected normalized3.value to be ('unspecified',), but got {normalized3.value!r}" + + # Test with multiple unspecified values in parentheses + c4 = ContextField(["(unspecified)", "(unspecified)"]) + normalized4 = c4.normalize() + assert normalized4.value == ( + "(unspecified)", + ), f"Expected normalized4.value to be ('(unspecified)',), but got {normalized4.value!r}" + + def test_normalize_with_tuple(self): + """Test normalize with tuple value.""" + c = ContextField(("Raw",)) + normalized = c.normalize() + assert normalized.value == ( + "raw", + ), f"Expected normalized.value to be ('raw',), but got {normalized.value!r}" + + def test_normalize_with_obj_parameter(self): + """Test normalize with obj parameter.""" + c = ContextField("X/Y") + normalized = c.normalize("A/B") + assert normalized.value == ( + "a", + "b", + ), f"Expected normalized.value to be ('a', 'b'), but got {normalized.value!r}" + assert ( + c.value == "X/Y" + ), f"Expected original c.value to remain 'X/Y', but got {c.value!r}" + + def test_normalize_lowercase(self): + """Test normalize converts to lowercase.""" + c = ContextField("A-B") + normalized = c.normalize() + assert normalized.value == ( + "a-b", + ), f"Expected normalized.value to be ('a-b',), but got {normalized.value!r}" + + def test_normalize_strip(self): + """Test normalize strips whitespace.""" + c = ContextField(" A-B\t\n") + normalized = c.normalize() + assert normalized.value == ( + "a-b", + ), f"Expected normalized.value to be ('a-b',), but got {normalized.value!r}" + + def test_normalize_removes_trailing_missing_values(self): + """Test normalize removes trailing missing values.""" + c = ContextField(("A", "(unknown)")) + normalized = c.normalize() + assert normalized.value == ( + "a", + ), f"Expected normalized.value to be ('a',), but got {normalized.value!r}" + + @pytest.mark.parametrize("missing_value", MISSING_VALUES) + def test_normalize_removes_trailing_missing_value(self, missing_value): + """Test normalize removes each trailing missing value.""" 
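+ # MISSING_VALUES (imported above from flowmapper.utils) collects placeholder + # strings such as "(unknown)" and "(unspecified)" (see the preceding tests); + # each one should be dropped when it trails a real context segment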
+ c = ContextField(("A", missing_value)) + normalized = c.normalize() + assert normalized.value == ( + "a", + ), f"Expected normalized.value to be ('a',) for missing value {missing_value!r}, but got {normalized.value!r}" + + def test_normalize_removes_multiple_trailing_missing_values(self): + """Test normalize removes multiple trailing missing values.""" + c = ContextField(("A", "(unknown)", "(unspecified)")) + normalized = c.normalize() + assert normalized.value == ( + "a", + ), f"Expected normalized.value to be ('a',), but got {normalized.value!r}" + + def test_normalize_does_not_remove_leading_missing_values(self): + """Test normalize does not remove leading missing values.""" + c = ContextField(("(unknown)", "A")) + normalized = c.normalize() + assert normalized.value == ( + "(unknown)", + "a", + ), f"Expected normalized.value to be ('(unknown)', 'a'), but got {normalized.value!r}" + + def test_normalize_returns_new_instance(self): + """Test that normalize returns a new instance.""" + c = ContextField("A/B") + normalized = c.normalize() + assert ( + normalized is not c + ), "Expected normalize() to return a new instance, but it returned the same instance" + assert ( + c.value == "A/B" + ), f"Expected original c.value to remain 'A/B', but got {c.value!r}" + + def test_normalize_with_invalid_type_raises_error(self): + """Test normalize with invalid type raises ValueError.""" + + class Foo: + pass + + c = ContextField("A/B") + with pytest.raises(ValueError, match="Can't understand input context"): + c.normalize(Foo()) + + +class TestContextFieldExportAsString: + """Test ContextField export_as_string method.""" + + def test_export_as_string_with_list(self): + """Test export_as_string with list value.""" + c = ContextField(["A", "B"]) + result = c.export_as_string() + assert ( + result == "A✂️B" + ), f"Expected export_as_string() to be 'A✂️B', but got {result!r}" + + def test_export_as_string_with_tuple(self): + """Test export_as_string with tuple value.""" + c = ContextField(("A", "B")) + result = c.export_as_string() + assert ( + result == "A✂️B" + ), f"Expected export_as_string() to be 'A✂️B', but got {result!r}" + + def test_export_as_string_with_string(self): + """Test export_as_string with string value.""" + c = ContextField("A/B") + result = c.export_as_string() + assert ( + result == "A/B" + ), f"Expected export_as_string() to be 'A/B', but got {result!r}" + + def test_export_as_string_with_custom_join_character_list(self): + """Test export_as_string with custom join_character for list value.""" + c = ContextField(["A", "B"]) + result = c.export_as_string("/") + assert ( + result == "A/B" + ), f"Expected export_as_string('/') to be 'A/B', but got {result!r}" + + def test_export_as_string_with_custom_join_character_tuple(self): + """Test export_as_string with custom join_character for tuple value.""" + c = ContextField(("A", "B", "C")) + result = c.export_as_string("|") + assert ( + result == "A|B|C" + ), f"Expected export_as_string('|') to be 'A|B|C', but got {result!r}" + + def test_export_as_string_with_custom_join_character_dash(self): + """Test export_as_string with custom join_character '-'.""" + c = ContextField(["A", "B"]) + result = c.export_as_string("-") + assert ( + result == "A-B" + ), f"Expected export_as_string('-') to be 'A-B', but got {result!r}" + + def test_export_as_string_with_custom_join_character_string_value(self): + """Test export_as_string with custom join_character for string value (should not use join_character).""" + c = ContextField("A/B") + result = 
c.export_as_string("/") + # String values are returned as-is, join_character is not used + assert ( + result == "A/B" + ), f"Expected export_as_string('/') to be 'A/B' for string value, but got {result!r}" + + def test_export_as_string_with_custom_join_character_empty_string(self): + """Test export_as_string with custom join_character as empty string.""" + c = ContextField(["A", "B"]) + result = c.export_as_string("") + assert ( + result == "AB" + ), f"Expected export_as_string('') to be 'AB', but got {result!r}" + + def test_export_as_string_with_custom_join_character_space(self): + """Test export_as_string with custom join_character as space.""" + c = ContextField(["A", "B", "C"]) + result = c.export_as_string(" ") + assert ( + result == "A B C" + ), f"Expected export_as_string(' ') to be 'A B C', but got {result!r}" + + +class TestContextFieldEq: + """Test ContextField __eq__ method.""" + + def test_eq_with_same_contextfield(self): + """Test equality with same ContextField instance.""" + c1 = ContextField("A/B") + c2 = ContextField("A/B") + assert ( + c1 == c2 + ), f"Expected c1 to equal c2, but they are not equal (c1={c1!r}, c2={c2!r})" + + def test_eq_with_different_contextfield(self): + """Test equality with different ContextField.""" + c1 = ContextField("A/B") + c2 = ContextField("X/Y") + assert ( + c1 != c2 + ), f"Expected c1 to not equal c2, but they are equal (c1={c1!r}, c2={c2!r})" + + def test_eq_with_list_and_string(self): + """Test equality with list and string values.""" + c1 = ContextField("A/B") + c2 = ContextField(["A", "B"]) + # Different value types, so not equal + assert ( + c1 != c2 + ), f"Expected c1 to not equal c2, but they are equal (c1={c1!r}, c2={c2!r})" + + def test_eq_with_string_other(self): + """Test equality with string other.""" + c = ContextField("A/B") + # __eq__ normalizes the other value and compares + # "A/B" normalized is ('a', 'b'), but c.value is "A/B", so not equal + assert ( + c != "A/B" + ), f"Expected c to not equal 'A/B', but they are equal (c={c!r})" + + def test_eq_with_empty_contextfield(self): + """Test equality with empty ContextField.""" + c1 = ContextField("") + c2 = ContextField("") + # Empty strings are falsy, so __eq__ goes to else branch + # Empty string normalizes to ('',), so c1.value ("") != normalized c2.value (('',)) + assert ( + c1 != c2 + ), f"Expected c1 to not equal c2 for empty strings, but they are equal (c1={c1!r}, c2={c2!r})" + + def test_eq_with_other_type(self): + """Test equality with non-ContextField type.""" + c = ContextField("A/B") + assert c != 123, f"Expected c to not equal 123, but they are equal (c={c!r})" + assert c != None, f"Expected c to not equal None, but they are equal (c={c!r})" + assert c != [], f"Expected c to not equal [], but they are equal (c={c!r})" + + +class TestContextFieldBool: + """Test ContextField __bool__ method.""" + + def test_bool_with_non_empty_string(self): + """Test __bool__ with non-empty string.""" + c = ContextField("A/B") + assert bool(c) is True, f"Expected bool(c) to be True, but got {bool(c)}" + + def test_bool_with_empty_string(self): + """Test __bool__ with empty string.""" + c = ContextField("") + assert bool(c) is False, f"Expected bool(c) to be False, but got {bool(c)}" + + def test_bool_with_non_empty_list(self): + """Test __bool__ with non-empty list.""" + c = ContextField(["A", "B"]) + assert bool(c) is True, f"Expected bool(c) to be True, but got {bool(c)}" + + def test_bool_with_empty_list(self): + """Test __bool__ with empty list.""" + c = ContextField([]) + 
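# Mirrors the wrapped value's truthiness: an empty list is falsy. + 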
assert bool(c) is False, f"Expected bool(c) to be False, but got {bool(c)}" + + def test_bool_with_non_empty_tuple(self): + """Test __bool__ with non-empty tuple.""" + c = ContextField(("A",)) + assert bool(c) is True, f"Expected bool(c) to be True, but got {bool(c)}" + + def test_bool_with_empty_tuple(self): + """Test __bool__ with empty tuple.""" + c = ContextField(()) + assert bool(c) is False, f"Expected bool(c) to be False, but got {bool(c)}" + + +class TestContextFieldHash: + """Test ContextField __hash__ method.""" + + def test_hash_with_string(self): + """Test __hash__ with string value.""" + c = ContextField("A/B") + result = hash(c) + assert isinstance( + result, int + ), f"Expected hash(c) to be an int, but got {type(result)}" + + def test_hash_with_list_raises_error(self): + """Test __hash__ with list value raises TypeError.""" + c = ContextField(["A", "B"]) + # Lists are not hashable, so hash() raises TypeError + with pytest.raises(TypeError): + _ = hash(c) + + def test_hash_with_tuple(self): + """Test __hash__ with tuple value.""" + c = ContextField(("A", "B")) + result = hash(c) + assert isinstance( + result, int + ), f"Expected hash(c) to be an int, but got {type(result)}" + + def test_hash_same_values(self): + """Test __hash__ with same values.""" + c1 = ContextField("A/B") + c2 = ContextField("A/B") + assert hash(c1) == hash( + c2 + ), f"Expected hash(c1) to equal hash(c2), but got {hash(c1)} and {hash(c2)}" + + +class TestContextFieldIter: + """Test ContextField __iter__ method.""" + + def test_iter_with_string(self): + """Test __iter__ with string value.""" + c = ContextField("A/B") + result = list(c) + assert result == [ + "A", + "/", + "B", + ], f"Expected list(c) to be ['A', '/', 'B'], but got {result!r}" + + def test_iter_with_list(self): + """Test __iter__ with list value.""" + c = ContextField(["A", "B"]) + result = list(c) + assert result == [ + "A", + "B", + ], f"Expected list(c) to be ['A', 'B'], but got {result!r}" + + def test_iter_with_tuple(self): + """Test __iter__ with tuple value.""" + c = ContextField(("A", "B")) + result = list(c) + assert result == [ + "A", + "B", + ], f"Expected list(c) to be ['A', 'B'], but got {result!r}" + + +class TestContextFieldContains: + """Test ContextField __contains__ method.""" + + def test_contains_with_string_values(self): + """Test __contains__ with string values.""" + c1 = ContextField("A") + c2 = ContextField("A/B") + # c2 in c1 means c1 is more generic than c2 + # This checks if c1.value == c2.value[:len(c1.value)] + # "A" == "A/B"[:1] -> "A" == "A" -> True + assert ( + c2 in c1 + ), f"Expected c2 to be in c1, but it was not (c1={c1!r}, c2={c2!r})" + assert ( + c1 not in c2 + ), f"Expected c1 to not be in c2, but it was (c1={c1!r}, c2={c2!r})" + + def test_contains_with_tuple_values(self): + """Test __contains__ with tuple values.""" + c1 = ContextField(("A",)) + c2 = ContextField(("A", "B")) + # c2 in c1 means c1 is more generic than c2 + # This checks if c1.value == c2.value[:len(c1.value)] + # ("A",) == ("A", "B")[:1] -> ("A",) == ("A",) -> True + assert ( + c2 in c1 + ), f"Expected c2 to be in c1, but it was not (c1={c1!r}, c2={c2!r})" + assert ( + c1 not in c2 + ), f"Expected c1 to not be in c2, but it was (c1={c1!r}, c2={c2!r})" + + def test_contains_with_list_values(self): + """Test __contains__ with list values.""" + c1 = ContextField(["A"]) + c2 = ContextField(["A", "B"]) + # c2 in c1 means c1 is more generic than c2 + assert ( + c2 in c1 + ), f"Expected c2 to be in c1, but it was not (c1={c1!r}, 
c2={c2!r})" + assert ( + c1 not in c2 + ), f"Expected c1 to not be in c2, but it was (c1={c1!r}, c2={c2!r})" + + def test_contains_with_non_contextfield(self): + """Test __contains__ with non-ContextField returns False.""" + c = ContextField("A/B") + assert "A/B" not in c, f"Expected 'A/B' to not be in c, but it was (c={c!r})" + assert 123 not in c, f"Expected 123 to not be in c, but it was (c={c!r})" + + +class TestContextFieldRepr: + """Test ContextField __repr__ method.""" + + def test_repr_with_string(self): + """Test __repr__ with string value.""" + c = ContextField("A/B") + result = repr(c) + assert result == "A/B", f"Expected repr(c) to be 'A/B', but got {result!r}" + + def test_repr_with_list(self): + """Test __repr__ with list value.""" + c = ContextField(["A", "B"]) + result = repr(c) + assert ( + result == "['A', 'B']" + ), f"Expected repr(c) to be '['A', 'B']', but got {result!r}" + + def test_repr_with_tuple(self): + """Test __repr__ with tuple value.""" + c = ContextField(("A", "B")) + result = repr(c) + assert ( + result == "('A', 'B')" + ), f"Expected repr(c) to be '('A', 'B')', but got {result!r}" + + +class TestContextFieldEdgeCases: + """Test ContextField edge cases.""" + + def test_normalize_preserves_original_value(self): + """Test that normalize preserves original value.""" + c = ContextField("ORIGINAL") + normalized = c.normalize() + assert ( + c.value == "ORIGINAL" + ), f"Expected original c.value to remain 'ORIGINAL', but got {c.value!r}" + assert normalized.value == ( + "original", + ), f"Expected normalized.value to be ('original',), but got {normalized.value!r}" + + def test_multiple_normalize_calls(self): + """Test multiple normalize calls.""" + c = ContextField(" TEST ") + norm1 = c.normalize() + norm2 = norm1.normalize() + assert norm1.value == ( + "test", + ), f"Expected norm1.value to be ('test',), but got {norm1.value!r}" + assert norm2.value == ( + "test", + ), f"Expected norm2.value to be ('test',), but got {norm2.value!r}" + + def test_normalize_with_mapping_parameter(self): + """Test normalize with mapping parameter (currently not implemented).""" + c = ContextField("A/B") + # mapping parameter is accepted but not used (TODO in code) + normalized = c.normalize(mapping={"A": "X"}) + assert normalized.value == ( + "a", + "b", + ), f"Expected normalized.value to be ('a', 'b'), but got {normalized.value!r}" + + +class TestContextFieldIsResource: + """Test ContextField is_resource method.""" + + @pytest.mark.parametrize( + "value,expected", + [ + # String values that should return True (resource categories) + ("resource", True), + ("resources", True), + ("natural resource", True), + ("natural resources", True), + ("land use", True), + ("economic", True), + ("social", True), + ("raw materials", True), + ("raw", True), + # Case insensitivity + ("RESOURCE", True), + ("Natural Resource", True), + # Substring matches + ("water resource extraction", True), + ("natural resource extraction", True), + ("economic activity", True), + ("social aspect", True), + # Slash-separated strings with resource + ("resource/air", True), + # String values that should return False + ("emission", False), + ("air", False), + ("water", False), + ("", False), + ("emission/air", False), + ], + ) + def test_is_resource_with_string(self, value, expected): + """Test is_resource with string values.""" + c = ContextField(value) + assert ( + c.is_resource() is expected + ), f"Expected is_resource() to be {expected} for {value!r}, but got {c.is_resource()}" + + @pytest.mark.parametrize( + 
"value,expected", + [ + # List values that should return True + (["resource"], True), + (["resources"], True), + (["raw"], True), + (["land use"], True), + (["economic"], True), + (["social"], True), + (["raw materials"], True), + (["RESOURCE"], True), # Case insensitive + (["emission", "resource", "air"], True), # Multiple elements, one resource + # List values that should return False + (["emission", "air", "water"], False), + ([], False), + ], + ) + def test_is_resource_with_list(self, value, expected): + """Test is_resource with list values.""" + c = ContextField(value) + assert ( + c.is_resource() is expected + ), f"Expected is_resource() to be {expected} for {value!r}, but got {c.is_resource()}" + + @pytest.mark.parametrize( + "value,expected", + [ + # Tuple values that should return True + (("resource",), True), + (("raw",), True), + (("emission", "resource", "air"), True), # Multiple elements, one resource + # Tuple values that should return False + (("emission", "air"), False), + ((), False), + ], + ) + def test_is_resource_with_tuple(self, value, expected): + """Test is_resource with tuple values.""" + c = ContextField(value) + assert ( + c.is_resource() is expected + ), f"Expected is_resource() to be {expected} for {value!r}, but got {c.is_resource()}" diff --git a/tests/unit/test_flowmap.py b/tests/unit/test_flowmap.py new file mode 100644 index 0000000..483d8be --- /dev/null +++ b/tests/unit/test_flowmap.py @@ -0,0 +1,1274 @@ +"""Unit tests for Flowmap class using mocks.""" + +from copy import copy +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pandas as pd +import pytest + +from flowmapper.domain.flow import Flow +from flowmapper.domain.match import Match +from flowmapper.domain.match_condition import MatchCondition +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.flowmap import Flowmap + + +class TestFlowmapInit: + """Test Flowmap __init__ method.""" + + @patch("flowmapper.flowmap.match_rules") + def test_init_with_default_rules(self, mock_match_rules): + """Test initialization with default rules.""" + mock_rules = [Mock(), Mock()] + mock_match_rules.return_value = mock_rules + + source_flows = [Mock(spec=NormalizedFlow)] + target_flows = [Mock(spec=NormalizedFlow)] + data_prep_funcs = [Mock()] + + flowmap = Flowmap( + source_flows=source_flows, + target_flows=target_flows, + data_preparation_functions=data_prep_funcs, + ) + + assert flowmap.source_flows == source_flows + assert flowmap.target_flows == target_flows + assert flowmap.data_preparation_functions == data_prep_funcs + assert flowmap.rules == mock_rules + assert flowmap.matches == [] + assert flowmap.show_progressbar is True + mock_match_rules.assert_called_once() + + def test_init_with_custom_rules(self): + """Test initialization with custom rules.""" + source_flows = [Mock(spec=NormalizedFlow)] + target_flows = [Mock(spec=NormalizedFlow)] + data_prep_funcs = [Mock()] + custom_rules = [Mock(), Mock()] + + flowmap = Flowmap( + source_flows=source_flows, + target_flows=target_flows, + data_preparation_functions=data_prep_funcs, + rules=custom_rules, + ) + + assert flowmap.rules == custom_rules + + def test_init_with_show_progressbar_false(self): + """Test initialization with show_progressbar=False.""" + source_flows = [Mock(spec=NormalizedFlow)] + target_flows = [Mock(spec=NormalizedFlow)] + data_prep_funcs = [Mock()] + + flowmap = Flowmap( + source_flows=source_flows, + target_flows=target_flows, + data_preparation_functions=data_prep_funcs, + 
show_progressbar=False, + ) + + assert flowmap.show_progressbar is False + + +class TestFlowmapGenerateMatches: + """Test Flowmap generate_matches method.""" + + @patch("flowmapper.flowmap.logger") + @patch("flowmapper.flowmap.time") + def test_generate_matches_applies_rules(self, mock_time, mock_logger): + """Test that generate_matches applies all rules.""" + # time() is called once per rule for start time, then again for elapsed + # Provide enough values: start1, end1, start2, end2 + mock_time.side_effect = [0.0, 1.0, 1.0, 2.0] + + # Create mock flows + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.matched = False + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.matched = False + target_flow = Mock(spec=NormalizedFlow) + + # Create mock matches + match1 = Mock(spec=Match) + match1.new_target_flow = False + match2 = Mock(spec=Match) + match2.new_target_flow = False + + # Create mock rules + rule1 = Mock() + rule1.__name__ = "rule1" + rule1.return_value = [match1] + + rule2 = Mock() + rule2.__name__ = "rule2" + rule2.return_value = [match2] + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2], + target_flows=[target_flow], + data_preparation_functions=[], + rules=[rule1, rule2], + ) + + flowmap.generate_matches() + + # Verify rules were called with unmatched flows + assert rule1.called + assert rule2.called + assert len(flowmap.matches) == 2 + assert flowmap.matches == [match1, match2] + + @patch("flowmapper.flowmap.logger") + @patch("flowmapper.flowmap.time") + def test_generate_matches_filters_matched_flows(self, mock_time, mock_logger): + """Test that generate_matches only passes unmatched flows to rules.""" + mock_time.side_effect = [0.0, 1.0] + + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.matched = False + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.matched = True # Already matched + target_flow = Mock(spec=NormalizedFlow) + + rule = Mock() + rule.__name__ = "test_rule" + rule.return_value = [] + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2], + target_flows=[target_flow], + data_preparation_functions=[], + rules=[rule], + ) + + flowmap.generate_matches() + + # Verify rule was called with only unmatched flow + rule.assert_called_once() + call_args = rule.call_args + assert len(call_args.kwargs["source_flows"]) == 1 + assert call_args.kwargs["source_flows"][0] == source_flow1 + + @patch("flowmapper.flowmap.logger") + @patch("flowmapper.flowmap.time") + def test_generate_matches_adds_new_target_flows(self, mock_time, mock_logger): + """Test that generate_matches adds new target flows when created.""" + mock_time.side_effect = [0.0, 1.0] + + source_flow = Mock(spec=NormalizedFlow) + source_flow.matched = False + target_flow = Mock(spec=NormalizedFlow) + new_target_flow = Mock(spec=Flow) + + match = Mock(spec=Match) + match.new_target_flow = True + match.target = new_target_flow + + rule = Mock() + rule.__name__ = "test_rule" + rule.return_value = [match] + + flowmap = Flowmap( + source_flows=[source_flow], + target_flows=[target_flow], + data_preparation_functions=[], + rules=[rule], + ) + + # Mock the add_new_target_flows method + flowmap.add_new_target_flows = Mock() + + flowmap.generate_matches() + + # Verify add_new_target_flows was called with new target flow + flowmap.add_new_target_flows.assert_called_once_with([new_target_flow]) + + @patch("flowmapper.flowmap.logger") + @patch("flowmapper.flowmap.time") + def test_generate_matches_logs_with_new_target_flows(self, mock_time, mock_logger): + """Test that 
generate_matches logs correctly when new target flows are created.""" + mock_time.side_effect = [0.0, 1.0] + + source_flow = Mock(spec=NormalizedFlow) + source_flow.matched = False + target_flow = Mock(spec=NormalizedFlow) + new_target_flow = Mock(spec=Flow) + + match = Mock(spec=Match) + match.new_target_flow = True + match.target = new_target_flow + + rule = Mock() + rule.__name__ = "test_rule" + rule.return_value = [match] + + flowmap = Flowmap( + source_flows=[source_flow], + target_flows=[target_flow], + data_preparation_functions=[], + rules=[rule], + ) + flowmap.add_new_target_flows = Mock() + + flowmap.generate_matches() + + # Verify logger was called with message about new target flows + mock_logger.info.assert_called() + call_args = mock_logger.info.call_args[0][0] + assert "new target flows" in call_args.lower() + assert "1" in call_args # 1 new target flow + + @patch("flowmapper.flowmap.logger") + @patch("flowmapper.flowmap.time") + def test_generate_matches_logs_without_new_target_flows( + self, mock_time, mock_logger + ): + """Test that generate_matches logs correctly when no new target flows.""" + mock_time.side_effect = [0.0, 1.0] + + source_flow = Mock(spec=NormalizedFlow) + source_flow.matched = False + target_flow = Mock(spec=NormalizedFlow) + + match = Mock(spec=Match) + match.new_target_flow = False + + rule = Mock() + rule.__name__ = "test_rule" + rule.return_value = [match] + + flowmap = Flowmap( + source_flows=[source_flow], + target_flows=[target_flow], + data_preparation_functions=[], + rules=[rule], + ) + + flowmap.generate_matches() + + # Verify logger was called without mention of new target flows + mock_logger.info.assert_called() + call_args = mock_logger.info.call_args[0][0] + assert "new target flows" not in call_args.lower() + + +class TestFlowmapAddNewTargetFlows: + """Test Flowmap add_new_target_flows method.""" + + @patch( + "flowmapper.flowmap.apply_transformation_and_convert_flows_to_normalized_flows" + ) + def test_add_new_target_flows_normalizes_and_adds(self, mock_apply): + """Test that add_new_target_flows normalizes flows and adds them.""" + new_flow1 = Mock(spec=Flow) + new_flow2 = Mock(spec=Flow) + + normalized_flow1 = Mock(spec=NormalizedFlow) + normalized_flow2 = Mock(spec=NormalizedFlow) + mock_apply.return_value = [normalized_flow1, normalized_flow2] + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[Mock()], + ) + + flowmap.add_new_target_flows([new_flow1, new_flow2]) + + # Verify flows were normalized + mock_apply.assert_called_once_with( + functions=flowmap.data_preparation_functions, flows=[new_flow1, new_flow2] + ) + + # Verify normalized flows were added + assert len(flowmap.target_flows) == 2 + assert flowmap.target_flows == [normalized_flow1, normalized_flow2] + + +class TestFlowmapMatchedSource: + """Test Flowmap matched_source method.""" + + def test_matched_source_returns_matched_flows(self): + """Test that matched_source returns only matched flows.""" + # Create flows with IDs + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.id = 1 + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.id = 2 + source_flow3 = Mock(spec=NormalizedFlow) + source_flow3.id = 3 + + # Create matches with source flows + source_flow_for_match1 = Mock(spec=Flow) + source_flow_for_match1._id = 1 + source_flow_for_match2 = Mock(spec=Flow) + source_flow_for_match2._id = 2 + + match1 = Mock(spec=Match) + match1.source = source_flow_for_match1 + match2 = Mock(spec=Match) + match2.source = 
source_flow_for_match2 + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2, source_flow3], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + result = flowmap.matched_source() + + assert len(result) == 2 + assert source_flow1 in result + assert source_flow2 in result + assert source_flow3 not in result + + def test_matched_source_returns_empty_when_no_matches(self): + """Test that matched_source returns empty list when no matches.""" + source_flow = Mock(spec=NormalizedFlow) + source_flow.id = 1 + + flowmap = Flowmap( + source_flows=[source_flow], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + result = flowmap.matched_source() + + assert result == [] + + +class TestFlowmapUnmatchedSource: + """Test Flowmap unmatched_source property.""" + + def test_unmatched_source_returns_unmatched_flows(self): + """Test that unmatched_source returns only unmatched flows.""" + # Create flows with IDs + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.id = 1 + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.id = 2 + source_flow3 = Mock(spec=NormalizedFlow) + source_flow3.id = 3 + + # Create matches for flow1 and flow2 + source_flow_for_match1 = Mock(spec=Flow) + source_flow_for_match1._id = 1 + source_flow_for_match2 = Mock(spec=Flow) + source_flow_for_match2._id = 2 + + match1 = Mock(spec=Match) + match1.source = source_flow_for_match1 + match2 = Mock(spec=Match) + match2.source = source_flow_for_match2 + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2, source_flow3], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + result = flowmap.unmatched_source + + assert len(result) == 1 + assert source_flow3 in result + assert source_flow1 not in result + assert source_flow2 not in result + + def test_unmatched_source_returns_all_when_no_matches(self): + """Test that unmatched_source returns all flows when no matches.""" + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.id = 1 + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.id = 2 + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + result = flowmap.unmatched_source + + assert len(result) == 2 + assert source_flow1 in result + assert source_flow2 in result + + +class TestFlowmapMatchedSourceStatistics: + """Test Flowmap matched_source_statistics method.""" + + def test_matched_source_statistics_creates_dataframe(self): + """Test that matched_source_statistics returns a DataFrame.""" + # Create flows with contexts + context1 = Mock() + context1.value = "air" + context2 = Mock() + context2.value = "water" + + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.original = Mock() + source_flow1.original.context = context1 + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.original = Mock() + source_flow2.original.context = context2 + + # Create matches + match_context1 = Mock() + match_context1.value = "air" + match_context2 = Mock() + match_context2.value = "air" + + match1 = Mock(spec=Match) + match1.source = Mock() + match1.source.context = match_context1 + match2 = Mock(spec=Match) + match2.source = Mock() + match2.source.context = match_context2 + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + result = flowmap.matched_source_statistics() + + assert 
isinstance(result, pd.DataFrame) + assert "context" in result.columns + assert "matched" in result.columns + assert "total" in result.columns + assert "percent" in result.columns + + def test_matched_source_statistics_calculates_percentages(self): + """Test that matched_source_statistics calculates correct percentages.""" + # Create flows with contexts + air_context = Mock() + air_context.value = "air" + water_context = Mock() + water_context.value = "water" + + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.original = Mock() + source_flow1.original.context = air_context + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.original = Mock() + source_flow2.original.context = air_context + source_flow3 = Mock(spec=NormalizedFlow) + source_flow3.original = Mock() + source_flow3.original.context = water_context + + # Create match for one air flow + match_air_context = Mock() + match_air_context.value = "air" + match1 = Mock(spec=Match) + match1.source = Mock() + match1.source.context = match_air_context + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2, source_flow3], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1] + + result = flowmap.matched_source_statistics() + + # Check air context: 1 matched, 2 total + air_row = result[result["context"] == "air"].iloc[0] + assert air_row["matched"] == 1 + assert air_row["total"] == 2 + assert air_row["percent"] == 0.5 + + # Check water context: 0 matched, 1 total + water_row = result[result["context"] == "water"].iloc[0] + assert water_row["matched"] == 0 + assert water_row["total"] == 1 + assert water_row["percent"] == 0.0 + + def test_matched_source_statistics_sorts_by_percent(self): + """Test that matched_source_statistics sorts by percentage.""" + # Create flows with different contexts + air_context = Mock() + air_context.value = "air" + water_context = Mock() + water_context.value = "water" + + source_flow1 = Mock(spec=NormalizedFlow) + source_flow1.original = Mock() + source_flow1.original.context = air_context + source_flow2 = Mock(spec=NormalizedFlow) + source_flow2.original = Mock() + source_flow2.original.context = water_context + + # Create match for air + match_air_context = Mock() + match_air_context.value = "air" + match1 = Mock(spec=Match) + match1.source = Mock() + match1.source.context = match_air_context + + flowmap = Flowmap( + source_flows=[source_flow1, source_flow2], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1] + + result = flowmap.matched_source_statistics() + + # Should be sorted by percent ascending + assert result.iloc[0]["percent"] <= result.iloc[1]["percent"] + + +class TestFlowmapMatchedTargetStatistics: + """Test Flowmap matched_target_statistics property.""" + + def test_matched_target_statistics_creates_dataframe(self): + """Test that matched_target_statistics returns a DataFrame.""" + # Create flows with contexts + air_context = Mock() + air_context.value = "air" + water_context = Mock() + water_context.value = "water" + + target_flow1 = Mock(spec=NormalizedFlow) + target_flow1.original = Mock() + target_flow1.original.context = air_context + target_flow2 = Mock(spec=NormalizedFlow) + target_flow2.original = Mock() + target_flow2.original.context = water_context + + # Create matches + match_air_context = Mock() + match_air_context.value = "air" + match1 = Mock(spec=Match) + match1.target = Mock() + match1.target.context = match_air_context + + flowmap = Flowmap( + source_flows=[], + target_flows=[target_flow1, 
target_flow2], + data_preparation_functions=[], + ) + flowmap.matches = [match1] + + result = flowmap.matched_target_statistics + + assert isinstance(result, pd.DataFrame) + assert "context" in result.columns + assert "matched" in result.columns + assert "total" in result.columns + assert "percent" in result.columns + + def test_matched_target_statistics_calculates_percentages(self): + """Test that matched_target_statistics calculates correct percentages.""" + # Create flows with contexts + air_context = Mock() + air_context.value = "air" + water_context = Mock() + water_context.value = "water" + + target_flow1 = Mock(spec=NormalizedFlow) + target_flow1.original = Mock() + target_flow1.original.context = air_context + target_flow2 = Mock(spec=NormalizedFlow) + target_flow2.original = Mock() + target_flow2.original.context = air_context + target_flow3 = Mock(spec=NormalizedFlow) + target_flow3.original = Mock() + target_flow3.original.context = water_context + + # Create match + match_air_context = Mock() + match_air_context.value = "air" + match1 = Mock(spec=Match) + match1.target = Mock() + match1.target.context = match_air_context + + flowmap = Flowmap( + source_flows=[], + target_flows=[target_flow1, target_flow2, target_flow3], + data_preparation_functions=[], + ) + flowmap.matches = [match1] + + result = flowmap.matched_target_statistics + + # Check air context: 1 matched, 2 total + air_row = result[result["context"] == "air"].iloc[0] + assert air_row["matched"] == 1 + assert air_row["total"] == 2 + assert air_row["percent"] == 0.5 + + +class TestFlowmapPrintStatistics: + """Test Flowmap print_statistics method.""" + + @patch("builtins.print") + def test_print_statistics_outputs_summary(self, mock_print): + """Test that print_statistics outputs correct summary.""" + source_flow = Mock(spec=NormalizedFlow) + target_flow = Mock(spec=NormalizedFlow) + + source_flow_for_match = Mock(spec=Flow) + source_flow_for_match._id = 1 + target_flow_for_match = Mock(spec=Flow) + target_flow_for_match._id = 2 + + match = Mock(spec=Match) + match.source = source_flow_for_match + match.target = target_flow_for_match + + flowmap = Flowmap( + source_flows=[source_flow], + target_flows=[target_flow], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + flowmap.print_statistics() + + # Verify print was called + mock_print.assert_called_once() + output = mock_print.call_args[0][0] + + assert "1 source" in output + assert "1 target" in output + assert "1 mappings" in output + assert "cardinalities" in output.lower() + + @patch("builtins.print") + def test_print_statistics_handles_zero_division(self, mock_print): + """Test that print_statistics handles zero source flows.""" + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + # Should not raise ZeroDivisionError + flowmap.print_statistics() + mock_print.assert_called_once() + + +class TestFlowmapCardinalities: + """Test Flowmap cardinalities method.""" + + def test_cardinalities_1_to_1(self): + """Test cardinalities for 1:1 relationships.""" + source_flow = Mock(spec=Flow) + source_flow._id = 1 + target_flow = Mock(spec=Flow) + target_flow._id = 2 + + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.cardinalities() + + assert len(result) == 1 + assert result[0]["from"] == 1 + assert 
result[0]["to"] == 2 + assert result[0]["cardinality"] == "1:1" + + def test_cardinalities_1_to_n(self): + """Test cardinalities for 1:N relationships.""" + source_flow = Mock(spec=Flow) + source_flow._id = 1 + target_flow1 = Mock(spec=Flow) + target_flow1._id = 2 + target_flow2 = Mock(spec=Flow) + target_flow2._id = 3 + + match1 = Mock(spec=Match) + match1.source = source_flow + match1.target = target_flow1 + match2 = Mock(spec=Match) + match2.source = source_flow + match2.target = target_flow2 + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + result = flowmap.cardinalities() + + assert len(result) == 2 + assert all(r["cardinality"] == "1:N" for r in result) + + def test_cardinalities_n_to_1(self): + """Test cardinalities for N:1 relationships.""" + source_flow1 = Mock(spec=Flow) + source_flow1._id = 1 + source_flow2 = Mock(spec=Flow) + source_flow2._id = 2 + target_flow = Mock(spec=Flow) + target_flow._id = 3 + + match1 = Mock(spec=Match) + match1.source = source_flow1 + match1.target = target_flow + match2 = Mock(spec=Match) + match2.source = source_flow2 + match2.target = target_flow + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + result = flowmap.cardinalities() + + assert len(result) == 2 + assert all(r["cardinality"] == "N:1" for r in result) + + def test_cardinalities_n_to_m(self): + """Test cardinalities for N:M relationships.""" + source_flow1 = Mock(spec=Flow) + source_flow1._id = 1 + source_flow2 = Mock(spec=Flow) + source_flow2._id = 2 + target_flow1 = Mock(spec=Flow) + target_flow1._id = 3 + target_flow2 = Mock(spec=Flow) + target_flow2._id = 4 + + match1 = Mock(spec=Match) + match1.source = source_flow1 + match1.target = target_flow1 + match2 = Mock(spec=Match) + match2.source = source_flow1 + match2.target = target_flow2 + match3 = Mock(spec=Match) + match3.source = source_flow2 + match3.target = target_flow1 + match4 = Mock(spec=Match) + match4.source = source_flow2 + match4.target = target_flow2 + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2, match3, match4] + + result = flowmap.cardinalities() + + assert len(result) == 4 + assert all(r["cardinality"] == "N:M" for r in result) + + def test_cardinalities_sorted_by_from(self): + """Test that cardinalities are sorted by source ID.""" + matches = [] + for i in range(5, 0, -1): # Reverse order + source_flow = Mock(spec=Flow) + source_flow._id = i + target_flow = Mock(spec=Flow) + target_flow._id = i + 10 + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + matches.append(match) + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = matches + + result = flowmap.cardinalities() + + # Verify sorted by 'from' (source ID) + from_ids = [r["from"] for r in result] + assert from_ids == sorted(from_ids) + + +class TestFlowmapToRandonneur: + """Test Flowmap to_randonneur method.""" + + @patch("flowmapper.flowmap.randonneur.Datapackage") + def test_to_randonneur_creates_datapackage(self, mock_datapackage_class): + """Test that to_randonneur creates a Datapackage.""" + mock_dp = Mock() + mock_datapackage_class.return_value = mock_dp + + match = Mock(spec=Match) + match.export.return_value = {"source": "test"} + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + 
data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.to_randonneur( + source_id="source", + target_id="target", + contributors=[], + mapping_source={}, + mapping_target={}, + ) + + # Verify Datapackage was created + mock_datapackage_class.assert_called_once() + assert result == mock_dp + + @patch("flowmapper.flowmap.randonneur.Datapackage") + def test_to_randonneur_adds_match_data(self, mock_datapackage_class): + """Test that to_randonneur adds match data to datapackage.""" + mock_dp = Mock() + mock_datapackage_class.return_value = mock_dp + + match1 = Mock(spec=Match) + match1.export.return_value = {"source": "test1"} + match2 = Mock(spec=Match) + match2.export.return_value = {"source": "test2"} + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match1, match2] + + flowmap.to_randonneur( + source_id="source", + target_id="target", + contributors=[], + mapping_source={}, + mapping_target={}, + ) + + # Verify add_data was called with exported matches + mock_dp.add_data.assert_called_once() + call_args = mock_dp.add_data.call_args + assert call_args.kwargs["verb"] == "update" + assert len(call_args.kwargs["data"]) == 2 + + @patch("flowmapper.flowmap.randonneur.Datapackage") + def test_to_randonneur_saves_to_path(self, mock_datapackage_class): + """Test that to_randonneur saves to path if provided.""" + mock_dp = Mock() + mock_datapackage_class.return_value = mock_dp + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + test_path = Path("/tmp/test.json") + + flowmap.to_randonneur( + source_id="source", + target_id="target", + contributors=[], + mapping_source={}, + mapping_target={}, + path=test_path, + ) + + # Verify to_json was called + mock_dp.to_json.assert_called_once_with(test_path) + + @patch("flowmapper.flowmap.randonneur.Datapackage") + def test_to_randonneur_uses_custom_name(self, mock_datapackage_class): + """Test that to_randonneur uses custom name if provided.""" + mock_dp = Mock() + mock_datapackage_class.return_value = mock_dp + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + flowmap.to_randonneur( + source_id="source", + target_id="target", + contributors=[], + mapping_source={}, + mapping_target={}, + name="custom-name", + ) + + # Verify name was used + call_args = mock_datapackage_class.call_args + assert call_args.kwargs["name"] == "custom-name" + + @patch("flowmapper.flowmap.randonneur.Datapackage") + def test_to_randonneur_defaults_name(self, mock_datapackage_class): + """Test that to_randonneur defaults name to source-target.""" + mock_dp = Mock() + mock_datapackage_class.return_value = mock_dp + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + flowmap.to_randonneur( + source_id="source-v1", + target_id="target-v2", + contributors=[], + mapping_source={}, + mapping_target={}, + ) + + # Verify default name was used + call_args = mock_datapackage_class.call_args + assert call_args.kwargs["name"] == "source-v1-target-v2" + + +class TestFlowmapToGlad: + """Test Flowmap to_glad method.""" + + def test_to_glad_creates_dataframe(self): + """Test that to_glad returns a DataFrame.""" + # Create match with all required attributes + source_name = Mock() + source_name.__str__ = Mock(return_value="Source Flow") + source_context = Mock() + 
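+        # Assumed shape of the export, inferred from the column assertions in
+        # test_to_glad_includes_all_columns below: each Match flattens to one
+        # GLAD row with source/target name, UUID, context, and unit, plus
+        # MatchCondition, ConversionFactor, and MemoMapper.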
source_context.export_as_string.return_value = "air" + source_unit = Mock() + source_unit.__str__ = Mock(return_value="kg") + + source_flow = Mock(spec=Flow) + source_flow.name = source_name + source_flow.identifier = "source-uuid" + source_flow.context = source_context + source_flow.unit = source_unit + + target_name = Mock() + target_name.__str__ = Mock(return_value="Target Flow") + target_context = Mock() + target_context.export_as_string.return_value = "air" + target_unit = Mock() + target_unit.__str__ = Mock(return_value="kg") + + target_flow = Mock(spec=Flow) + target_flow.name = target_name + target_flow.identifier = "target-uuid" + target_flow.context = target_context + target_flow.unit = target_unit + + match_condition = Mock() + match_condition.as_glad.return_value = "exact" + + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + match.condition = match_condition + match.conversion_factor = 1.0 + match.comment = "Test match" + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.to_glad() + + assert isinstance(result, pd.DataFrame) + assert len(result) == 1 + assert result.iloc[0]["SourceFlowName"] == "Source Flow" + assert result.iloc[0]["TargetFlowName"] == "Target Flow" + + def test_to_glad_includes_all_columns(self): + """Test that to_glad includes all required GLAD columns.""" + source_name = Mock() + source_name.__str__ = Mock(return_value="Source") + source_context = Mock() + source_context.export_as_string.return_value = "air" + source_unit = Mock() + source_unit.__str__ = Mock(return_value="kg") + + source_flow = Mock(spec=Flow) + source_flow.name = source_name + source_flow.identifier = "source-id" + source_flow.context = source_context + source_flow.unit = source_unit + + target_name = Mock() + target_name.__str__ = Mock(return_value="Target") + target_context = Mock() + target_context.export_as_string.return_value = "air" + target_unit = Mock() + target_unit.__str__ = Mock(return_value="kg") + + target_flow = Mock(spec=Flow) + target_flow.name = target_name + target_flow.identifier = "target-id" + target_flow.context = target_context + target_flow.unit = target_unit + + match_condition = Mock() + match_condition.as_glad.return_value = "exact" + + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + match.condition = match_condition + match.conversion_factor = 1.0 + match.comment = "Comment" + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.to_glad() + + expected_columns = [ + "SourceFlowName", + "SourceFlowUUID", + "SourceFlowContext", + "SourceUnit", + "MatchCondition", + "ConversionFactor", + "TargetFlowName", + "TargetFlowUUID", + "TargetFlowContext", + "TargetUnit", + "MemoMapper", + ] + assert all(col in result.columns for col in expected_columns) + + def test_to_glad_ensure_id_replaces_none_with_empty_string(self): + """Test that to_glad replaces None identifiers with empty string when ensure_id=True.""" + source_name = Mock() + source_name.__str__ = Mock(return_value="Source") + source_context = Mock() + source_context.export_as_string.return_value = "air" + source_unit = Mock() + source_unit.__str__ = Mock(return_value="kg") + + source_flow = Mock(spec=Flow) + source_flow.name = source_name + source_flow.identifier = None + source_flow.context = source_context + source_flow.unit = source_unit + + target_name = 
Mock() + target_name.__str__ = Mock(return_value="Target") + target_context = Mock() + target_context.export_as_string.return_value = "air" + target_unit = Mock() + target_unit.__str__ = Mock(return_value="kg") + + target_flow = Mock(spec=Flow) + target_flow.name = target_name + target_flow.identifier = None + target_flow.context = target_context + target_flow.unit = target_unit + + match_condition = Mock() + match_condition.as_glad.return_value = "exact" + + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + match.condition = match_condition + match.conversion_factor = 1.0 + match.comment = "Comment" + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.to_glad(ensure_id=True) + + assert result.iloc[0]["SourceFlowUUID"] == "" + assert result.iloc[0]["TargetFlowUUID"] == "" + + def test_to_glad_ensure_id_false_keeps_none(self): + """Test that to_glad keeps None identifiers when ensure_id=False.""" + source_name = Mock() + source_name.__str__ = Mock(return_value="Source") + source_context = Mock() + source_context.export_as_string.return_value = "air" + source_unit = Mock() + source_unit.__str__ = Mock(return_value="kg") + + source_flow = Mock(spec=Flow) + source_flow.name = source_name + source_flow.identifier = None + source_flow.context = source_context + source_flow.unit = source_unit + + target_name = Mock() + target_name.__str__ = Mock(return_value="Target") + target_context = Mock() + target_context.export_as_string.return_value = "air" + target_unit = Mock() + target_unit.__str__ = Mock(return_value="kg") + + target_flow = Mock(spec=Flow) + target_flow.name = target_name + target_flow.identifier = None + target_flow.context = target_context + target_flow.unit = target_unit + + match_condition = Mock() + match_condition.as_glad.return_value = "exact" + + match = Mock(spec=Match) + match.source = source_flow + match.target = target_flow + match.condition = match_condition + match.conversion_factor = 1.0 + match.comment = "Comment" + + flowmap = Flowmap( + source_flows=[], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [match] + + result = flowmap.to_glad(ensure_id=False) + + assert pd.isna(result.iloc[0]["SourceFlowUUID"]) + assert pd.isna(result.iloc[0]["TargetFlowUUID"]) + + def test_to_glad_missing_source_includes_unmatched(self): + """Test that to_glad includes unmatched source flows when missing_source=True.""" + # Create unmatched source flow + unmatched_name = Mock() + unmatched_name.__str__ = Mock(return_value="Unmatched") + unmatched_context = Mock() + unmatched_context.export_as_string.return_value = "air" + unmatched_unit = Mock() + unmatched_unit.__str__ = Mock(return_value="kg") + + unmatched_original = Mock(spec=Flow) + unmatched_original.name = unmatched_name + unmatched_original.identifier = "unmatched-id" + unmatched_original.context = unmatched_context + unmatched_original.unit = unmatched_unit + + unmatched_flow = Mock(spec=NormalizedFlow) + unmatched_flow.matched = False + unmatched_flow.original = unmatched_original + + flowmap = Flowmap( + source_flows=[unmatched_flow], + target_flows=[], + data_preparation_functions=[], + ) + flowmap.matches = [] + + result = flowmap.to_glad(missing_source=True) + + assert len(result) == 1 + assert result.iloc[0]["SourceFlowName"] == "Unmatched" + # Unmatched flows only have source columns, target columns will be NaN + # The DataFrame will have all columns but target values 
will be NaN
+        if "TargetFlowName" in result.columns:
+            assert pd.isna(result.iloc[0]["TargetFlowName"])
+
+    @patch("flowmapper.flowmap.Path")
+    def test_to_glad_saves_to_excel(self, mock_path_class):
+        """Test that to_glad saves to Excel when path is provided."""
+        import os
+        import tempfile
+        from pathlib import Path as RealPath
+
+        # Use a temporary file that we can actually create
+        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
+            test_path = RealPath(tmp.name)
+
+        try:
+            mock_path_class.return_value = test_path
+
+            source_name = Mock()
+            source_name.__str__ = Mock(return_value="Source")
+            source_context = Mock()
+            source_context.export_as_string.return_value = "air"
+            source_unit = Mock()
+            source_unit.__str__ = Mock(return_value="kg")
+
+            source_flow = Mock(spec=Flow)
+            source_flow.name = source_name
+            source_flow.identifier = "source-id"
+            source_flow.context = source_context
+            source_flow.unit = source_unit
+
+            target_name = Mock()
+            target_name.__str__ = Mock(return_value="Target")
+            target_context = Mock()
+            target_context.export_as_string.return_value = "air"
+            target_unit = Mock()
+            target_unit.__str__ = Mock(return_value="kg")
+
+            target_flow = Mock(spec=Flow)
+            target_flow.name = target_name
+            target_flow.identifier = "target-id"
+            target_flow.context = target_context
+            target_flow.unit = target_unit
+
+            match_condition = Mock()
+            match_condition.as_glad.return_value = "exact"
+
+            match = Mock(spec=Match)
+            match.source = source_flow
+            match.target = target_flow
+            match.condition = match_condition
+            match.conversion_factor = 1.0
+            match.comment = "Comment"
+
+            flowmap = Flowmap(
+                source_flows=[],
+                target_flows=[],
+                data_preparation_functions=[],
+            )
+            flowmap.matches = [match]
+
+            result = flowmap.to_glad(path=test_path)
+
+            # Verify path was converted to Path
+            mock_path_class.assert_called_once_with(test_path)
+
+            # Verify file was created
+            assert test_path.exists()
+        finally:
+            # Clean up
+            if test_path.exists():
+                os.unlink(test_path)
diff --git a/tests/unit/test_oxidation_state.py b/tests/unit/test_oxidation_state.py
new file mode 100644
index 0000000..85cd5b8
--- /dev/null
+++ b/tests/unit/test_oxidation_state.py
@@ -0,0 +1,433 @@
+"""Unit tests for OxidationState class."""
+
+import pytest
+
+from flowmapper.fields import OxidationState
+
+
+class TestOxidationStateInitialization:
+    """Test OxidationState initialization."""
+
+    def test_init_with_positive_value(self):
+        """Test initialization with positive value."""
+        os = OxidationState(3)
+        assert os.value == 3, f"Expected os.value to be 3, but got {os.value}"
+
+    def test_init_with_negative_value(self):
+        """Test initialization with negative value."""
+        os = OxidationState(-2)
+        assert os.value == -2, f"Expected os.value to be -2, but got {os.value}"
+
+    def test_init_with_zero(self):
+        """Test initialization with zero."""
+        os = OxidationState(0)
+        assert os.value == 0, f"Expected os.value to be 0, but got {os.value}"
+
+    def test_init_with_boundary_values(self):
+        """Test initialization with boundary values."""
+        os_min = OxidationState(-5)
+        os_max = OxidationState(9)
+        assert (
+            os_min.value == -5
+        ), f"Expected os_min.value to be -5, but got {os_min.value}"
+        assert (
+            os_max.value == 9
+        ), f"Expected os_max.value to be 9, but got {os_max.value}"
+
+
+class TestOxidationStateEq:
+    """Test OxidationState __eq__ method."""
+
+    def test_eq_with_same_oxidation_state(self):
+        """Test equality with same OxidationState instance."""
+        os1 = OxidationState(3)
+        os2 = OxidationState(3)
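+        # Equality is assumed to be value-based: an OxidationState compares
+        # equal to another instance, or to a plain int, with the same value.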
assert ( + os1 == os2 + ), f"Expected os1 to equal os2, but they are not equal (os1={os1.value}, os2={os2.value})" + + def test_eq_with_different_oxidation_state(self): + """Test equality with different OxidationState.""" + os1 = OxidationState(3) + os2 = OxidationState(4) + assert ( + os1 != os2 + ), f"Expected os1 to not equal os2, but they are equal (os1={os1.value}, os2={os2.value})" + + def test_eq_with_integer(self): + """Test equality with integer.""" + os = OxidationState(3) + assert ( + os == 3 + ), f"Expected os to equal 3, but they are not equal (os={os.value})" + assert ( + os != 4 + ), f"Expected os to not equal 4, but they are equal (os={os.value})" + + def test_eq_with_negative_integer(self): + """Test equality with negative integer.""" + os = OxidationState(-2) + assert ( + os == -2 + ), f"Expected os to equal -2, but they are not equal (os={os.value})" + assert ( + os != -3 + ), f"Expected os to not equal -3, but they are equal (os={os.value})" + + def test_eq_with_zero(self): + """Test equality with zero.""" + os = OxidationState(0) + assert ( + os == 0 + ), f"Expected os to equal 0, but they are not equal (os={os.value})" + assert ( + os != 1 + ), f"Expected os to not equal 1, but they are equal (os={os.value})" + + +class TestOxidationStateHasOxidationState: + """Test OxidationState has_oxidation_state static method.""" + + def test_has_oxidation_state_with_roman_numeral_lowercase(self): + """Test has_oxidation_state with lowercase roman numeral.""" + assert OxidationState.has_oxidation_state( + "chromium (iii)" + ), "Expected has_oxidation_state('chromium (iii)') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "iron (ii)" + ), "Expected has_oxidation_state('iron (ii)') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "manganese (vi)" + ), "Expected has_oxidation_state('manganese (vi)') to return True, but it returned False" + + def test_has_oxidation_state_with_roman_numeral_uppercase(self): + """Test has_oxidation_state with uppercase roman numeral.""" + assert OxidationState.has_oxidation_state( + "Iron (II)" + ), "Expected has_oxidation_state('Iron (II)') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "Chromium (III)" + ), "Expected has_oxidation_state('Chromium (III)') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "Mercury (IV)" + ), "Expected has_oxidation_state('Mercury (IV)') to return True, but it returned False" + + def test_has_oxidation_state_with_roman_numeral_no_parentheses(self): + """Test has_oxidation_state with roman numeral without parentheses.""" + assert OxidationState.has_oxidation_state( + "chromium iii" + ), "Expected has_oxidation_state('chromium iii') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "iron II" + ), "Expected has_oxidation_state('iron II') to return True, but it returned False" + + def test_has_oxidation_state_with_number(self): + """Test has_oxidation_state with number.""" + # The new regex requires a sign before the number + assert OxidationState.has_oxidation_state( + "iron (+2)" + ), "Expected has_oxidation_state('iron (+2)') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "iron (-2)" + ), "Expected has_oxidation_state('iron (-2)') to return True, but it returned False" + # Numbers without signs or with signs after no longer match + assert not OxidationState.has_oxidation_state( + "iron (2)" + ), 
"Expected has_oxidation_state('iron (2)') to return False (no sign), but it returned True" + assert not OxidationState.has_oxidation_state( + "iron (3+)" + ), "Expected has_oxidation_state('iron (3+)') to return False (sign after), but it returned True" + assert not OxidationState.has_oxidation_state( + "iron (2-)" + ), "Expected has_oxidation_state('iron (2-)') to return False (sign after), but it returned True" + + def test_has_oxidation_state_with_number_no_parentheses(self): + """Test has_oxidation_state with number without parentheses.""" + # The new regex requires a sign before the number + assert OxidationState.has_oxidation_state( + "iron +3" + ), "Expected has_oxidation_state('iron +3') to return True, but it returned False" + assert OxidationState.has_oxidation_state( + "iron -2" + ), "Expected has_oxidation_state('iron -2') to return True, but it returned False" + # Numbers without signs or with signs after no longer match + assert not OxidationState.has_oxidation_state( + "iron 2" + ), "Expected has_oxidation_state('iron 2') to return False (no sign), but it returned True" + assert not OxidationState.has_oxidation_state( + "iron 2-" + ), "Expected has_oxidation_state('iron 2-') to return False (sign after), but it returned True" + assert not OxidationState.has_oxidation_state( + "iron 02-" + ), "Expected has_oxidation_state('iron 02-') to return False (sign after), but it returned True" + + def test_has_oxidation_state_without_oxidation_state(self): + """Test has_oxidation_state without oxidation state.""" + assert not OxidationState.has_oxidation_state( + "water" + ), "Expected has_oxidation_state('water') to return False, but it returned True" + assert not OxidationState.has_oxidation_state( + "iron" + ), "Expected has_oxidation_state('iron') to return False, but it returned True" + assert not OxidationState.has_oxidation_state( + "chromium oxide" + ), "Expected has_oxidation_state('chromium oxide') to return False, but it returned True" + + def test_has_oxidation_state_with_compound_identifier(self): + """Test has_oxidation_state should not match numbers in compound identifiers.""" + assert not OxidationState.has_oxidation_state( + "Ethane,, 1,1,2-trichloro-1,2,2-trifluoro-, CFC-113" + ), "Expected has_oxidation_state('Ethane,, 1,1,2-trichloro-1,2,2-trifluoro-, CFC-113') to return False, but it returned True" + + def test_has_oxidation_state_should_not_match_roman_numeral_in_word(self): + """Test has_oxidation_state should not match roman numerals embedded in words.""" + assert not OxidationState.has_oxidation_state( + "Bifenox" + ), "Expected has_oxidation_state('Bifenox') to return False, but it returned True" + + def test_has_oxidation_state_with_comma(self): + """Test has_oxidation_state with comma before oxidation state.""" + assert OxidationState.has_oxidation_state( + "iron, (II)" + ), "Expected has_oxidation_state('iron, (II)') to return True, but it returned False" + # The new regex requires a sign before the number + assert OxidationState.has_oxidation_state( + "iron, (+2)" + ), "Expected has_oxidation_state('iron, (+2)') to return True, but it returned False" + + +class TestOxidationStateFromString: + """Test OxidationState from_string class method.""" + + def test_from_string_with_roman_numeral_lowercase(self): + """Test from_string with lowercase roman numeral.""" + os, remaining = OxidationState.from_string("chromium (iii)") + assert os.value == 3, f"Expected os.value to be 3, but got {os.value}" + assert ( + remaining == "chromium" + ), f"Expected remaining to 
be 'chromium', but got {remaining!r}" + + def test_from_string_with_roman_numeral_uppercase(self): + """Test from_string with uppercase roman numeral.""" + os, remaining = OxidationState.from_string("Iron (II)") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "Iron" + ), f"Expected remaining to be 'Iron', but got {remaining!r}" + + def test_from_string_with_roman_numeral_no_parentheses(self): + """Test from_string with roman numeral without parentheses.""" + os, remaining = OxidationState.from_string("chromium iii") + assert os.value == 3, f"Expected os.value to be 3, but got {os.value}" + assert ( + remaining == "chromium" + ), f"Expected remaining to be 'chromium', but got {remaining!r}" + + def test_from_string_with_roman_numeral_negative(self): + """Test from_string with negative roman numeral.""" + os, remaining = OxidationState.from_string("iron (II-)") + assert os.value == -2, f"Expected os.value to be -2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_roman_numeral_positive_sign(self): + """Test from_string with positive sign in roman numeral.""" + os, remaining = OxidationState.from_string("iron (II+)") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number(self): + """Test from_string with number.""" + # The new regex requires a sign before the number + os, remaining = OxidationState.from_string("iron (+2)") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number_positive(self): + """Test from_string with positive number.""" + # The new regex requires a sign before the number + os, remaining = OxidationState.from_string("iron (+3)") + assert os.value == 3, f"Expected os.value to be 3, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number_negative(self): + """Test from_string with negative number.""" + # The new regex requires a sign before the number + os, remaining = OxidationState.from_string("iron (-2)") + assert os.value == -2, f"Expected os.value to be -2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number_no_parentheses(self): + """Test from_string with number without parentheses.""" + # The new regex requires a sign before the number + os, remaining = OxidationState.from_string("iron +2") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number_sign_before(self): + """Test from_string with sign before number.""" + os, remaining = OxidationState.from_string("iron +3") + assert os.value == 3, f"Expected os.value to be 3, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_number_sign_before_negative(self): + """Test from_string with negative sign before number.""" + os, remaining = OxidationState.from_string("iron -2") + assert os.value == -2, f"Expected os.value to be -2, but got {os.value}" + 
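+        # Sketch of the parse these tests pin down (an assumption, not the
+        # actual regex in flowmapper.fields): an optional comma, then either a
+        # roman numeral i-ix with an optional trailing sign, or an integer
+        # whose sign must come first, all optionally parenthesised, e.g.
+        #   r",?\s+\(?\s*(?:(?P<roman>[ivx]+)\s*(?P<rsign>[+-])?|(?P<sign>[+-])(?P<num>\d+))\s*\)?$"
+        # matched with re.IGNORECASE, the value then checked against physical
+        # bounds of -5 to 9.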
assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_comma(self): + """Test from_string with comma before oxidation state.""" + os, remaining = OxidationState.from_string("iron, (II)") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_with_comma_and_leading_zeros(self): + """Test from_string with comma and number with leading zeros.""" + os, remaining = OxidationState.from_string("foo, +002") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "foo" + ), f"Expected remaining to be 'foo', but got {remaining!r}" + + def test_from_string_with_whitespace(self): + """Test from_string with whitespace around oxidation state.""" + os, remaining = OxidationState.from_string("iron ( II )") + assert os.value == 2, f"Expected os.value to be 2, but got {os.value}" + assert ( + remaining == "iron" + ), f"Expected remaining to be 'iron', but got {remaining!r}" + + def test_from_string_raises_error_invalid_roman_numeral(self): + """Test from_string raises error for invalid roman numeral.""" + with pytest.raises(ValueError, match="is not a valid roman numeral"): + OxidationState.from_string("iron (IIII)") + + # Test various invalid roman numerals + invalid_cases = [ + "iron (IIII)", # Four I's in a row + "iron (VV)", # Two V's + "iron (VX)", # Invalid subtraction + ] + for invalid_case in invalid_cases: + with pytest.raises(ValueError, match="is not a valid roman numeral"): + OxidationState.from_string(invalid_case) + + def test_from_string_raises_error_both_signs(self): + """Test from_string raises error when both signs are present.""" + # The new regex only matches signs before the number, so "iron (+2-)" won't match + with pytest.raises(ValueError, match="No match found"): + OxidationState.from_string("iron (+2-)") + + def test_from_string_raises_error_no_match(self): + """Test from_string raises error when no match is found.""" + with pytest.raises(ValueError, match="No match found"): + OxidationState.from_string("iron") + with pytest.raises(ValueError, match="No match found"): + OxidationState.from_string( + "Ethane,, 1,1,2-trichloro-1,2,2-trifluoro-, CFC-113" + ) + with pytest.raises(ValueError, match="No match found"): + OxidationState.from_string("Bifenox") + + def test_from_string_raises_error_too_low(self): + """Test from_string raises error for value too low.""" + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (-6)") + + def test_from_string_raises_error_too_high(self): + """Test from_string raises error for value too high.""" + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (+10)") + + def test_from_string_raises_error_values_outside_bounds_roman(self): + """Test from_string raises error for roman numeral values outside bounds.""" + # Test values too low + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (VI-)") # -6 + + # Test values too high + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (X)") # 10 + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (XI)") # 11 + + def test_from_string_raises_error_values_outside_bounds_numbers(self): + """Test from_string raises error for 
number values outside bounds.""" + # Test values too low + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (-6)") + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (-10)") + + # Test values too high + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (+10)") + with pytest.raises(ValueError, match="outside physical bounds"): + OxidationState.from_string("iron (+15)") + + def test_from_string_boundary_values(self): + """Test from_string with boundary values.""" + os_min, remaining = OxidationState.from_string("iron (-5)") + assert ( + os_min.value == -5 + ), f"Expected os_min.value to be -5, but got {os_min.value}" + + # The new regex requires a sign before the number + os_max, remaining = OxidationState.from_string("iron (+9)") + assert ( + os_max.value == 9 + ), f"Expected os_max.value to be 9, but got {os_max.value}" + + def test_from_string_various_roman_numerals(self): + """Test from_string with various roman numerals.""" + test_cases = [ + ("iron (i)", 1), + ("iron (ii)", 2), + ("iron (iii)", 3), + ("iron (iv)", 4), + ("iron (v)", 5), + ("iron (vi)", 6), + ("iron (vii)", 7), + ("iron (viii)", 8), + ("iron (ix)", 9), + ] + for string, expected_value in test_cases: + os, remaining = OxidationState.from_string(string) + assert ( + os.value == expected_value + ), f"Expected os.value to be {expected_value} for '{string}', but got {os.value}" + + def test_from_string_remaining_string(self): + """Test from_string returns correct remaining string.""" + test_cases = [ + ("chromium (iii)", "chromium"), + ("iron (II)", "iron"), + ("manganese (vi)", "manganese"), + # The new regex requires a sign before the number + ("mercury (+2)", "mercury"), + ("tin (+3)", "tin"), + ("beryllium (-2)", "beryllium"), + ] + for string, expected_remaining in test_cases: + os, remaining = OxidationState.from_string(string) + assert ( + remaining == expected_remaining + ), f"Expected remaining to be {expected_remaining!r} for '{string}', but got {remaining!r}" diff --git a/tests/unit/test_randonneur.py b/tests/unit/test_randonneur.py new file mode 100644 index 0000000..26c8ae5 --- /dev/null +++ b/tests/unit/test_randonneur.py @@ -0,0 +1,283 @@ +"""Unit tests for randonneur-based transformation utilities.""" + +from flowmapper.domain.flow import Flow +from flowmapper.domain.normalized_flow import NormalizedFlow +from flowmapper.utils import apply_transformation_and_convert_flows_to_normalized_flows + + +class TestApplyGenericTransformationsToFlows: + """Test apply_transformation_and_convert_flows_to_normalized_flows function.""" + + def test_basic_transformation_single_function(self): + """Test basic transformation with a single function.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + def transform_func(graph): + # Modify the name in the dict + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = "Modified name" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_func], flows=[flow] + ) + + assert len(result) == 1, "Expected one NormalizedFlow" + assert isinstance(result[0], NormalizedFlow), "Expected NormalizedFlow object" + assert result[0].original == flow, "Expected original flow to be preserved" + assert ( + result[0].normalized.name.data == "modified name" + ), "Expected normalized name to be 
transformed and normalized" + assert ( + result[0].current.name.data == "modified name" + ), "Expected current to match normalized" + + def test_multiple_transformations_sequential(self): + """Test that multiple transformations are applied sequentially.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + def transform_name(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = "First transformation" + result.append(modified) + return result + + def transform_unit(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["unit"] = "g" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_name, transform_unit], flows=[flow] + ) + + assert len(result) == 1, "Expected one NormalizedFlow" + # Both transformations should be applied + assert ( + result[0].normalized.name.data == "first transformation" + ), "Expected name to be transformed by first function" + assert ( + result[0].original.unit.data == "kg" + ), "Expected original unit to be preserved as `kg`" + assert ( + result[0].normalized.unit.data == "gram" + ), "Expected unit to be transformed by second function and normalized from `g` to `gram`" + + def test_empty_functions_list(self): + """Test with empty list of functions (no transformations).""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[], flows=[flow] + ) + + assert len(result) == 1, "Expected one NormalizedFlow" + assert result[0].original == flow, "Expected original flow to be preserved" + # Without transformations, normalized should be the same as flow.normalize() + expected_normalized = flow.normalize() + assert ( + result[0].normalized.name.data == expected_normalized.name.data + ), "Expected normalized to match flow.normalize()" + + def test_empty_flows_list(self): + """Test with empty list of flows.""" + + def transform_func(graph): + return graph + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_func], flows=[] + ) + + assert len(result) == 0, "Expected empty list" + + def test_multiple_flows(self): + """Test transformation of multiple flows.""" + flow1 = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + flow2 = Flow.from_dict({"name": "Water", "context": "water", "unit": "kg"}) + + def transform_func(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = f"Modified {flow_dict['name']}" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_func], flows=[flow1, flow2] + ) + + assert len(result) == 2, "Expected two NormalizedFlow objects" + assert ( + result[0].original == flow1 + ), "Expected first original flow to be preserved" + assert ( + result[1].original == flow2 + ), "Expected second original flow to be preserved" + assert ( + "modified carbon dioxide" in result[0].normalized.name.data.lower() + ), "Expected first flow name to be transformed" + assert ( + "modified water" in result[1].normalized.name.data.lower() + ), "Expected second flow name to be transformed" + + def test_transformation_modifies_context(self): + """Test transformation that modifies context.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", 
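+            # (The pipeline under test dumps flows to dicts like this one,
+            # runs each supplied function over the list in order, then
+            # re-normalises the results into NormalizedFlow objects.)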
"unit": "kg"} + ) + + def transform_context(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["context"] = ("emissions", "to air") + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_context], flows=[flow] + ) + + assert len(result) == 1, "Expected one NormalizedFlow" + # Context should be transformed and normalized + assert isinstance( + result[0].normalized.context.value, tuple + ), "Expected context to be tuple" + assert ( + "emissions" in result[0].normalized.context.value + ), "Expected transformed context to be present" + + def test_transformation_modifies_multiple_fields(self): + """Test transformation that modifies multiple fields at once.""" + flow = Flow.from_dict( + { + "name": "Carbon dioxide", + "context": "air", + "unit": "kg", + "location": "US", + } + ) + + def transform_multiple(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = "CO2" + modified["unit"] = "g" + modified["location"] = "CA" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_multiple], flows=[flow] + ) + + assert len(result) == 1, "Expected one NormalizedFlow" + assert ( + result[0].normalized.name.data == "co2" + ), "Expected name to be transformed" + assert ( + result[0].normalized.unit.data == "gram" + ), "Expected unit to be transformed to `g` and normalized to `gram`" + assert ( + result[0].normalized.location == "CA" + ), "Expected location to be transformed" + + def test_original_flows_unchanged(self): + """Test that original Flow objects are not modified.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + original_name = flow.name.data + + def transform_func(graph): + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = "Modified name" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_func], flows=[flow] + ) + + # Original flow should be unchanged + assert flow.name.data == original_name, "Expected original flow to be unchanged" + assert result[0].original == flow, "Expected original reference to be preserved" + + def test_current_is_copy_of_normalized(self): + """Test that current is a copy of normalized, not a reference.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + def transform_func(graph): + return graph # No transformation + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_func], flows=[flow] + ) + + assert ( + result[0].current is not result[0].normalized + ), "Expected current to be a copy, not a reference" + assert ( + result[0].current.name.data == result[0].normalized.name.data + ), "Expected current to have same data as normalized" + + def test_transformation_chain_preserves_order(self): + """Test that transformations are applied in the correct order.""" + flow = Flow.from_dict( + {"name": "Carbon dioxide", "context": "air", "unit": "kg"} + ) + + call_order = [] + + def transform_first(graph): + call_order.append("first") + result = [] + for flow_dict in graph: + modified = flow_dict.copy() + modified["name"] = "First" + result.append(modified) + return result + + def transform_second(graph): + call_order.append("second") + result = [] + for flow_dict in graph: + modified = 
flow_dict.copy() + modified["name"] = f"{flow_dict['name']} then Second" + result.append(modified) + return result + + result = apply_transformation_and_convert_flows_to_normalized_flows( + functions=[transform_first, transform_second], flows=[flow] + ) + + assert call_order == [ + "first", + "second", + ], "Expected functions to be called in order" + assert ( + "second" in result[0].normalized.name.data.lower() + ), "Expected second transformation to be applied last" diff --git a/tests/unit/test_remove_unit_slash.py b/tests/unit/test_remove_unit_slash.py new file mode 100644 index 0000000..db488c4 --- /dev/null +++ b/tests/unit/test_remove_unit_slash.py @@ -0,0 +1,227 @@ +"""Unit tests for remove_unit_slash function.""" + +from unittest.mock import patch + +from flowmapper.domain.flow import Flow +from flowmapper.utils import remove_unit_slash + + +class TestRemoveUnitSlash: + """Test remove_unit_slash function.""" + + def test_no_match_returns_original_name(self): + """Test that remove_unit_slash returns original name when no match is found.""" + flow = Flow.from_dict({"name": "water", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + assert result == "water", f"Expected result to be 'water', but got {result!r}" + + def test_match_at_end_removes_slash_and_unit(self): + """Test that remove_unit_slash removes /m3 or /kg when at end of string with whitespace.""" + # Test with /m3 at end with whitespace - unit is captured + flow = Flow.from_dict({"name": "water/m3 ", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + # match.end() == len(name), so removes from match.start() to end + assert result == "water", f"Expected result to be 'water', but got {result!r}" + + # Test with /kg at end with whitespace + flow = Flow.from_dict({"name": "water/kg ", "unit": "kg", "context": "air"}) + result = remove_unit_slash(flow) + assert result == "water", f"Expected result to be 'water', but got {result!r}" + + def test_match_at_end_with_comma(self): + """Test that remove_unit_slash skips match with only comma after unit at end.""" + flow = Flow.from_dict({"name": "water/m3,", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + assert ( + result == "water/m3," + ), f"Expected result to be 'water/m3,', but got {result!r}" + + def test_match_in_middle_replaces_with_comma_space(self): + """Test that remove_unit_slash replaces /m3 or /kg in middle with ', '.""" + flow = Flow.from_dict( + {"name": "water/m3, pure", "unit": "m3", "context": "air"} + ) + + result = remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + # Test with /kg + flow = Flow.from_dict( + {"name": "water/kg, pure", "unit": "kg", "context": "air"} + ) + result = remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + def test_match_with_whitespace(self): + """Test that remove_unit_slash handles whitespace after unit.""" + flow = Flow.from_dict({"name": "water/m3 ", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + # match.end() == len(name) (whitespace is included in match), so removes from start to end + assert result == "water", f"Expected result to be 'water', but got {result!r}" + + def test_match_with_comma_and_whitespace(self): + """Test that remove_unit_slash handles comma and whitespace.""" + flow = Flow.from_dict( + {"name": "water/m3, pure", "unit": "m3", "context": "air"} + ) + + result = 
remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + def test_multiple_matches_skipped(self): + """Test that remove_unit_slash only processes the first match.""" + # With the fixed regex, /kg at the end will match, so it removes /kg + flow = Flow.from_dict({"name": "water/m3/kg", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + # The regex matches /kg at the end, so it removes /kg + assert ( + result == "water/m3" + ), f"Expected result to be 'water/m3' (removes /kg at end), but got {result!r}" + + def test_no_match_without_slash_and_unit(self): + """Test that remove_unit_slash doesn't match strings without slash and unit.""" + # This was the original bug - "Caesium I" should not match + flow = Flow.from_dict({"name": "Caesium I", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + # Should not match because there's no /m3 or /kg + assert ( + result == "Caesium I" + ), f"Expected result to be 'Caesium I' (no match), but got {result!r}" + + @patch("flowmapper.utils.flow_names.logger") + def test_incompatible_unit_logs_warning(self, mock_logger): + """Test that remove_unit_slash logs warning for incompatible units.""" + # Create flow with m3 in name but kg as unit (incompatible) + flow = Flow.from_dict({"name": "water/m3 ", "unit": "kg", "context": "air"}) + + # Should still return the modified name + result = remove_unit_slash(flow) + assert result == "water", f"Expected result to be 'water', but got {result!r}" + # Verify warning was called + mock_logger.warning.assert_called_once() + + @patch("flowmapper.utils.flow_names.logger") + def test_incompatible_unit_logs_warning_message(self, mock_logger): + """Test that remove_unit_slash logs the correct warning message for incompatible units.""" + # Create flow with m3 in name but kg as unit (incompatible) + flow = Flow.from_dict({"name": "water/m3 pure", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + # Verify warning was called + mock_logger.warning.assert_called_once() + warning_call = mock_logger.warning.call_args[0][0] + assert ( + "has unit" in warning_call + ), f"Expected warning message to contain 'has unit', but got {warning_call!r}" + assert ( + "but name refers to incompatible unit" in warning_call + ), f"Expected warning message to contain 'but name refers to incompatible unit', but got {warning_call!r}" + assert ( + "m3" in warning_call + ), f"Expected warning message to contain 'm3', but got {warning_call!r}" + + @patch("flowmapper.utils.flow_names.logger") + def test_incompatible_unit_logs_warning_with_kg(self, mock_logger): + """Test that remove_unit_slash logs warning message with kg unit.""" + # Create flow with kg in name but m3 as unit (incompatible) + flow = Flow.from_dict({"name": "water/kg pure", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + # Verify warning was called with kg + mock_logger.warning.assert_called_once() + warning_call = mock_logger.warning.call_args[0][0] + assert ( + "kg" in warning_call + ), f"Expected warning message to contain 'kg', but got {warning_call!r}" + + @patch("flowmapper.utils.flow_names.logger") + def test_compatible_unit_no_warning(self, mock_logger): + """Test that remove_unit_slash doesn't log warning for compatible 
units.""" + # Create flow with m3 in name and m3 as unit (compatible) + flow = Flow.from_dict({"name": "water/m3 ", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + assert result == "water", f"Expected result to be 'water', but got {result!r}" + # Verify warning was NOT called for compatible units + mock_logger.warning.assert_not_called() + + def test_match_when_unit_not_followed_by_whitespace_or_comma(self): + """Test that remove_unit_slash doesn't match when unit is not followed by whitespace or comma.""" + flow = Flow.from_dict({"name": "water/m3x", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + # The regex requires whitespace, comma, or end of string after /m3 or /kg + # Since /m3x doesn't match, no change should occur + assert ( + result == "water/m3x" + ), f"Expected result to be 'water/m3x' (no match), but got {result!r}" + + def test_match_not_at_end_replaces(self): + """Test that remove_unit_slash replaces match when not at end.""" + flow = Flow.from_dict({"name": "water/m3 pure", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + assert ( + result == "water, pure" + ), f"Expected result to be 'water, pure', but got {result!r}" + + def test_case_sensitivity(self): + """Test that remove_unit_slash is case-sensitive for unit pattern.""" + flow = Flow.from_dict( + {"name": "water/M3", "unit": "m3", "context": "air"} + ) # Uppercase M3 + + # Should not match uppercase M3 + result = remove_unit_slash(flow) + assert ( + result == "water/M3" + ), f"Expected result to be 'water/M3' (no match), but got {result!r}" + + def test_no_unit_slash_pattern(self): + """Test that remove_unit_slash doesn't match other slash patterns.""" + flow = Flow.from_dict({"name": "water/liter", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + assert ( + result == "water/liter" + ), f"Expected result to be 'water/liter' (no match), but got {result!r}" + + def test_empty_name(self): + """Test that remove_unit_slash handles empty name.""" + flow = Flow.from_dict({"name": "", "unit": "kg", "context": "air"}) + + result = remove_unit_slash(flow) + assert result == "", f"Expected result to be '', but got {result!r}" + + def test_name_with_only_unit_slash(self): + """Test that remove_unit_slash handles name with only /m3 or /kg with whitespace.""" + flow = Flow.from_dict({"name": "/m3 ", "unit": "m3", "context": "air"}) + + result = remove_unit_slash(flow) + # match.end() == len(name), so removes from match.start() to end + assert result == "", f"Expected result to be '', but got {result!r}" + + # Test with /kg + flow = Flow.from_dict({"name": "/kg ", "unit": "kg", "context": "air"}) + result = remove_unit_slash(flow) + assert result == "", f"Expected result to be '', but got {result!r}" diff --git a/tests/unit/test_split_location_suffix.py b/tests/unit/test_split_location_suffix.py new file mode 100644 index 0000000..d6e542b --- /dev/null +++ b/tests/unit/test_split_location_suffix.py @@ -0,0 +1,246 @@ +"""Unit tests for split_location_suffix and replace_location_suffix functions.""" + +import pytest + +from flowmapper.errors import MissingLocation +from flowmapper.fields import replace_location_suffix, split_location_suffix + + +class TestSplitLocationSuffix: + """Test split_location_suffix function.""" + + def test_simple_location_code(self): + """Test split_location_suffix with simple location code.""" + name, location = split_location_suffix("Ammonia, NL") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but 
got {name!r}" + assert location == "NL", f"Expected location to be 'NL', but got {location!r}" + + def test_location_code_with_extra_whitespace(self): + """Test split_location_suffix with extra whitespace.""" + name, location = split_location_suffix("Ammonia, \tNL") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but got {name!r}" + assert location == "NL", f"Expected location to be 'NL', but got {location!r}" + + def test_complicated_location_code(self): + """Test split_location_suffix with complicated location code.""" + name, location = split_location_suffix("Ammonia, RER w/o DE+NL+NO") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but got {name!r}" + assert ( + location == "RER w/o DE+NL+NO" + ), f"Expected location to be 'RER w/o DE+NL+NO', but got {location!r}" + + def test_no_location_code(self): + """Test split_location_suffix with no location code.""" + name, location = split_location_suffix("Ammonia") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_location_code_with_dash(self): + """Test split_location_suffix with location code using dash (should not match).""" + name, location = split_location_suffix("Ammonia-NL") + assert ( + name == "Ammonia-NL" + ), f"Expected name to be 'Ammonia-NL', but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_location_code_case_insensitive_fails(self): + """Test that split_location_suffix is case-sensitive and does not match lowercase location codes.""" + name, location = split_location_suffix("Ammonia, nl") + assert ( + name == "Ammonia, nl" + ), f"Expected name to be 'Ammonia, nl', but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_multiple_commas(self): + """Test split_location_suffix with multiple commas.""" + name, location = split_location_suffix("Ammonia, pure, NL") + # Should match the last comma followed by location code + assert ( + name == "Ammonia, pure" + ), f"Expected name to be 'Ammonia, pure', but got {name!r}" + assert location == "NL", f"Expected location to be 'NL', but got {location!r}" + + def test_location_code_in_middle(self): + """Test split_location_suffix with location code not at end.""" + name, location = split_location_suffix("Ammonia, NL, pure") + # Should not match because location code is not at the end + assert ( + name == "Ammonia, NL, pure" + ), f"Expected name to be 'Ammonia, NL, pure', but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_empty_string(self): + """Test split_location_suffix with empty string.""" + name, location = split_location_suffix("") + assert name == "", f"Expected name to be '', but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_only_location_code(self): + """Test split_location_suffix with only location code.""" + name, location = split_location_suffix(", NL") + assert name == "", f"Expected name to be '', but got {name!r}" + assert location == "NL", f"Expected location to be 'NL', but got {location!r}" + + def test_whitespace_before_comma(self): + """Test split_location_suffix with whitespace before comma.""" + name, location = split_location_suffix("Ammonia , NL") + # The regex requires the comma to immediately follow the name, so this does not match + assert ( + name == "Ammonia , NL" + ), f"Expected name to be 'Ammonia 
, NL' (no match), but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_no_whitespace_after_comma(self): + """Test split_location_suffix with no whitespace after comma.""" + name, location = split_location_suffix("Ammonia,NL") + # The regex requires whitespace after comma + assert ( + name == "Ammonia,NL" + ), f"Expected name to be 'Ammonia,NL' (no match), but got {name!r}" + assert location is None, f"Expected location to be None, but got {location!r}" + + def test_various_location_codes(self): + """Test split_location_suffix with various location codes.""" + test_cases = [ + ("Water, DE", "Water", "DE"), + ("Water, FR", "Water", "FR"), + ("Water, US", "Water", "US"), + ("Water, GLO", "Water", "GLO"), + ] + for input_str, expected_name, expected_location in test_cases: + name, location = split_location_suffix(input_str) + assert ( + name == expected_name + ), f"Expected name to be {expected_name!r} for '{input_str}', but got {name!r}" + assert ( + location == expected_location + ), f"Expected location to be {expected_location!r} for '{input_str}', but got {location!r}" + + def test_complex_location_with_operators(self): + """Test split_location_suffix with complex location codes containing operators.""" + name, location = split_location_suffix("Ammonia, RER w/o DE+NL+NO") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but got {name!r}" + assert ( + location == "RER w/o DE+NL+NO" + ), f"Expected location to be 'RER w/o DE+NL+NO', but got {location!r}" + + def test_location_code_with_trailing_whitespace(self): + """Test split_location_suffix with trailing whitespace after location.""" + name, location = split_location_suffix("Ammonia, NL ") + assert name == "Ammonia", f"Expected name to be 'Ammonia', but got {name!r}" + assert location == "NL", f"Expected location to be 'NL', but got {location!r}" + + +class TestReplaceLocationSuffix: + """Test replace_location_suffix function.""" + + def test_simple_location_replacement(self): + """Test replace_location_suffix with simple location code.""" + result = replace_location_suffix("Ammonia, NL", "DE") + assert result == "Ammonia, DE", f"Expected 'Ammonia, DE', but got {result!r}" + + def test_location_replacement_with_extra_whitespace(self): + """Test replace_location_suffix with extra whitespace.""" + result = replace_location_suffix("Ammonia, \tNL", "DE") + assert ( + result == "Ammonia, \tDE" + ), f"Expected 'Ammonia, \\tDE', but got {result!r}" + + def test_complicated_location_replacement(self): + """Test replace_location_suffix with complicated location code.""" + result = replace_location_suffix("Ammonia, RER w/o DE+NL+NO", "GLO") + assert result == "Ammonia, GLO", f"Expected 'Ammonia, GLO', but got {result!r}" + + def test_no_location_code_raises_missing_location(self): + """Test replace_location_suffix with no location code (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("Ammonia", "DE") + + def test_location_code_with_dash_raises_missing_location(self): + """Test replace_location_suffix with location code using dash (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("Ammonia-NL", "DE") + + def test_location_code_case_insensitive_raises_missing_location(self): + """Test replace_location_suffix with lowercase location (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No 
location suffix found"): + replace_location_suffix("Ammonia, nl", "DE") + + def test_multiple_commas_replacement(self): + """Test replace_location_suffix with multiple commas.""" + result = replace_location_suffix("Ammonia, pure, NL", "FR") + # Should replace the last location code + assert ( + result == "Ammonia, pure, FR" + ), f"Expected 'Ammonia, pure, FR', but got {result!r}" + + def test_location_code_in_middle_raises_missing_location(self): + """Test replace_location_suffix with location code not at end (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("Ammonia, NL, pure", "DE") + + def test_empty_string_raises_missing_location(self): + """Test replace_location_suffix with empty string (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("", "DE") + + def test_only_location_code_replacement(self): + """Test replace_location_suffix with only location code.""" + result = replace_location_suffix(", NL", "DE") + assert result == ", DE", f"Expected ', DE', but got {result!r}" + + def test_whitespace_before_comma_raises_missing_location(self): + """Test replace_location_suffix with whitespace before comma (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("Ammonia , NL", "DE") + + def test_no_whitespace_after_comma_raises_missing_location(self): + """Test replace_location_suffix with no whitespace after comma (should raise MissingLocation).""" + with pytest.raises(MissingLocation, match="No location suffix found"): + replace_location_suffix("Ammonia,NL", "DE") + + def test_various_location_codes_replacement(self): + """Test replace_location_suffix with various location codes.""" + test_cases = [ + ("Water, DE", "FR", "Water, FR"), + ("Water, FR", "US", "Water, US"), + ("Water, US", "GLO", "Water, GLO"), + ("Water, GLO", "DE", "Water, DE"), + ] + for input_str, new_location, expected in test_cases: + result = replace_location_suffix(input_str, new_location) + assert ( + result == expected + ), f"Expected {expected!r} for '{input_str}' -> '{new_location}', but got {result!r}" + + def test_complex_location_with_operators_replacement(self): + """Test replace_location_suffix with complex location codes containing operators.""" + result = replace_location_suffix("Ammonia, RER w/o DE+NL+NO", "GLO") + assert result == "Ammonia, GLO", f"Expected 'Ammonia, GLO', but got {result!r}" + + def test_location_code_with_trailing_whitespace_replacement(self): + """Test replace_location_suffix with trailing whitespace after location.""" + result = replace_location_suffix("Ammonia, NL ", "DE") + assert ( + result == "Ammonia, DE " + ), f"Expected 'Ammonia, DE ' (preserving trailing space), but got {result!r}" + + def test_replace_with_empty_string(self): + """Test replace_location_suffix replacing location with empty string.""" + result = replace_location_suffix("Ammonia, NL", "") + assert ( + result == "Ammonia, " + ), f"Expected 'Ammonia, ' (empty location), but got {result!r}" + + def test_replace_with_longer_location(self): + """Test replace_location_suffix replacing with a longer location code.""" + result = replace_location_suffix("Ammonia, NL", "RER w/o DE+NL+NO") + assert ( + result == "Ammonia, RER w/o DE+NL+NO" + ), f"Expected 'Ammonia, RER w/o DE+NL+NO', but got {result!r}" + + def test_replace_with_shorter_location(self): + """Test 
replace_location_suffix replacing with a shorter location code.""" + result = replace_location_suffix("Ammonia, RER w/o DE+NL+NO", "NL") + assert result == "Ammonia, NL", f"Expected 'Ammonia, NL', but got {result!r}" diff --git a/tests/unit/test_string_field.py b/tests/unit/test_string_field.py new file mode 100644 index 0000000..bbf1e96 --- /dev/null +++ b/tests/unit/test_string_field.py @@ -0,0 +1,232 @@ +"""Unit tests for StringField class.""" + +from flowmapper.fields import StringField + + +class TestStringFieldInitialization: + """Test StringField initialization.""" + + def test_init_with_value(self): + """Test initialization with a value.""" + sf = StringField("test") + assert sf == "test", f"Expected sf to equal 'test', but got {sf!r}" + from collections import UserString + + assert isinstance( + sf, UserString + ), f"Expected sf to be an instance of UserString, but got {type(sf)}" + assert not isinstance( + sf, str + ), f"Expected sf to not be an instance of str (UserString is not a subclass), but got {type(sf)}" + + def test_init_with_empty_string(self): + """Test initialization with empty string.""" + sf = StringField("") + # Empty StringField doesn't equal empty string due to __eq__ implementation + assert sf != "", f"Expected sf to not equal '', but they are equal (sf={sf!r})" + assert sf.data == "", f"Expected sf.data to be '', but got {sf.data!r}" + + def test_init_with_whitespace(self): + """Test initialization with whitespace.""" + sf = StringField(" test ") + # Equality normalizes the other string, so " test " becomes "test" + assert sf == " test ", f"Expected sf to equal ' test ', but got {sf!r}" + assert ( + sf.data == " test " + ), f"Expected sf.data to be ' test ', but got {sf.data!r}" + + def test_inherits_from_userstring(self): + """Test that StringField inherits from UserString.""" + sf = StringField("test") + from collections import UserString + + assert isinstance( + sf, UserString + ), f"Expected sf to be an instance of UserString, but got {type(sf)}" + assert issubclass( + StringField, UserString + ), "Expected StringField to be a subclass of UserString, but it is not" + # UserString is not a subclass of str + assert not isinstance( + sf, str + ), f"Expected sf to not be an instance of str (UserString is not a subclass), but got {type(sf)}" + + +class TestStringFieldNormalize: + """Test StringField normalize method.""" + + def test_normalize_with_lowercase_default(self): + """Test normalize with default lowercase=True.""" + sf = StringField("TEST") + normalized = sf.normalize() + assert ( + normalized == "test" + ), f"Expected normalized to equal 'test', but got {normalized!r}" + assert isinstance( + normalized, StringField + ), f"Expected normalized to be a StringField instance, but got {type(normalized)}" + + def test_normalize_with_lowercase_false(self): + """Test normalize with lowercase=False.""" + sf = StringField("TEST") + normalized = sf.normalize(lowercase=False) + assert ( + normalized == "TEST" + ), f"Expected normalized to equal 'TEST', but got {normalized!r}" + + def test_normalize_with_whitespace(self): + """Test normalize with whitespace.""" + sf = StringField(" test ") + normalized = sf.normalize() + assert ( + normalized == "test" + ), f"Expected normalized to equal 'test', but got {normalized!r}" + + def test_normalize_returns_new_instance(self): + """Test that normalize returns a new instance.""" + sf = StringField("TEST") + normalized = sf.normalize() + assert ( + normalized is not sf + ), "Expected normalize() to return a new instance, but 
it returned the same instance" + assert sf == "TEST", f"Expected original sf to remain 'TEST', but got {sf!r}" + + class TestStringFieldEq: + """Test StringField __eq__ method.""" + + def test_eq_with_same_stringfield(self): + """Test equality with same StringField instance.""" + sf1 = StringField("test") + sf2 = StringField("test") + assert ( + sf1 == sf2 + ), f"Expected sf1 to equal sf2, but they are not equal (sf1={sf1!r}, sf2={sf2!r})" + + def test_eq_with_different_stringfield(self): + """Test equality with different StringField.""" + sf1 = StringField("test") + sf2 = StringField("other") + assert ( + sf1 != sf2 + ), f"Expected sf1 to not equal sf2, but they are equal (sf1={sf1!r}, sf2={sf2!r})" + + def test_eq_with_string(self): + """Test equality with string.""" + sf = StringField("test") + assert ( + sf == "test" + ), f"Expected sf to equal 'test', but they are not equal (sf={sf!r})" + assert ( + sf != "other" + ), f"Expected sf to not equal 'other', but they are equal (sf={sf!r})" + + def test_eq_with_empty_stringfield(self): + """Test equality with empty StringField.""" + sf = StringField("") + assert sf != "", f"Expected sf to not equal '', but they are equal (sf={sf!r})" + assert ( + sf != "test" + ), f"Expected sf to not equal 'test', but they are equal (sf={sf!r})" + + def test_eq_with_other_type(self): + """Test equality with non-string, non-StringField type.""" + sf = StringField("test") + assert ( + sf != 123 + ), f"Expected sf to not equal 123, but they are equal (sf={sf!r})" + assert ( + sf != None + ), f"Expected sf to not equal None, but they are equal (sf={sf!r})" + assert sf != [], f"Expected sf to not equal [], but they are equal (sf={sf!r})" + + class TestStringFieldStrBehavior: + """Test StringField string-like behavior (inherited via UserString).""" + + def test_str_operations(self): + """Test that StringField behaves like a string.""" + sf = StringField("test") + assert len(sf) == 4, f"Expected len(sf) to be 4, but got {len(sf)}" + assert ( + sf.upper() == "TEST" + ), f"Expected sf.upper() to be 'TEST', but got {sf.upper()!r}" + assert ( + sf.lower() == "test" + ), f"Expected sf.lower() to be 'test', but got {sf.lower()!r}" + assert sf.startswith( + "te" + ), f"Expected sf.startswith('te') to be True, but got {sf.startswith('te')}" + + def test_bool_with_non_empty_string(self): + """Test truthiness of a non-empty string (via UserString).""" + sf = StringField("test") + assert bool(sf) is True, f"Expected bool(sf) to be True, but got {bool(sf)}" + + def test_bool_with_empty_string(self): + """Test truthiness of an empty string (via UserString).""" + sf = StringField("") + assert bool(sf) is False, f"Expected bool(sf) to be False, but got {bool(sf)}" + + def test_bool_with_whitespace(self): + """Test truthiness of a whitespace-only string (via UserString).""" + sf = StringField(" ") + assert ( + bool(sf) is True + ), f"Expected bool(sf) to be True for whitespace, but got {bool(sf)}" + + class TestStringFieldEdgeCases: + """Test StringField edge cases.""" + + def test_value_preserved_after_normalize(self): + """Test that original value is preserved after normalize.""" + sf = StringField("ORIGINAL") + normalized = sf.normalize() + assert ( + sf == "ORIGINAL" + ), f"Expected original sf to remain 'ORIGINAL', but got {sf!r}" + assert ( + normalized == "original" + ), f"Expected normalized to be 'original', but got {normalized!r}" + + def test_multiple_normalize_calls(self): + """Test that repeated normalize calls are stable.""" + sf = StringField(" TEST ") + norm1 = sf.normalize() +
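# normalize() is expected to be idempotent: a second call must leave the value unchanged +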
norm2 = norm1.normalize() + assert norm1 == "test", f"Expected norm1 to be 'test', but got {norm1!r}" + assert norm2 == "test", f"Expected norm2 to be 'test', but got {norm2!r}" + + def test_equality_chain(self): + """Test equality chain with multiple StringFields.""" + sf1 = StringField("test") + sf2 = StringField("test") + sf3 = StringField("test") + assert ( + sf1 == sf2 == sf3 + ), f"Expected all StringFields to be equal, but they are not (sf1={sf1!r}, sf2={sf2!r}, sf3={sf3!r})" + + def test_normalize_with_different_lowercase_settings(self): + """Test normalize with different lowercase settings.""" + sf = StringField("TEST") + norm1 = sf.normalize(lowercase=True) + norm2 = sf.normalize(lowercase=False) + assert norm1 == "test", f"Expected norm1 to be 'test', but got {norm1!r}" + assert norm2 == "TEST", f"Expected norm2 to be 'TEST', but got {norm2!r}" + + def test_string_concatenation(self): + """Test that StringField can be concatenated like a string.""" + sf1 = StringField("hello") + sf2 = StringField("world") + result = sf1 + " " + sf2 + assert ( + result == "hello world" + ), f"Expected result to be 'hello world', but got {result!r}" + # UserString concatenation returns a new instance of the same class + assert isinstance( + result, StringField + ), f"Expected result to be a StringField instance, but got {type(result)}" + assert ( + result.data == "hello world" + ), f"Expected result.data to be 'hello world', but got {result.data!r}" diff --git a/tests/unit/test_unit.py b/tests/unit/test_unit.py new file mode 100644 index 0000000..8ff4dfa --- /dev/null +++ b/tests/unit/test_unit.py @@ -0,0 +1,271 @@ +import math + +import pytest + +from flowmapper.unit import UnitField + + +def test_equals_mass(): + u1 = UnitField("kg") + u2 = UnitField("kilogram") + + assert ( + u1 == u2 + ), f"Expected u1 to equal u2, but they are not equal (u1={u1!r}, u2={u2!r})" + + +def test_energy(): + u1 = UnitField("kilowatt hour") + u2 = UnitField("MJ") + assert u1.compatible( + u2 + ), f"Expected u1 to be compatible with u2, but they are not (u1={u1!r}, u2={u2!r})" + assert ( + u1.conversion_factor(u2) == 3.6 + ), f"Expected u1.conversion_factor(u2) to be 3.6, but got {u1.conversion_factor(u2)}" + + +def test_enrichment(): + u1 = UnitField("SWU") + u2 = UnitField("tonne * SW") + assert u1.compatible( + u2 + ), f"Expected u1 to be compatible with u2, but they are not (u1={u1!r}, u2={u2!r})" + assert ( + u1.conversion_factor(u2) == 1e-3 + ), f"Expected u1.conversion_factor(u2) to be 1e-3, but got {u1.conversion_factor(u2)}" + + +def test_natural_gas(): + u1 = UnitField("nm3") + u2 = UnitField("sm3") + assert u1.compatible( + u2 + ), f"Expected u1 to be compatible with u2, but they are not (u1={u1!r}, u2={u2!r})" + + +def test_livestock(): + u1 = UnitField("LU") + u2 = UnitField("livestock unit") + assert ( + u1.normalize() == u2.normalize() + ), f"Expected u1 to equal u2, but they are not equal (u1={u1!r}, u2={u2!r})" + + +def test_freight(): + u1 = UnitField("kilogram * km") + u2 = UnitField("tkm") + assert ( + u1.conversion_factor(u2) == 1e-3 + ), f"Expected u1.conversion_factor(u2) to be 1e-3, but got {u1.conversion_factor(u2)}" + + +def test_vehicular_travel(): + u1 = UnitField("vehicle * m") + u2 = UnitField("vkm") + assert ( + u1.conversion_factor(u2) == 1e-3 + ), f"Expected u1.conversion_factor(u2) to be 1e-3, but got {u1.conversion_factor(u2)}" + + +def test_person_travel(): + u1 = UnitField("person * m") + u2 = UnitField("pkm") + assert ( + u1.conversion_factor(u2) == 1e-3 + ), f"Expected 
u1.conversion_factor(u2) to be 1e-3, but got {u1.conversion_factor(u2)}" + + +def test_conversion_factor(): + u1 = UnitField("mg") + u2 = UnitField("kg") + actual = u1.conversion_factor(u2) + assert actual == 1e-06, f"Expected actual to be 1e-06, but got {actual}" + + +def test_nan_conversion_factor(): + u1 = UnitField("bq") + u2 = UnitField("kg") + actual = u1.conversion_factor(u2) + assert math.isnan(actual), f"Expected actual to be NaN, but got {actual}" + + +def test_complex_conversions(): + u1 = UnitField("square_meter_year / t") + u2 = UnitField("(meter ** 2 * month) / kg") + assert ( + u1.conversion_factor(u2) == 0.012 + ), f"Expected u1.conversion_factor(u2) to be 0.012, but got {u1.conversion_factor(u2)}" + + +class TestUnitFieldNormalize: + """Test UnitField normalize method.""" + + def test_normalize_with_valid_unit(self): + """Test normalize with valid unit.""" + u = UnitField("kg") + normalized = u.normalize() + assert ( + normalized == "kilogram" + ), f"Expected normalized to be 'kilogram', but got {normalized!r}" + assert isinstance( + normalized, UnitField + ), f"Expected normalized to be a UnitField instance, but got {type(normalized)}" + + def test_normalize_with_mapped_unit(self): + """Test normalize with unit that needs mapping.""" + # This tests the UNIT_MAPPING functionality + u = UnitField("kilogram") + normalized = u.normalize() + # The unit should be normalized through UNIT_MAPPING if applicable + assert isinstance( + normalized, UnitField + ), f"Expected normalized to be a UnitField instance, but got {type(normalized)}" + + def test_normalize_raises_error_undefined_unit(self): + """Test normalize raises error for undefined unit.""" + u = UnitField("unknown_unit_xyz") + with pytest.raises(ValueError, match="is unknown"): + u.normalize() + + +class TestUnitFieldEq: + """Test UnitField __eq__ method.""" + + def test_eq_with_same_data(self): + """Test equality with same data.""" + u1 = UnitField("kg") + u2 = UnitField("kg") + assert ( + u1 == u2 + ), f"Expected u1 to equal u2, but they are not equal (u1={u1!r}, u2={u2!r})" + + def test_eq_with_different_data_same_unit(self): + """Test equality with different data but same unit (conversion_factor == 1).""" + u1 = UnitField("kg") + u2 = UnitField("kilogram") + assert ( + u1 == u2 + ), f"Expected u1 to equal u2, but they are not equal (u1={u1!r}, u2={u2!r})" + + def test_eq_with_different_units(self): + """Test equality with different units.""" + u1 = UnitField("kg") + u2 = UnitField("g") + assert ( + u1 != u2 + ), f"Expected u1 to not equal u2, but they are equal (u1={u1!r}, u2={u2!r})" + + def test_eq_with_string(self): + """Test equality with string.""" + u = UnitField("kg") + assert u == "kg", f"Expected u to equal 'kg', but they are not equal (u={u!r})" + assert u != "g", f"Expected u to not equal 'g', but they are equal (u={u!r})" + + def test_eq_with_other_type(self): + """Test equality with other types.""" + u = UnitField("kg") + assert u != 123, f"Expected u to not equal 123, but they are equal (u={u!r})" + assert u != None, f"Expected u to not equal None, but they are equal (u={u!r})" + assert u != [], f"Expected u to not equal [], but they are equal (u={u!r})" + + +class TestUnitFieldCompatible: + """Test UnitField compatible method.""" + + def test_compatible_with_compatible_units(self): + """Test compatible with compatible units.""" + u1 = UnitField("kg") + u2 = UnitField("g") + assert u1.compatible( + u2 + ), f"Expected u1 to be compatible with u2, but they are not (u1={u1!r}, u2={u2!r})" + + def 
test_compatible_with_incompatible_units(self): + """Test compatible with incompatible units.""" + u1 = UnitField("kg") + u2 = UnitField("meter") + assert not u1.compatible( + u2 + ), f"Expected u1 to not be compatible with u2, but they are (u1={u1!r}, u2={u2!r})" + + def test_compatible_with_same_unit(self): + """Test compatible with same unit.""" + u1 = UnitField("kg") + u2 = UnitField("kg") + assert u1.compatible( + u2 + ), f"Expected u1 to be compatible with u2, but they are not (u1={u1!r}, u2={u2!r})" + + def test_compatible_with_non_unitfield(self): + """Test compatible with non-UnitField type.""" + u1 = UnitField("kg") + # Strings are now supported and work with compatible() + assert u1.compatible( + "kg" + ), f"Expected u1 to be compatible with 'kg' string (strings are now supported), but it is not (u1={u1!r})" + # Non-string, non-UnitField types should return False + assert not u1.compatible( + 123 + ), f"Expected u1 to not be compatible with 123, but it is (u1={u1!r})" + + class TestUnitFieldConversionFactor: + """Test UnitField conversion_factor method.""" + + def test_conversion_factor_with_same_data(self): + """Test conversion_factor with same data.""" + u1 = UnitField("kg") + u2 = UnitField("kg") + result = u1.conversion_factor(u2) + assert result == 1.0, f"Expected conversion_factor to be 1.0, but got {result}" + + def test_conversion_factor_with_non_unitfield(self): + """Test conversion_factor with non-UnitField type.""" + u1 = UnitField("kg") + # Strings are now supported and work with conversion_factor() + result = u1.conversion_factor("kg") + assert ( + result == 1.0 + ), f"Expected conversion_factor to be 1.0 for same unit string, but got {result}" + # Non-string, non-UnitField types should return NaN + result2 = u1.conversion_factor(123) + assert math.isnan( + result2 + ), f"Expected conversion_factor to be NaN for non-UnitField, non-string type, but got {result2}" + + def test_conversion_factor_with_undefined_unit(self): + """Test conversion_factor with undefined unit.""" + u1 = UnitField("kg") + u2 = UnitField("unknown_unit_xyz") + result = u1.conversion_factor(u2) + assert math.isnan( + result + ), f"Expected conversion_factor to be NaN for undefined unit, but got {result}" + + def test_conversion_factor_with_dimensionality_error(self): + """Test conversion_factor with dimensionality error.""" + u1 = UnitField("kg") + u2 = UnitField("meter") + result = u1.conversion_factor(u2) + assert math.isnan( + result + ), f"Expected conversion_factor to be NaN for incompatible units, but got {result}" + + def test_conversion_factor_less_than_one(self): + """Test a conversion factor smaller than one (mg to kg).""" + u1 = UnitField("mg") + u2 = UnitField("kg") + result = u1.conversion_factor(u2) + assert ( + result == 1e-06 + ), f"Expected conversion_factor to be 1e-06, but got {result}" + + def test_conversion_factor_greater_than_one(self): + """Test a conversion factor greater than one (kg to mg).""" + u1 = UnitField("kg") + u2 = UnitField("mg") + result = u1.conversion_factor(u2) + assert ( + result == 1e06 + ), f"Expected conversion_factor to be 1e06, but got {result}"
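The UnitField tests above pin down a numeric contract: conversion_factor returns 1.0 for equivalent units, an exact scale factor for compatible ones, and NaN for undefined or dimensionally incompatible units. A minimal sketch of that contract, assuming a pint-backed registry (pint, the free function, and its name are illustrative assumptions here, not the actual flowmapper UnitField internals):

import math

import pint

ureg = pint.UnitRegistry()


def conversion_factor(source: str, target: str) -> float:
    """Return f such that 1 source unit equals f target units; NaN if conversion is impossible."""
    try:
        # pint handles parsing, SI prefixes, and dimensional analysis
        return ureg.Quantity(1, source).to(target).magnitude
    except (pint.UndefinedUnitError, pint.DimensionalityError):
        # Mirror the tests: unknown or incompatible units yield NaN, not an exception
        return math.nan


assert conversion_factor("mg", "kg") == 1e-06
assert conversion_factor("kg", "mg") == 1e06
assert math.isnan(conversion_factor("kg", "meter"))  # DimensionalityError
assert math.isnan(conversion_factor("kg", "unknown_unit_xyz"))  # UndefinedUnitError

Returning NaN instead of raising keeps matching pipelines simple: a caller can compute factors for an entire candidate list and filter out the NaN entries afterwards.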