From e1cf658bb241be348cb3fb6096f298388b167038 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Wed, 28 May 2025 10:03:56 +0000 Subject: [PATCH 01/11] fixed old functions for embeddings --- docs/usage/mutation_analysis.ipynb | 78 +++++++++++++++--------------- pyproject.toml | 1 + src/pyeed/embedding.py | 68 +++++++++++++++++++++++++- 3 files changed, 105 insertions(+), 42 deletions(-) diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index ac608881..c51d655d 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -11,18 +11,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import sys\n", "\n", @@ -47,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -55,7 +46,7 @@ "output_type": "stream", "text": [ "📡 Connected to database.\n", - "The provided date does not match the current date. Date is you gave is 2025-03-19 actual date is 2025-04-09\n" + "All data has been wiped from the database.\n" ] } ], @@ -66,7 +57,7 @@ "\n", "eedb = Pyeed(uri, user=user, password=password)\n", "\n", - "eedb.db.wipe_database(date=\"2025-03-19\")" + "eedb.db.wipe_database(date=\"2025-05-16\")" ] }, { @@ -85,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -111,21 +102,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n",
-       "\"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "2dec96f51ab84ce3af3750b48065738d", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n", - "\"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" + "Output()" ] }, "metadata": {}, @@ -135,8 +123,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Region ids: [5206, 5205, 5203, 5201, 5207]\n", - "len of ids: 5\n" + "Region ids: [849, 843, 848, 842, 847, 841, 846, 839, 850, 844]\n", + "len of ids: 5\n", + "Number of existing pairs: 0\n", + "Number of total pairs: 4\n", + "Number of pairs to align: 4\n" ] }, { @@ -200,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -217,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -253,14 +244,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'from_positions': [272, 241, 125], 'to_positions': [272, 241, 125], 'from_monomers': ['D', 'R', 'V'], 'to_monomers': ['N', 'S', 'I']}\n" + "{'from_positions': [241, 272, 125], 'to_positions': [241, 272, 125], 'from_monomers': ['R', 'D', 'V'], 'to_monomers': ['S', 'N', 'I']}\n" ] } ], @@ -298,21 +289,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Mutation on position 17 -> 17 with a nucleotide change of T -> C\n", - "Mutation on position 395 -> 395 with a nucleotide change of T -> G\n", - "Mutation on position 198 -> 198 with a nucleotide change of C -> A\n", - "Mutation on position 716 -> 716 with a nucleotide change of G -> A\n", - 
"Mutation on position 705 -> 705 with a nucleotide change of G -> A\n", - "Mutation on position 473 -> 473 with a nucleotide change of T -> C\n", - "Mutation on position 720 -> 720 with a nucleotide change of A -> C\n", - "Mutation on position 137 -> 137 with a nucleotide change of A -> G\n" + "Mutation on position 474 -> 474 with a nucleotide change of T -> C\n", + "Mutation on position 199 -> 199 with a nucleotide change of C -> A\n", + "Mutation on position 138 -> 138 with a nucleotide change of A -> G\n", + "Mutation on position 18 -> 18 with a nucleotide change of T -> C\n", + "Mutation on position 396 -> 396 with a nucleotide change of T -> G\n", + "Mutation on position 721 -> 721 with a nucleotide change of A -> C\n", + "Mutation on position 706 -> 706 with a nucleotide change of G -> A\n", + "Mutation on position 717 -> 717 with a nucleotide change of G -> A\n" ] } ], @@ -323,6 +314,13 @@ " )" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/pyproject.toml b/pyproject.toml index 7ec555b0..94635913 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ crc64iso = "0.0.2" SPARQLWrapper = "2.0.0" pysam = "0.23.0" types-requests = "2.32.0.20250328" +ipywidgets = "^8.1.7" [tool.poetry.group.dev.dependencies] mkdocstrings = {extras = ["python"], version = "^0.26.2"} diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index 28f66a1b..a45ce85c 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -96,7 +96,7 @@ def process_batches_on_gpu( def load_model_and_tokenizer( model_name: str, - device: torch.device, + device: torch.device = torch.device("cuda:0"), ) -> Tuple[Any, Union[Any, None], torch.device]: """ Loads the model and assigns it to a specific GPU. 
@@ -218,7 +218,7 @@ def get_batch_embeddings( def calculate_single_sequence_embedding_last_hidden_state( sequence: str, - device: torch.device, + device: torch.device = torch.device("cuda:0"), model_name: str = "facebook/esm2_t33_650M_UR50D", ) -> NDArray[np.float64]: """ @@ -369,10 +369,74 @@ def get_single_embedding_all_layers( return np.array(embeddings_list) +def calculate_single_sequence_embedding_first_layer( + sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D", device: torch.device = torch.device("cuda:0"), +) -> NDArray[np.float64]: + """ + Calculates an embedding for a single sequence using the first layer. + """ + model, tokenizer, device = load_model_and_tokenizer(model_name, device) + return get_single_embedding_first_layer(sequence, model, tokenizer, device) + # The rest of your existing functions will need to be adapted in a similar way # if they interact with the model or tokenizer directly +def get_single_embedding_first_layer( + sequence: str, model: Any, tokenizer: Any, device: torch.device +) -> NDArray[np.float64]: + """ + Generates normalized embeddings for each token in the sequence across all layers. + """ + embeddings_list = [] + + with torch.no_grad(): + if isinstance(model, ESMC): + # ESM-3 logic + from esm.sdk.api import ESMProtein, LogitsConfig + + protein = ESMProtein(sequence=sequence) + protein_tensor = model.encode(protein) + logits_output = model.logits( + protein_tensor, + LogitsConfig( + sequence=True, + return_embeddings=True, + return_hidden_states=True, + ), + ) + if logits_output.hidden_states is None: + raise ValueError( + "Model did not return hidden states. Check LogitsConfig settings." 
+ ) + embedding = ( + logits_output.hidden_states[0][0].to(torch.float32).cpu().numpy() + ) + + elif isinstance(model, ESM3): + # ESM-3 logic + from esm.sdk.api import ESMProtein, SamplingConfig + + protein = ESMProtein(sequence=sequence) + protein_tensor = model.encode(protein) + embedding = model.forward_and_sample( + protein_tensor, + SamplingConfig(return_per_residue_embeddings=True), + ) + if embedding is None or embedding.per_residue_embedding is None: + raise ValueError("Model did not return embeddings") + embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy() + else: + # ESM-2 logic + inputs = tokenizer(sequence, return_tensors="pt").to(device) + outputs = model(**inputs, output_hidden_states=True) + # Get the first layer's hidden states for all residues (excluding special tokens) + embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy() + + # Ensure embedding is a numpy array and normalize it + embedding = np.asarray(embedding, dtype=np.float64) + embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True) + return embedding def free_memory() -> None: """ From c96b543765b466e0882c78f0f91a33db8e099034 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 29 May 2025 12:17:06 +0000 Subject: [PATCH 02/11] major refactor of all embeddings related thing new strucutre in many places, old ways are still combatible wit the embdedinng_refactored --- docs/usage/embedding_different_models.ipynb | 329 +++++++++++++ docs/usage/embeddings_analysis.ipynb | 209 ++++----- pyproject.toml | 1 + src/pyeed/embedding.py | 191 +++++++- src/pyeed/embedding_refactored.py | 251 ++++++++++ src/pyeed/embeddings/__init__.py | 106 +++++ src/pyeed/embeddings/base.py | 121 +++++ src/pyeed/embeddings/database.py | 41 ++ src/pyeed/embeddings/factory.py | 67 +++ src/pyeed/embeddings/models/__init__.py | 17 + src/pyeed/embeddings/models/esm2.py | 172 +++++++ src/pyeed/embeddings/models/esm3.py | 191 ++++++++ 
src/pyeed/embeddings/models/esmc.py | 267 +++++++++++ src/pyeed/embeddings/models/prott5.py | 241 ++++++++++ src/pyeed/embeddings/processor.py | 482 ++++++++++++++++++++ src/pyeed/embeddings/utils.py | 77 ++++ src/pyeed/main.py | 130 +++--- 17 files changed, 2675 insertions(+), 218 deletions(-) create mode 100644 docs/usage/embedding_different_models.ipynb create mode 100644 src/pyeed/embedding_refactored.py create mode 100644 src/pyeed/embeddings/__init__.py create mode 100644 src/pyeed/embeddings/base.py create mode 100644 src/pyeed/embeddings/database.py create mode 100644 src/pyeed/embeddings/factory.py create mode 100644 src/pyeed/embeddings/models/__init__.py create mode 100644 src/pyeed/embeddings/models/esm2.py create mode 100644 src/pyeed/embeddings/models/esm3.py create mode 100644 src/pyeed/embeddings/models/esmc.py create mode 100644 src/pyeed/embeddings/models/prott5.py create mode 100644 src/pyeed/embeddings/processor.py create mode 100644 src/pyeed/embeddings/utils.py diff --git a/docs/usage/embedding_different_models.ipynb b/docs/usage/embedding_different_models.ipynb new file mode 100644 index 00000000..b494ef97 --- /dev/null +++ b/docs/usage/embedding_different_models.ipynb @@ -0,0 +1,329 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Protein Embedding with different models\n", + "\n", + "This notebook demonstrates how to calculate embeddings with different models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-05-29 12:01:28.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.embeddings.processor\u001b[0m:\u001b[36m_initialize_devices\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mInitialized 3 GPU device(s): [device(type='cuda', index=0), device(type='cuda', index=1), device(type='cuda', index=2)]\u001b[0m\n" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import sys\n", + "import numpy as np\n", + "import pandas as pd\n", + "from loguru import logger\n", + "\n", + "from pyeed import Pyeed\n", + "from pyeed.embeddings import get_processor\n", + "\n", + "from sklearn.decomposition import PCA\n", + "\n", + "logger.remove()\n", + "level = logger.add(sys.stderr, level=\"ERROR\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pyeed Graph Object Mapping constraints not defined. 
Use _install_labels() to set up model constraints.\n", + "📡 Connected to database.\n", + "All data has been wiped from the database.\n" + ] + } + ], + "source": [ + "uri = \"bolt://129.69.129.130:7688\"\n", + "user = \"neo4j\"\n", + "password = \"12345678\"\n", + "\n", + "eedb = Pyeed(uri, user=user, password=password)\n", + "eedb.db.wipe_database(date='2025-05-29')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The data has the following columns:\n", + "Index(['protein_name', 'phenotype', 'protein_id', 'protein_id_database'], dtype='object')\n" + ] + } + ], + "source": [ + "# these are example ids\n", + "df = pd.read_csv(\"resources/data_example.csv\", delimiter=\";\")\n", + "print(\"The data has the following columns:\")\n", + "print(df.columns)\n", + "\n", + "# create a dict with protein_id_database as key and phenotype as value\n", + "dict_data = dict(zip(df[\"protein_id_database\"], df[\"phenotype\"]))\n", + "data_ids = df[\"protein_id_database\"].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# now fecth all of the proteins from the database\n", + "eedb.fetch_from_primary_db(data_ids, db=\"ncbi_protein\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First sequence (first 10 AA): MSIQHFRVAL with length 286 and id AAP20891.1\n" + ] + } + ], + "source": [ + "query = \"MATCH (p:Protein) WHERE p.accession_id IN $protein_ids RETURN p.accession_id, p.sequence\"\n", + "\n", + "results = eedb.db.execute_read(query, parameters={\"protein_ids\": data_ids})\n", + "sequences = [result[\"p.sequence\"] for result in results]\n", + "\n", + "data = [(data_ids[i], sequences[i]) for i in range(len(data_ids))]\n", + "print(f\"First sequence (first 10 AA): {sequences[0][:10]} with 
length {len(sequences[0])} and id {data_ids[0]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model_names = model_name_list = [\"esmc_300m\", \"facebook/esm2_t33_650M_UR50D\", 'prot_t5_xl_uniref50','facebook/esm2_t6_8M_UR50D']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e75dae63f3f740b2b6d95da33c196de5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 4 files: 0%| | 0/4 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", + " warnings.warn(\n", + "Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t33_650M_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1899: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. 
Please use `token` instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings shape: (68, 1280)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHHCAYAAACvJxw8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAblJJREFUeJzt3XdYU9f/B/B3QFkCQWQErIobEbeFYq0TBbWuuutuv7ZFrXXVqq2DLmu1aq3W1ap1V2tdraUqjg5x1FUVXIiboaAsBYSc3x/8khozSCAhCbxfz5NHuffk5twLyf3kjM+RCCEEiIiIiEgjG3NXgIiIiMiSMVgiIiIi0oHBEhEREZEODJaIiIiIdGCwRERERKQDgyUiIiIiHRgsEREREenAYImIiIhIBwZLRERERDowWKJySSKRYPbs2eauhoqTJ0+iVatWqFSpEiQSCc6ePau17NWrV9G5c2dIpVJIJBLs3Lmz1Or5PIlEgrFjx5b66x4+fBgSiQQ//fRTqb82EZUvDJbIYGvXroVEIlE+HBwcUK9ePYwdOxbJyclq5ZOTkzF58mT4+/vDyckJlSpVQosWLfDpp5/i0aNHGl8jKCgIEokEy5Yt07teN27cUKmXra0tqlevjt69e+sMPAwRGxuL2bNn48aNG0Y5nsLTp0/Rr18/pKWlYeHChVi/fj1q1Kihtfzw4cNx/vx5fPbZZ1i/fj1atmxp1PqURXK5HJ6envjyyy/NXRU10dHReOONN1CvXj04OTmhVq1a+N///ofExESDj7V3716tXwQmTJiA5s2bw93dHU5OTmjQoAFmz56NrKwslXIXL15Ev379UKtWLTg5OcHDwwNt2rTBnj17inN6AIDTp0+jR48eytcODAzE4sWLVcq0a9dO5T2seISHh6sdLzc3Fx988AF8fX3h6OiI4OBg7N+/X62cn58fJBIJQkNDNdZr1apVytf5559/9D6fooL1sWPHQiKRaKyL4lGpUiUEBQVh3bp1as9//vPs2ceWLVvUysfFxSE8PBzOzs5wd3fH0KFDcf/+fY11Vjzs7e3h7e2Ndu3a4fPPP1crT/+pYO4KkPX6+OOPUbNmTeTk5OCvv/7CsmXLsHfvXly4cAFOTk4ACltLunbtiqysLAwZMgQtWrQAAPzzzz/44osv8Mcff2Dfvn0qx7169SpOnjwJPz8/bNy4EREREQbVa9CgQejatSsKCgoQFxeHZcuW4bfffsOxY8fQtGnTEp1zbGwsIiMj0a5dO/j5+ZXoWM+Kj4/HzZs3sWrVKvzvf//TWfbJkyeIiYnBhx9+aJYWHWt14sQJPHjwAN26dTN3VdR88MEHSEtLQ79+/VC3bl1cv34dS5YswS+//IKzZ89CJpPpfay9e/di6dKlGgOmkydP4pVXXsHIkSPh4OCAM2fO4IsvvsCBAwfwxx9/wMam8PvzzZs3kZmZieHDh8PX1xePHz/G9u3b0aNHD6xYsQJvvfWWQee3b98+dO/eHc2aNcOMGTPg7OyM+Ph43LlzR63sCy+8gDlz5qhs8/X1VSs3YsQI/PTTTxg/fjzq1q2LtWvXomvXrjh06BBat26tUtbBwQGHDh1CUlKS2rXcuHEjHBwckJOTY9A5FVfTpk0xadIkAEBiYiK+++47DB8+HLm5uRg1apRaecXn2bNCQkJUfr5z5w7atGkDqVSKzz//HFlZWZg/fz7Onz+PEydOwM7OTqX8uHHj8OKLL6KgoAD379/H0aNHMWvWLCxYsABbt25Fhw4djHzWZYAgMtCaNWsEAHHy5EmV7RMnTh
QAxKZNm4QQQjx8+FBUrVpVeHt7i7i4OLXjJCUliU8++URt+8yZM4WXl5fYvn27kEgkIiEhQa96JSQkCABi3rx5Ktt3794tAIi33npLuQ2AmDVrll7Hfda2bdsEAHHo0CGDn6vLkSNHBACxbdu2IsvevHlT43maCwAxZsyYUn/dQ4cO6X3NhBBixowZokaNGqatVDEdOXJEFBQUqG0DID788EODjjVmzBhhyEf7/PnzBQARExOjs1x+fr5o0qSJqF+/vkH1SU9PF97e3qJ3795q5/i8tm3bioYNGxZ5zOPHj6u9B548eSJq164tQkJCVMrWqFFDdOzYUbi6uopFixap7Lt9+7awsbERffr00fiZpktRf3+afg81atQQ3bp1U9mWkpIinJ2dRYMGDVS2a/s80yQiIkI4OjqKmzdvKrft379fABArVqzQq85nz54VXl5ews3NTdy7d6/I1yxv2A1HRqP4NpKQkAAAWLFiBe7evYsFCxbA399frby3tzc++ugjte2bNm1C37598eqrr0IqlWLTpk1GrZc2Z86cQZcuXeDq6gpnZ2d07NgRx44dU+5fu3Yt+vXrBwBo3769sin78OHDOo978OBBvPLKK6hUqRLc3NzQs2dPxMXFKfePGDECbdu2BQD069cPEokE7dq103is2bNnK7vn3n//fUgkEmUL182bNzF69GjUr18fjo6OqFKlCvr166exy/DRo0eYMGEC/Pz8YG9vjxdeeAHDhg3DgwcPlGVyc3Mxa9Ys1KlTB/b29qhWrRqmTJmC3NxcjXXbuHEj6tevDwcHB7Ro0QJ//PGHWpmirrHC9evX0a9fP2WXzUsvvYRff/1V4+s+Kzc3V/l3c/ToUZV9v/76q1qr0m+//ab83bi4uKBbt264ePGiSpmkpCSMHDkSL7zwAuzt7eHj44OePXuqXFc/Pz+8+uqrOHz4MFq2bAlHR0c0atRI+bfx888/o1GjRsprc+bMGZXXaNOmjbJV59lt7u7uKn8rRRkxYgSWLl0KACrdLboo/n60dYkr2Nraolq1akWWe96mTZuQnJyMzz77DDY2NsjOzoZcLtf5nPz8fLWuwWf99NNPsLW1VWnhcnBwwJtvvomYmBjcvn1bpbyDgwNee+01tc+SzZs3o3LlyggLCzPonIzJ09MT/v7+iI+P11omOzsbeXl5Wvdv374dr776KqpXr67cFhoainr16mHr1q161aNJkyZYtGgRHj16hCVLluh/AuUEgyUyGsWbvUqVKgCA3bt3w9HREX379tX7GMePH8e1a9cwaNAg2NnZ4bXXXsPGjRuNWi9NLl68iFdeeQXnzp3DlClTMGPGDCQkJKBdu3Y4fvw4gMKb17hx4wAA06dPx/r167F+/Xo0aNBA63EPHDiAsLAwpKSkYPbs2Zg4cSKOHj2Kl19+WXmzffvttzF9+nQAhc3j69evx4cffqjxeK+99hoWLlwIoLB5fv369Vi0aBGAwi6Wo0ePYuDAgVi8eDHeeecdREdHo127dnj8+LHyGFlZWXjllVfwzTffoHPnzvj666/xzjvv4NKlS8puEblcjh49emD+/Pno3r07vvnmG/Tq1QsLFy7EgAED1Op15MgRjB8/HkOGDMHHH3+M1NRUhIeH48KFCwZdY6BwjFurVq3w+++/Y/To0fjss8+Qk5ODHj16YMeOHVqv9ZMnT9C9e3ccPXoUBw4cQKtWrZT7kpKScObMGZXujPXr16Nbt25wdnbG3LlzMWPGDMTGxqJ169YqgVCfPn2wY8cOjBw5Et9++y3GjRuHzMxM3Lp1S+X1r127htdffx3du3fHnDlz8PDhQ3Tv3h0bN27EhAkTMGTIEERGRiI+Ph79+/cvMmDIyspCVlYWPDw8dJZ71ttvv41OnTopz0/xeFZ+fj4ePHiAe/fuYd++ffjoo4/g4uKCoKAgteNlZ2fjwYMHiI+Px8KFC/Hbb7+hY8eOetcHKHwPuLq64u7du6hfvz6cnZ3h6u
qKiIgIjV1fV65cUQavMpkMM2bMwNOnT1XKnDlzBvXq1YOrq6vKdsU5aBqj+Prrr+PEiRMqQYnii1nFihUNOidjys/Px507d1C5cmWN+yMjI+Hs7AwHBwe8+OKLasMW7t69i5SUFI3jFoOCgtQCc1369u0LR0dHtdcgsBuODKfohjtw4IC4f/++uH37ttiyZYuoUqWKcHR0FHfu3BFCCFG5cmXRpEkTg449duxYUa1aNSGXy4UQQuzbt08AEGfOnCnyuYpm68jISHH//n2RlJQkDh8+LJo1ayYAiO3btyvL4rluuF69egk7OzsRHx+v3Hbv3j3h4uIi2rRpo9xmaDdc06ZNhZeXl0hNTVVuO3funLCxsRHDhg1TbjOkS0lb8/zjx4/VysbExAgAYt26dcptM2fOFADEzz//rFZecd3Xr18vbGxsxJ9//qmyf/ny5QKA+Pvvv5XbAAgA4p9//lFuu3nzpnBwcBC9e/dWbtP3Go8fP14AUHntzMxMUbNmTeHn56fsynn2mmVmZoq2bdsKDw8PjX8r33//vXB0dFReo8zMTOHm5iZGjRqlUi4pKUlIpVLl9ocPH+rVFVKjRg0BQBw9elS57ffffxcA1LpHVqxYodff0CeffCIAiOjoaJ3lnldUN5zib0LxqF+/vta6vP3228pyNjY2om/fviItLc2g+jRu3Fg4OTkJJycn8e6774rt27eLd999VwAQAwcOVCn7xhtviNmzZ4vt27eLdevWiR49eggAon///irlGjZsKDp06KD2WhcvXhQAxPLly5XbFF1f+fn5QiaTKbv+Y2NjBQBx5MgRrUMLdCluN1znzp3F/fv3xf3798X58+fF0KFDNXZl37x5U3Tu3FksW7ZM7N69WyxatEhUr15d2NjYiF9++UVZ7uTJk2rvcYX3339fABA5OTl61VkIIZo0aSIqV66s93UoLxgskcEUHyzPP2rUqCGioqKU5WxtbUXr1q31Pu7Tp0+Fp6enmDx5snJbfn6+8PLyUtmmjSKIeP7h6uoq5s6dq1L22WApPz9fODk5qX0gC1F4s7CxsRHp6elCCMOCpXv37gkAYsqUKWr7wsLChIeHh/JnYwRLz8rLyxMPHjwQ9+/fF25ubmL8+PHKfQ0bNiwyiO3Ro4do2LCh8kNd8bhy5YoAID799FNlWQBq40SEEGLAgAHCyclJ5OfnG3SN69WrJ4KCgtTKzZkzRwAQ58+fF0L8d82+++47ERISIry9vcWFCxc0nk+fPn1E165dlT///PPPAoA4ePCg2jl27txZ1KlTRwghRE5OjrCzsxPdunXTGSTUqFFDBAQEqGx79OiRAKA2RuXs2bMCgPj++++1Hu/IkSOiQoUKGq9XUYoKltLT08X+/fvFzp07xZQpU0Tz5s3Fnj17NJaNi4sT+/fvFz/88IPo1q2b6N27t0hKSjKoPrVq1RIAxDvvvKOyXRGIXblyRefzR40apTamqlatWqJLly5qZePj4wUAsXDhQuW2Z8cJjRs3Tvl7+vDDD5VfzEozWNL0GTVy5EiNX3ael5qaKry9vVXGjf3xxx8CgPjxxx/Vys+YMUMAEA8fPtSrzkII8fLLL4sKFSoUWZfyht1wVGxLly7F/v37cejQIcTGxuL69esqff+urq7IzMzU+3j79u3D/fv3ERQUhGvXruHatWtISEhA+/btsXnz5iK7LRTeeust7N+/H9HR0Th16hRSUlIwZcoUreXv37+Px48fo379+mr7GjRoALlcrjYGQh83b94EAK3HffDgAbKzsw0+rjZPnjzBzJkzUa1aNdjb28PDwwOenp549OgR0tPTleXi4+MRGBio81hXr17FxYsX4enpqfKoV68eACAlJUWlfN26ddWOUa9ePTx+/Bj379836BrfvHlTaznF/meNHz8eJ0+exIEDB9CwYUO15z19+hT79+9XGa909epVAIXj2Z4/x3379inPz97eHnPnzsVvv/0Gb29vtGnTBl9++SWSkp
LUXufZ8SIAIJVKAQDVqlXTuP3hw4dqxwCAS5cuoXfv3ggMDMR3332nsUxJuLq6IjQ0FD179sTcuXMxadIk9OzZE+fOnVMr6+/vj9DQUAwbNgy//PILsrKy0L17dwgh9H49R0dHAIXdxs96/fXXAQAxMTE6n6+YOXbgwAGVY2oaO6fo1lO85vNef/11xMbG4ty5c9i0aRMGDhxY5JguY1OkOIiKisL8+fPh5uaGhw8fqs1Y08Td3R0jR47E5cuXlV3minMtzvXQJCsrCy4uLnqXLy+YOoCKLSgoSGd+H39/f5w9exZ5eXl6fRAoxib1799f4/4jR46gffv2RR6nbt26WnOqlGXvvvsu1qxZg/HjxyMkJESZsHLgwIF6B5oKcrkcjRo1woIFCzTufz4AMKeePXtiy5Yt+OKLL7Bu3Tq1gdJ//fUXMjIyVMYrKa7H+vXrNU7Lr1Dhv4/G8ePHo3v37ti5cyd+//13zJgxA3PmzMHBgwfRrFkzZTlbW1uN9dO2XVPAcfv2bWWy0b1795bKTeu1117D0KFDsWXLFjRp0kRn2b59++Ltt9/GlStXNAa0mvj6+uLixYvw9vZW2e7l5QVAe9CooPhbS0tLU27z8fHB3bt31coq8lJpSjUAFAYqtWvXxvjx45GQkKAM2IrDwcEBQOGXFE0eP36sLPMsDw8P5edTWFgY/P398eqrr+Lrr7/GxIkTi3zdZ6/HCy+8AB8fHwDQmJMrMTER7u7usLe31+ucnj59iitXrhT5Zao8YrBEJtO9e3fExMRg+/btat8qn5ednY1du3ZhwIABGgeEjxs3Dhs3btQrWDKUp6cnnJyccPnyZbV9ly5dgo2NjfIDypBvoYpZa9qO6+HhgUqVKhWz1up++uknDB8+HF999ZVyW05Ojtrspdq1a6sMvNakdu3aOHfuHDp27KjXOStaap515coVODk5wdPTEwD0vsY1atTQWk6x/1m9evVC586dMWLECLi4uKglMv31118REBCgkherdu3aAApv2PoE1rVr18akSZMwadIkXL16FU2bNsVXX32FDRs2FPlcfaWmpqJz587Izc1FdHS08iZoKENbSnJzcyGXy1VaH7VRBAb6lFVo0aIF9u/frxzgrXDv3j0AUP59aHP9+nW1ck2bNsWhQ4eQkZGhMshbMVFAVz61QYMG4dNPP0WDBg1KlHdN1/tbsV1XYlmFbt26oW3btvj888/x9ttvF/mZ8Pz1qFq1Kjw9PTUm1Dxx4oRB5/jTTz/hyZMnZp0daKnYDUcm884778DHxweTJk3ClStX1PanpKTg008/BQDs2LED2dnZGDNmDPr27av2ePXVV7F9+3at09ZLwtbWFp07d8auXbtUZkElJydj06ZNaN26tfIDWfFBps/0aR8fHzRt2hQ//PCDSvkLFy5g3759aonmSsrW1latteKbb75BQUGByrY+ffrg3LlzGmeWKZ7fv39/3L17F6tWrVIr8+TJE7Xuw5iYGJw+fVr58+3bt7Fr1y507twZtra2Bl3jrl274sSJEyrdM9nZ2Vi5ciX8/PwQEBCgVqdhw4Zh8eLFWL58OT744AOVfXv37lVLGRAWFgZXV1d8/vnnajOtACgzGT9+/Fhtxlbt2rXh4uJi1L/F7OxsdO3aFXfv3sXevXs1dmvqS9vf6KNHjzSeq6Kr79lW4ue7WYHCVod169bB0dFR4+9AG0VL8ffff6/2uhUqVFCmycjIyFC7pkII5WfEszfwvn37oqCgACtXrlRuy83NxZo1axAcHKyz5fN///sfZs2apfKlojgU7+8NGzaoXetTp07h2LFj6NKli17H+uCDD5CamqryftOUTfvu3btYvXo1GjdurBJM9+nTB7/88ovKcIHo6GhcuXJFme6kKOfOncP48eNRuXJljBkzRq/nlCdsWSKTqVy5Mnbs2IGuXbuiadOmKhm8T58+jc2bNysz0W7cuBFVqlRRme79rB49emDVqlX49ddf8dprrx
m9rp9++in279+P1q1bY/To0ahQoQJWrFiB3NxcleUxmjZtCltbW8ydOxfp6emwt7dHhw4dlF0Kz5s3bx66dOmCkJAQvPnmm3jy5Am++eYbSKVSo69N9+qrr2L9+vWQSqUICAhATEwMDhw4oJYy4f3338dPP/2Efv364Y033kCLFi2QlpaG3bt3Y/ny5WjSpAmGDh2KrVu34p133sGhQ4fw8ssvo6CgAJcuXcLWrVvx+++/q9xcAwMDERYWhnHjxsHe3h7ffvstgMJpzwr6XuOpU6di8+bN6NKlC8aNGwd3d3f88MMPSEhIwPbt29W62RTGjh2LjIwMfPjhh5BKpZg+fToSEhKUWdyf5erqimXLlmHo0KFo3rw5Bg4cCE9PT9y6dQu//vorXn75ZSxZsgRXrlxBx44d0b9/fwQEBKBChQrYsWMHkpOTMXDgwBL/zhQGDx6MEydO4I033kBcXJxKbiVnZ2f06tVL72Mp3mPjxo1DWFgYbG1tMXDgQBw+fBjjxo1D3759UbduXeTl5eHPP//Ezz//jJYtW2LIkCHKY7z99tvIyMhAmzZtULVqVSQlJWHjxo24dOkSvvrqKzg7O+tdn2bNmuGNN97A6tWrkZ+fj7Zt2+Lw4cPYtm0bpk2bpuwyO336NAYNGoRBgwahTp06ePLkCXbs2IG///4bb731Fpo3b648ZnBwMPr164dp06YhJSUFderUwQ8//IAbN26oBWXPq1GjhtHeewsWLEBYWBiaNm2KESNGwNfXF3FxcVi5ciV8fHwwbdo0vY7TpUsXBAYGYsGCBRgzZgwqVqyIKVOmID4+Hh07doSvry9u3LiBFStWIDs7G19//bXK86dPn45t27ahffv2eO+995CVlYV58+ahUaNGGDlypNrr/fnnn8jJyUFBQQFSU1Px999/Y/fu3ZBKpdixY4dBGePLDbMOLyerZOjMkXv37okJEyaIevXqCQcHB+Hk5CRatGghPvvsM5Geni6Sk5NFhQoVxNChQ7Ue4/Hjx8LJyUllKvrzDMl4Cw0ZvE+fPi3CwsKEs7OzcHJyEu3bt1eZCq6watUqUatWLWFra6vXzLgDBw6Il19+WTg6OgpXV1fRvXt3ERsbq1LGGLPhHj58KEaOHCk8PDyEs7OzCAsLE5cuXRI1atQQw4cPVymbmpoqxo4dK6pWrSrs7OzECy+8IIYPHy4ePHigLJOXlyfmzp0rGjZsKOzt7UXlypVFixYtRGRkpHLmmhD/ZfDesGGDqFu3rrC3txfNmjXTeF30vcbx8fGib9++ws3NTTg4OIigoCCV6dK6rtmUKVMEALFkyRKxZMkSIZVKxdOnTzVey0OHDomwsDAhlUqFg4ODqF27thgxYoQyDcKDBw/EmDFjhL+/v6hUqZKQSqUiODhYbN26VeU4mjIzP3ttnqXp96dtlhT+f5apIfLz88W7774rPD09hUQiUc7Iunbtmhg2bJioVauWcHR0FA4ODqJhw4Zi1qxZIisrS+UYmzdvFqGhocLb21tUqFBBVK5cWYSGhopdu3YZVBeFvLw8MXv2bFGjRg1RsWJFUadOHZUZa0IIcf36ddGvXz/h5+en8jmxfPlyZUqLZz158kRMnjxZyGQyYW9vL1588UWV2bgK2n43zyrObDiFY8eOiVdffVVUrlxZVKhQQVStWlX873//U6ZQ0bcua9euFQDEmjVrhBBCbNq0SbRp00Z4enqKChUqCA8PD9G7d29x6tQpjc+/cOGC6Ny5s3BychJubm5i8ODBajMXFe8ZxaNixYrC09NTtGnTRnz22WciJSXF4PMvLyRCGDCtgYjIinTt2hXOzs56ZzEmItKE3XBEVGa1a9cOr7zyirmrQURWji1LRERWID09Xes0dYXSHGuiWI5FF09PT62pEyxVXl6eSpoCTaRSqUG5i8j6sWWJiMgKvPfee/jhhx90linN777z589XGcCvSUJCgkrKBmtw9OjRIlOUrFmzBiNGjCidCpFFYM
sSEZEViI2NVeYm0qY0k7Fev35dmfNHm9atW2tMzGjJHj58iFOnTuks07Bhw2LnwSLrxGCJiIiISAcmpSQiIiLSgWOWjEAul+PevXtwcXEp9UUZiYiIqHiEEMjMzISvr6/WhLcAgyWjuHfvnkUtLEpERET6u337Nl544QWt+xksGYFiZfDbt2+rLOpIRERElisjIwPVqlVT3se1YbBkBIquN1dXVwZLREREVqaoITRWN8B76dKl8PPzg4ODA4KDg3HixAmtZS9evIg+ffrAz88PEokEixYtUisze/ZsSCQSlYe/v78Jz4CIiIisiVUFSz/++CMmTpyIWbNm4fTp02jSpAnCwsKQkpKisfzjx49Rq1YtfPHFFzoz2zZs2BCJiYnKx19//WWqUyAiIiIrY1XB0oIFCzBq1CiMHDkSAQEBWL58OZycnLB69WqN5V988UXMmzcPAwcOhL29vdbjVqhQATKZTPnw8PAw1SkQERGRlbGaYCkvLw+nTp1SyVBrY2OD0NBQxMTElOjYV69eha+vL2rVqoXBgwfj1q1bOsvn5uYiIyND5UFERERlk9UESw8ePEBBQQG8vb1Vtnt7eyMpKanYxw0ODsbatWsRFRWFZcuWISEhAa+88goyMzO1PmfOnDmQSqXKB9MGEBERlV1WEyyZSpcuXdCvXz80btwYYWFh2Lt3Lx49eoStW7dqfc60adOQnp6ufNy+fbsUa0xERESlyWpSB3h4eMDW1hbJyckq25OTk3UO3jaUm5sb6tWrh2vXrmktY29vr3MMFBEREZUdVtOyZGdnhxYtWiA6Olq5TS6XIzo6GiEhIUZ7naysLMTHx3NFaSIiIgJgRS1LADBx4kQMHz4cLVu2RFBQEBYtWoTs7GyMHDkSADBs2DBUrVoVc+bMAVA4KDw2Nlb5/7t37+Ls2bNwdnZGnTp1AACTJ09G9+7dUaNGDdy7dw+zZs2Cra0tBg0aZJ6TJCIiIotiVcHSgAEDcP/+fcycORNJSUlo2rQpoqKilIO+b926pbIQ3r1799CsWTPlz/Pnz8f8+fPRtm1bHD58GABw584dDBo0CKmpqfD09ETr1q1x7NgxeHp6luq5EZFlK5ALnEhIQ0pmDrxcHBBU0x22Nlw4m6g8kAghhLkrYe0yMjIglUqRnp7O5U6IyqCoC4mI3BOLxPQc5TYfqQNmdQ9AeCC77Imslb73b6sZs0REZA5RFxIRseG0SqAEAEnpOYjYcBpRFxLNVDMiKi0MloiItCiQC0TuiYWm5nfFtsg9sSiQs4GeqCxjsEREpMWJhDS1FqVnCQCJ6Tk4kZBWepUiolLHYImISIuUTO2BUnHKEZF1YrBERKSFl4uDUcsRkXVisEREpEVQTXf4SB2gLUGABIWz4oJqupdmtYiolDFYIiLSwtZGglndAwBALWBS/DyrewDzLRGVcQyWiIh0CA/0wbIhzSGTqna1yaQOWDakOfMsEZUDVpXBm4jIHMIDfdApQMYM3kTlFIMlIiI92NpIEFK7irmrQURmwG44IiIiIh0YLBERERHpwGCJiIiISAcGS0REREQ6MFgiIiIi0oGz4YiIiCxUgVwwZYUFYLBERERkgaIuJCJyTywS0/9bqNlH6oBZ3QOYDLWUsRuOiIjIwkRdSETEhtMqgRIAJKXnIGLDaURdSDRTzconBktEREQWpEAuELknFkLDPsW2yD2xKJBrKkGmwGCJiIjIgpxISFNrUXqWAJCYnoMTCWmlV6lyjsESERGRBUnJ1B4oFacclRyDJSIiIgvi5eJg1HJUcpwNR+USp+MSkaUKqukOH6kDktJzNI5bkgCQSQs/t6h0MFiicofTcYnIktnaSDCrewAiNpyGBFAJmBRf6WZ1D+AXvFLEbjgqVzgdl4isQXigD5YNaQ6ZVLWrTSZ1wLIhzfnFrpSxZYnKjaKm40pQOB23U4CM39iIyOzCA33QKUDGIQMWgMESlRuGTMcNqV2l9CpGRKSFrY2En0cWgN
1wVG5wOi4RERUHW5ao3DD1dFzOsCMiKpsYLFG5YcrpuJxhR0RUdrEbjsoNxXRc4L/ptwolmY7LGXZERGUbgyUqV4w9HbeoGXYCwNTt5/H3tQdc9JKIyEqxG47KHWNOxy1qhh0APHryFIO/O85uOSIiK8VgicolY03HNWTmnKJbjgnliIisC7vhiErAkJlzik64yD2x7JIjIrIiDJaISkAxw07fDrxnE18SEZF1YLBEVAK6ZtjpwsSXRETWg8ESUQlpm2GnS3ETXxIRUeljsERkBOGBPvjrgw7Y+GYw3Bwrai0nQWGyyuIkviQiIvNgsERkJLY2Erxc1wNf9GkECYyb+JKIiMyHwRKRkRk78SUREZkX8ywRmYAxE18SEZF5MVgiMhFjJb4kIiLzYjccERERkQ4MloiIiIh0YLBEREREpAODJSIiIiIdGCwRERER6cBgiYiIiEgHBktEREREOjBYIiIiItKBwRIRERGRDgyWiIiIiHSwumBp6dKl8PPzg4ODA4KDg3HixAmtZS9evIg+ffrAz88PEokEixYtKvExiYiIqHyxqmDpxx9/xMSJEzFr1iycPn0aTZo0QVhYGFJSUjSWf/z4MWrVqoUvvvgCMpnMKMckIiKi8kUihBDmroS+goOD8eKLL2LJkiUAALlcjmrVquHdd9/F1KlTdT7Xz88P48ePx/jx4412TIWMjAxIpVKkp6fD1dXV8BOzIAVygRMJaUjJzIGXiwOCarrD1kZi7moREREZnb737wqlWKcSycvLw6lTpzBt2jTlNhsbG4SGhiImJqZUj5mbm4vc3FzlzxkZGcV6fUsTdSERkXtikZieo9zmI3XArO4BCA/0MWPNiIiIzMdquuEePHiAgoICeHt7q2z39vZGUlJSqR5zzpw5kEqlyke1atWK9fqWJOpCIiI2nFYJlAAgKT0HERtOI+pCoplqRkREZF5WEyxZkmnTpiE9PV35uH37trmrVCIFcoHIPbHQ1B+r2Ba5JxYFcqvpsSWiMqpALhATn4pdZ+8iJj6Vn0tUKqymG87DwwO2trZITk5W2Z6cnKx18Lapjmlvbw97e/tivaYlOpGQptai9CwBIDE9BycS0hBSu0rpVYyI6BkcKkDmYjUtS3Z2dmjRogWio6OV2+RyOaKjoxESEmIxx7RGKZnaA6XilCMiMjYOFSBzspqWJQCYOHEihg8fjpYtWyIoKAiLFi1CdnY2Ro4cCQAYNmwYqlatijlz5gAoHMAdGxur/P/du3dx9uxZODs7o06dOnodszzwcnEwajlDcPYdERWlqKECEhQOFegUIOPnB5mEVQVLAwYMwP379zFz5kwkJSWhadOmiIqKUg7QvnXrFmxs/mssu3fvHpo1a6b8ef78+Zg/fz7atm2Lw4cP63XM8iCopjt8pA5ISs/R+GEkASCTFgYyxsQmdSLSB4cKkLlZVZ4lS1UW8iwpmrgBqARMiu9oy4Y0N2oAo3i95//4TPV6RGS9dp29i/e2nC2y3NcDm6Jn06qmrxCVGfrev61mzBKZVnigD5YNaQ6ZVLWrTSZ1MHrgwtl3ZMk428rymHOoABFgZd1wZFrhgT7oFCAz+RgiNqmTpWLXsGUy11ABIgW2LJEKWxsJQmpXQc+mVRFSu4pJBkty9h1ZIs62sly2NhLM6h4A4L+uegXFz7O6B3BwN5kMgyUqdWxSJ0vDrmHLV5pDBYiex244KnVsUidLw65h61BaQwWInsdgiUqdokk9YsNpSKB59h2b1Kk0sWvYeiiGChCVJnbDkVmwSZ0sCbuGiUgXtiyR2bBJnSwFu4aJSBcGS2RWbFInS8CuYSLShd1wRERg1zARaceWJSKi/8euYSLShMESEdEz2DVMRM9jNxwRERGRDgyWiIiIiHTQO1hq1KgRPvnkE9y+fduU9SEiIiKyKHoHSxcvXsTXX3+NmjVrIjw8HNu3b0d+fr4p60ZERERkdgZ1w/3777/46aefYGdnh4EDB8LX1xeTJ0
9GXFycqepHREREZFYGBUsVKlRAr169sHv3bty6dQsTJkzA7t27ERgYiFatWmH16tWmqicRERGRWegdLEkkqnlGfHx8MG3aNFy5cgXR0dGoXbs2xo0bZ/QKEhEREZmT3nmWhNC0YlKhdu3aoV27dsjIyDBKpYiIiIgshd4tS8OHD4ejo6POMq6uriWuEBEREZEl0btlac2aNaasBxEREZFFYlJKIiIiIh0MCpa+/fZbhIaGon///oiOjlbZ9+DBA9SqVcuolSMiIiIyN72DpcWLF+P999+Hv78/7O3t0bVrV8yZM0e5v6CgADdv3jRJJYmIiIjMRe8xSytWrMCqVavw+uuvAwAiIiLQq1cvPHnyBB9//LHJKkhERERkTnoHSwkJCWjVqpXy51atWuHgwYMIDQ3F06dPMX78eFPUj4iISC8FcoETCWlIycyBl4sDgmq6w9ZGUvQTiYqgd7Dk4eGB27dvw8/PT7ktMDAQBw8eRIcOHXDv3j1T1I+IiKhIURcSEbknFonpOcptPlIHzOoegPBAHzPWjMoCvccstW7dGj///LPa9oCAAERHR+O3334zasWIiIj0EXUhEREbTqsESgCQlJ6DiA2nEXUh0Uw1o7JC72Bp6tSpaNy4scZ9DRs2xMGDBzFz5kyjVYyIiKgoBXKByD2x0LTGhGJb5J5YFMi1r0JBVBS9u+EaN26sNVgCCrvkAgMDjVIpImvFMRNEpetEQppai9KzBIDE9BycSEhDSO0qpVcxKlP0DpaISDeOmSAqfSmZ2gOl4pQj0sRoGbwbNGgAW1tbYx2OyKpwzASReXi5OBi1HJEmRmtZmjNnDtLT0411OCKrUdSYCQkKx0x0CpCxS47IyIJqusNH6oCk9ByN70EJAJm0sEucqLiM1rLUq1cvDB8+3FiHI7IahoyZICLjsrWRYFb3AACFgdGzFD/P6h7ALypUIsUKltLT03H58mVcvnyZrUlU7nHMBJF5hQf6YNmQ5pBJVbvaZFIHLBvSnGMGqcQM6ob77rvvsGDBAly+fFlle/369TFp0iS8+eabRq0ckTXgmAki8wsP9EGnABlno5JJ6B0szZs3D7Nnz8a4ceMQFhYGb29vAEBycjL27duH9957Dw8fPsTkyZNNVlkiS8QxE1RWWVsqDFsbCdMDkElIhBB6ZeqqUaMG5s2bh/79+2vc/+OPP+L999/HrVu3jFpBa5CRkQGpVIr09HS4urqauzpkBorZcABUAibFbYVdAWRtmAqDygN97996j1lKSUlBo0aNtO5v1KgRHjx4YFgticoIjpmgsoSpMIhU6d2y1KZNG9SsWRPff/89KlRQ7b0rKCjAG2+8gRs3buDIkSMmqaglY8sSKVhbtwXR8wrkAq3nHtQ6w1PRrfzXBx34t01WT9/7t95jlpYsWYKwsDDIZDK0adNGZczSH3/8ATs7O+zbt6/kNSeyYhwzQdaOy4cQqdO7G65x48a4cuUKPvnkE7i4uOD69eu4fv06XFxc8Omnn+LSpUtcG46IyMoxFQaROoNSB7i4uCAiIgIRERGmqg8REZkRU2EQqTNaBm8iIrJ+ilQY2kYjSVA4K46pMKg8YbBERKRBgVwgJj4Vu87eRUx8Kgrkes2FsXpcPoRIndEW0iUiKivKe44hRSqM56+BrBxdA6Jn6ZU64N9//0VgYCBsbNgQpQlTBxCVHYocQ89/MJbHBKNMhUFlnVGTUjZr1kyZcLJWrVpITU01Ti3JpMprNwJRcRXIBSL3xGpctkaxLXJPbLl5LylSYfRsWhUhtaswUKJyS69uODc3NyQkJMDLyws3btyAXC43db2ohMp7NwJRcTDHEBFpolew1KdPH7Rt2xY+Pj6QSCRo2bIlbG1tNZa9fv26UStIhtPWjaBYqqA8dSMQGaI4OYbYVUVU9ukVLK1cuRKvvfYarl27hnHjxmHUqFFwcXExdd2oGIrqRpCgsBuhU4CMH+hEzzE0xxBbcInKB71nw4WHhwMATp
06hffee4/BkoViNwJR8SlyDCWl52j8wqFYFy2opjtbcInKEYOnt61Zs0YZKN25cwd37twxeqV0Wbp0Kfz8/ODg4IDg4GCcOHFCZ/lt27bB398fDg4OaNSoEfbu3auyf8SIEZBIJCoPRWBojbhUAVHx6ZtjCAAHghOVIwYHS3K5HB9//DGkUilq1KiBGjVqwM3NDZ988onJB37/+OOPmDhxImbNmoXTp0+jSZMmCAsLQ0pKisbyR48exaBBg/Dmm2/izJkz6NWrF3r16oULFy6olAsPD0diYqLysXnzZpOehylxqQKiklHkGJJJVd8jMqmDsrXIkBZcIrJ+Biel/PDDD/H999/jiy++wMsvvwwA+OuvvzB79mzk5OTgs88+M3olFRYsWIBRo0Zh5MiRAIDly5fj119/xerVqzF16lS18l9//TXCw8Px/vvvAwA++eQT7N+/H0uWLMHy5cuV5ezt7SGTyUxW79JkSDcCEWkWHuiDTgEyrQO32YJLVL4Y3LL0ww8/4LvvvkNERAQaN26Mxo0bY/To0Vi1ahXWrl1rgioWysvLw6lTpxAaGqrcZmNjg9DQUMTExGh8TkxMjEp5AAgLC1Mrf/jwYXh5eaF+/fqIiIiw6jxSXKqAyDh05RhiCy5R+WJwsJSWlgZ/f3+17f7+/khLM12T84MHD1BQUABvb2+V7d7e3khKStL4nKSkpCLLh4eHY926dYiOjsbcuXNx5MgRdOnSBQUFBVrrkpubi4yMDJWHJdGnG4GIio+LzRKVLwZ3wzVp0gRLlizB4sWLVbYvWbIETZo0MVrFSsvAgQOV/2/UqBEaN26M2rVr4/Dhw+jYsaPG58yZMweRkZGlVcViKaobgYiKT9GCG7HhNCSASpc3W3CJyh6Dg6Uvv/wS3bp1w4EDBxASEgKgsLvr9u3bajPNjMnDwwO2trZITk5W2Z6cnKx1vJFMJjOoPFC4nIuHhweuXbumNViaNm0aJk6cqPw5IyMD1apV0/dUSo2iG4GIjI+LzRKVHwYHS23btsWVK1ewdOlSXLp0CQDw2muvYfTo0fD19TV6BRXs7OzQokULREdHo1evXgAKZ+ZFR0dj7NixGp8TEhKC6OhojB8/Xrlt//79yiBPkzt37iA1NRU+Pto/6Ozt7WFvb1+s8yCisoMtuETlg0QIYTWJQH788UcMHz4cK1asQFBQEBYtWoStW7fi0qVL8Pb2xrBhw1C1alXMmTMHQGHqgLZt2+KLL75At27dsGXLFnz++ec4ffo0AgMDkZWVhcjISPTp0wcymQzx8fGYMmUKMjMzcf78eb0DIn1XLSYiIiLLoe/92+CWJXMaMGAA7t+/j5kzZyIpKQlNmzZFVFSUchD3rVu3YGPz35j1Vq1aYdOmTfjoo48wffp01K1bFzt37kRgYCAAwNbWFv/++y9++OEHPHr0CL6+vujcuTM++eQTthwRERERACtrWbJUbFkiIiKyPvrevw1OHUBERERUnjBYIiIiItKhWMFSfn4+Dhw4gBUrViAzMxMAcO/ePWRlZRm1ckRERETmZvAA75s3byI8PBy3bt1Cbm4uOnXqBBcXF8ydOxe5ubkqa64RERERWTuDW5bee+89tGzZEg8fPoSjo6Nye+/evREdHW3UyhERERGZm8EtS3/++SeOHj0KOzs7le1+fn64e/eu0SpGREREZAkMblmSy+UaF5m9c+cOXFxcjFIpIiIiIkthcLDUuXNnLFq0SPmzRCJBVlYWZs2aha5duxqzbkREVEIFcoGY+FTsOnsXMfGpKJAztR6RoQxOSnnnzh2EhYVBCIGrV6+iZcuWuHr1Kjw8PPDHH3/Ay8vLVHW1WExKSUSWKOpCotpCvz5c6JdISd/7d7EyeOfn5+PHH3/EuXPnkJWVhebNm2Pw4MEqA77LEwZLRGRpoi4kImLDaTz/Aa9Y4nfZkOYMmKjcM2mwRKoYLBGRJSmQC7See1ClRelZEgAyqQP++qADbG0kGssQlQ
cmW+5kzpw5WL16tdr21atXY+7cuYYejoiIjOxEQprWQAkABIDE9BycSEhT2c7xTUSaGZw6YMWKFdi0aZPa9oYNG2LgwIH44IMPjFIxIiIqnpRM7YGStnIc30SkncEtS0lJSfDxUX/jeHp6IjEx0SiVIiKi4vNycTConGJ80/OtUUnpOYjYcBpRF/jZTuWbwcFStWrV8Pfff6tt//vvv+Hr62uUShERUfEF1XSHj9QB2kYjSVDYahRU0x0FcoHIPbFqA8EBKLdF7olllxyVawYHS6NGjcL48eOxZs0a3Lx5Ezdv3sTq1asxYcIEjBo1yhR1JCIiA9jaSDCrewAAqAVMip9ndQ+ArY2k2OObiMoTg8csvf/++0hNTcXo0aORl5cHAHBwcMAHH3yAadOmGb2CRERkuPBAHywb0lxtHJLsuXFIxRnfRFTeGBwsSSQSzJ07FzNmzEBcXBwcHR1Rt25d2Nvbm6J+RERUTOGBPugUIMOJhDSkZObAy6Ww6+3ZdAGGjm8iMrUCudD5N2sOBgdLCs7OznjxxReNWRciMgNL/GAi47G1kSCkdhWt+xXjm5LSczSOW1LkZAqq6W6yOhIpWOqsTIODpezsbHzxxReIjo5GSkoK5HK5yv7r168brXJEZFqW+sFEpUcxviliw2lIAJWA6fnxTUSmpC3rfGJ6Dt7ZcBrfvt4cXRub53PJ4GDpf//7H44cOYKhQ4fCx8cHEgnfQETWSNsHk2K6OJfDKD/0Hd9EZCq6ZmUqjN18GkvQDF0bl/7Me4OXO3Fzc8Ovv/6Kl19+2VR1sjpc7oSsDZfDIE3YJUvmEhOfikGrjulVdrkRv8jpe/82uGWpcuXKcHdn3zWRNTNkuriu8S5UthQ1vonIVAyZbRm5JxadAmSlGsgbnGfpk08+wcyZM/H48WNT1IeISgGnixORJTFktqU58n4Z3LL01VdfIT4+Ht7e3vDz80PFihVV9p8+fdpolSMi0+B0cSKyJIpZmbpavJ9V2l/kDA6WevXqZYJqEFFp4nRxIrIkilmZ72zQr8GltL/IGTzAm9RxgDdZI8VsOEDzdHHOhiOi4ijJRIG9/yZi7ObT0LYUobEnn5hsgDcRlQ2cLk5ExlbS3G1dG/tgCZph9KYzavvMmffL4JalgoICLFy4EFu3bsWtW7eU68MppKWVv8UW2bJE1ozTxYnIGLTlbitOa3VpJcw1WctSZGQkvvvuO0yaNAkfffQRPvzwQ9y4cQM7d+7EzJkzS1RpIip9nC5OVH6Y6suRrqSSAoUBkyFT/vVZ17A0GRwsbdy4EatWrUK3bt0we/ZsDBo0CLVr10bjxo1x7NgxjBs3zhT1JCIiohIwZWuNKXK3WdIXOYPzLCUlJaFRo0YAChfTTU9PBwC8+uqr+PXXX41bOyIiIioxRRfZ8wGNYnmjqAuJJTp+Wc/dZnCw9MILLyAxsfCi1q5dG/v27QMAnDx5Evb29satHREREZVIUV1kQGEXWYG2KWh60Hcq/40H2Wp1i4lPxa6zdxETn1qiOpiSwd1wvXv3RnR0NIKDg/Huu+9iyJAh+P7773Hr1i1MmDDBFHUkIiIqVWVp4kNpLG8UVNMdMlcHJGXobjnafOIWxnaoC1sbSakN4jYGg4OlL774Qvn/AQMGoHr16oiJiUHdunXRvXt3o1aOiIiotFnTTVwfpdFFZmsjwaCg6lh44IrOckkZuTiRkIb0J3kaZ84pugUtLc9bifMshYSEICQkxBh1IaJSVpa+PRMZg7bp75Z6E9dHaS1v5OfhpFe5pPQn+PL3y0abOVca9AqWdu/ejS5duqBixYrYvXu3zrI9evQwSsWIyLSM+e2ZQReVBcae/m4pSmt5I32DrbTsPJN3CxqbXsFSr169kJSUBC8vL51rw0kkEhQUFBirbkRkIsb89lzWuiyo/CqNsT3moFh3LWLDaUigeXkjY2TF1jcoc3fWbzKYJc2c02s2nFwuh5eXl/L/2h4MlIgsnzFnxp
h6OjJRaSrL098VyxvJpKqtPzKpA5YNaY5OAbISz0pTBGXAf0GYwrNBmcy1dLoFjcmgMUtPnz5FeHg4li9fjrp165qqTkRkQsb69lxWuyyo/CqtsT3moi0r9v7YJLSee9AorcP6rDlZIBel0i1oTAYFSxUrVsS///5rqroQUSkw1rfnstplQeVXaY3tMTVdYwifz4ptigHtRS1VUlrdgsZkcFJKRV4lIrJOxvr2XJa7LKh80rcbyZJu4s+LupCI1nMPYtCqY3hvy1kMWnUMrece1NglbspklYqgrGfTqgipXUXtmhXVLWhp4x0NTh2Qn5+P1atX48CBA2jRogUqVaqksn/BggVGqxwRGZ+xvj2X9S4LKp/06UayVIa2Epm7ddjSFsvVxeBg6cKFC2jevDkA4MoV1eRTEonlnSARqTJWE3hZ6bIgep65buIlScFRnDGEltA6bEmL5epicLB06NAhU9SDiEqRMb49W+O4AyJ9lfZNvKQpOIrTSsTWYf2VOIM3EVknY3x7tuYuCyJLYYxB1sVpJWLrsP6KFSz9888/2Lp1K27duoW8vDyVfT///LNRKkZEpmeMb8/WNO6AyNIYKwVHcVqJ2DqsP4Nnw23ZsgWtWrVCXFwcduzYgadPn+LixYs4ePAgpFKpKepIRBauqJkvRKSZId1nuihaibS98yQo7NZ7vpXI2malmYvBLUuff/45Fi5ciDFjxsDFxQVff/01atasibfffhs+PryoRFT+cG08Ki5jDbIuSSsRW4eLZnCwFB8fj27dugEA7OzskJ2dDYlEggkTJqBDhw6IjIw0eiWJiCwV18ajkjDmIOuSjCG0lllp5mJwsFS5cmVkZmYCAKpWrYoLFy6gUaNGePToER4/fmz0ChIRWSpTZD+m8sXYg6zZSmQaBo9ZatOmDfbv3w8A6NevH9577z2MGjUKgwYNQseOHY1eQSIiS2TK7MdUfpgiazjHEBqfwS1LS5YsQU5OYfPehx9+iIoVK+Lo0aPo06cPPvroI6NXkIjIEpk7+zGVHUzBYfkMDpbc3f9rCrSxscHUqVONWiEiImtgCdmPyTIYY4A/u88sm8HdcKGhoVi7di0yMjJMUZ8iLV26FH5+fnBwcEBwcDBOnDihs/y2bdvg7+8PBwcHNGrUCHv37lXZL4TAzJkz4ePjA0dHR4SGhuLq1aumPAUiKgOY/ZgAwxauLQq7zyyXwcFSw4YNMW3aNMhkMvTr1w+7du3C06dPTVE3NT/++CMmTpyIWbNm4fTp02jSpAnCwsKQkpKisfzRo0cxaNAgvPnmmzhz5gx69eqFXr164cKFC8oyX375JRYvXozly5fj+PHjqFSpEsLCwpRdjURk3QrkAjHxqdh19i5i4lONNoaouHltqOxQDPB/vjtWMcC/OAETWSaJEMLgTw65XI4DBw5g06ZN2LFjB2xtbdG3b18MHjwYbdu2NUU9AQDBwcF48cUXsWTJEmU9qlWrhnfffVdjd+CAAQOQnZ2NX375RbntpZdeQtOmTbF8+XIIIeDr64tJkyZh8uTJAID09HR4e3tj7dq1GDhwoF71ysjIgFQqRXp6OlxdXY1wpkRkDKae1q+4WQKa89pwNlzZVSAXaD33oNZxa4pZbH990IEtRBZM3/u3wS1LQOFYpc6dO2Pt2rVITk7GihUrcOLECXTo0KHYFS5KXl4eTp06hdDQUJV6hIaGIiYmRuNzYmJiVMoDQFhYmLJ8QkICkpKSVMpIpVIEBwdrPSYA5ObmIiMjQ+VBRJalNL71M/tx+WWszNtkHUq0kG5SUhK2bNmCDRs24N9//0VQUJCx6qXmwYMHKCgogLe3t8p2b29vXLp0SWv9NJVPSkpS7lds01ZGkzlz5jD5JpEFM9Z6W/rgwNzyiQP8yxeDW5YyMjKwZs0adOrUCdWqVcOyZcvQo0cPXL16FceOHTNFHS3OtGnTkJ6ernzcvn3b3FUiomeU9rd+DswtfzjAv3wxuGXJ29sblS
tXxoABAzBnzhy0bNnSFPVS4+HhAVtbWyQnJ6tsT05Ohkwm0/gcmUyms7zi3+TkZJV17ZKTk9G0aVOtdbG3t4e9vX1xToOISgG/9ZOpGTvzNlk2g1uWdu/ejTt37mDhwoWlFigBhevQtWjRAtHR0cptcrkc0dHRCAkJ0fickJAQlfIAsH//fmX5mjVrQiaTqZTJyMjA8ePHtR6TiCwfv/WTqZki8zZZLoODpU6dOsHGpljjwkts4sSJWLVqFX744QfExcUhIiIC2dnZGDlyJABg2LBhmDZtmrL8e++9h6ioKHz11Ve4dOkSZs+ejX/++Qdjx44FAEgkEowfPx6ffvopdu/ejfPnz2PYsGHw9fVFr169zHGKRGQEnNZPpYED/MuPEg3wLm0DBgzA/fv3MXPmTCQlJaFp06aIiopSDtC+deuWSiDXqlUrbNq0CR999BGmT5+OunXrYufOnQgMDFSWmTJlCrKzs/HWW2/h0aNHaN26NaKiouDgwG+cRNZK8a0/YsNpSKB5Wj+/9ZMxcIB/+VCsPEukinmWiCyTqfMsEZF10/f+bVUtS0REhuC3fiIyBgZLRFSmKab1ExEVV7GCpezsbBw5cgS3bt1CXl6eyr5x48YZpWJEzzLGqt5ERETFYXCwdObMGXTt2hWPHz9GdnY23N3d8eDBAzg5OcHLy4vBEhmdKcadMPgiIiJ9GRwsTZgwAd27d8fy5cshlUpx7NgxVKxYEUOGDMF7771nijpSOaZY3+v5WQiK9b2KMz2Xg36JiMgQBidMOnv2LCZNmgQbGxvY2toiNzcX1apVw5dffonp06eboo5UThW1vhdQuL5XgVz/CZ2lsbgqEREZR4FcICY+FbvO3kVMfKpBn/fGZHDLUsWKFZW5jLy8vHDr1i00aNAAUqmUa6SRURmyvpc+A3hLc3FVIiqestpFXlbPy5QsqRfA4GCpWbNmOHnyJOrWrYu2bdti5syZePDgAdavX6+S7JGopIy9vpexgy8iMi59bo7WGHRY0k3fWphiCEZJGBwsff7558jMzAQAfPbZZxg2bBgiIiJQt25drF692ugVpPLL2Ot7cXFVIsulz80RgNUFHZZ207cGltgLYHCw9OziuV5eXoiKijJqhYgUjL2qNxdXJbJM+twcp/58HumPn1pV0GGJN31rYIm9AOZZEZdID8Ze1ZuLq1JZZymDYQ2lz83xkYZASbEPMHyyR2kw5KZP/7HEXgBm8CaLpljV+/mmd1kxmt65uCqVZdY8LqakNz1LHW9oiTd9a2CJvQAMlsjiGXN9L2MGX0SWwtrHxRjrpmdpQYcl3vStgbGHYBgDgyWyCsZc34uLq1JZUhbGxRR1c9SXpQUdlnjTtwaW2AvAMUtULimCr55NqyKkdhWLvYkQFaUsjIvRZ3yim1NFqxtvaOxxl+WJohdAJlUNgGVSB7O0lBarZSk6OhrR0dFISUmBXC5X2cf0AUREpaesjIspqoscgEW1NOiLXf/FZ0m9AAYHS5GRkfj444/RsmVL+Pj4QCKxvD9OIvqPNSbxI/2VpXExRd0crTXosKSbvrUx5hCMkpAIIQzqIvbx8cGXX36JoUOHmqpOVicjIwNSqRTp6elwdXU1d3WIlKx5hhTpp0Au0HruwSLHxfz1QYcycXNm8E/GpO/92+AxS3l5eWjVqlWJKkdEpsdFg8uH8jYuhuMNyRwMDpb+97//YdOmTaaoCxEZSVEzpADLTOJHxWNpg2HJ/Kw1Qaml0mvM0sSJE5X/l8vlWLlyJQ4cOIDGjRujYsWKKmUXLFhg3BoSkcEscbkAMi2OiyEFc3a/G6Ob1BK7WvUKls6cOaPyc9OmTQEAFy5cMHqFiKjkysoMKTKMpQyGJfMxZ4JSYwRpljrOUq9g6dChQ6auBxEZUVmaIUVE+jFnglJjBGmWnIne4DFLb7zxBjIzM9W2Z2dn44033jBKpYioZLhoMFH5Y64EpcYYI2np4ywNDpZ++OEHPHnyRG37ky
dPsG7dOqNUiohKprzNkCIi83W/GyNIs/RM9HoHSxkZGUhPT4cQApmZmcjIyFA+Hj58iL1798LLy8uUdSUiA3CGFFH5Yq7ud2MEaZY+zlLvDN5ubm6QSCSQSCSoV6+e2n6JRILIyEijVo6ISoYzpIjKD3Mt3GuMIM3Sx1nqHSwdOnQIQgh06NAB27dvh7v7fxfbzs4ONWrUgK+vr0kqSUTFxxlSRNajJNPmFd3vpb2GnjGCNHMFevoyeLmTmzdvonr16lwT7hlc7oSIiErKWNPmzTH9XjGTDdAcpBkyG64kxzCUvvdvvYKlf//9F4GBgbCxscG///6rs2zjxo0Nr62VY7BEREQloW3afHEDBXMkdrTGPEtGDZZsbGyQlJQELy8v2NjYQCKRQNPTJBIJCgoKSlZzK8RgiYio7DNVAKJYDFnbbDBrWgzZ2jJ463v/1mvMUkJCAjw9PZX/JyIiKk9M2eJRlpYnMsYYSUscZ6lXsFSjRg2N/yciIvOwxPWzyipTZ5bWdzr8gdgkiwsiygu9Z8MpVK9eHe3atUPbtm3Rrl071K5d2xT1IiIq00oS7Fjq+lllUWksIaLvdPgdZ+9iejcmkzUHgzN4f/7553BwcMDcuXNRt25dVKtWDUOGDMGqVatw9epVU9SRiKhMibqQiNZzD2LQqmN4b8tZDFp1DK3nHkTUhUS9nhux4bRat42ilUOfY5D+SiOzdFBNd7hXsiuyXFr2U7NlsC7vDA6WhgwZgpUrV+LKlSu4e/cu5s2bBwAYPXo0/P39jV5BIqKypCTBjqWvn1UWlUZmaVsbCXo11S9PobkyWJd3BnfDAcDjx4/x119/4fDhwzh06BDOnDmDwMBAtGvXzsjVIyIqO0rapVOWBgJbi9LKLN0pQIbVf98w+etQ8RgcLLVq1QpnzpxBgwYN0K5dO0ydOhVt2rRB5cqVTVE/IqIyo6TBjqWvn1UWlVZmacXrFJU+wFwZrMs7g7vhLl26hEqVKsHf3x/+/v5o0KABAyUiMyuQC8TEp2LX2buIiU9lN4yFKmmwY+nrZ5VFiiVEgP8SRCoYcwkRxetITPw6z+Nnh34MbllKTU3F+fPncfjwYfz+++/48MMPYWdnh7Zt26J9+/YYNWqUKepJRFoYc2YUp6ObVkmDHUtfP6usCg/0wbIhzdXeZ7L/f591CpAhJj61xO+bol7H2DMdOatSfwavDfcsIQROnTqFJUuWYOPGjZDL5czgzQzeVIqMuUQCPzhNT5GpuahgR1emZnOsn0WFNH2Z2B+bZPT3TWl8aTH28irWyqjLnTzr9OnTOHz4MA4fPoy//voLmZmZaNSokTL3Us+ePUtceWvDYInMwZhLJPCDs/QYa8FRBrbmZ63vm7K0vEpJGXW5k2cFBQWhWbNmaNu2LUaNGoU2bdpAKpWWqLJEZDhjzYwqjaR79B9jdLWEB/qgU4CMXaZmZM3vG86qNJzBwVJaWhpbT4gsgLFmRvGDs/QZI9ixxPWzyhNrft9wVqXhDA6WGCgRWQZjzYziB6d5MNixbtb8vuGsSsMZnDqAiCyDYmaUtrYICQrHsRQ1M4ofnESGs+b3jbE+O8oTBktEVspY+V/4wUllSWnlDbLm901p5Y4qSxgsEVkxxWBhmVT126tM6qD3TBx+cJIxmTPJYUkWKDaUtb9vjPHZUZ6UKM8SFWLqADI3Y+Rl4XR0Kilz/g2Zaxq/tb9vynsiWpPkWUpMTER0dDTc3d0RGhoKOzs75b7s7Gx89dVXmDlzZslqboUYLFFZUd4/OKn4zJlzyNx5g/i+sV5GD5ZOnjyJzp07Qy6X4+nTp6hatSp27tyJhg0bAgCSk5Ph6+vLDN4MloionClJsGKMQCMmPhWDVh0rstzmUS9xBiKp0Pf+rfeYpenTp6N37954+PAhkpOT0alTJ7Rt2xZnzpwxSoWJiMg6GZJz6FnGGmNkzdP4yTroHSydOnUKU6dOhY2NDVxcXPDtt99i8uTJ6NixI0
6ePGnKOhIRkQUrTrCi6LZ7PshKSs9BxIbTBgVM1jyNn6yDQbPhcnJU/6inTp2K6dOno3Pnzjh69KhRK/a8tLQ0DB48GK6urnBzc8Obb76JrKwsnc/JycnBmDFjUKVKFTg7O6NPnz5ITk5WKSORSNQeW7ZsMeWpEBGVKYYGK0UtFQIULhWi70w6a57GT9ZB72ApMDBQY0A0efJkTJs2DYMGDTJqxZ43ePBgXLx4Efv378cvv/yCP/74A2+99ZbO50yYMAF79uzBtm3bcOTIEdy7dw+vvfaaWrk1a9YgMTFR+ejVq5eJzoKIqOwxNFgpbredNtY+jZ8sn97B0rBhw/DXX39p3DdlyhRERkaievXqRqvYs+Li4hAVFYXvvvsOwcHBaN26Nb755hts2bIF9+7d0/ic9PR0fP/991iwYAE6dOiAFi1aYM2aNTh69CiOHVMdCOjm5gaZTKZ8ODiwqZaISF+GBiumGGPEvEFkSlaRZ2n16tWYNGkSHj58qNyWn58PBwcHbNu2Db1791Z7zsGDB9GxY0c8fPgQbm5uyu01atTA+PHjMWHCBACF3XC+vr7Izc1FrVq18M4772DkyJGQSLR/A8nNzUVubq7y54yMDFSrVo2z4YioXNM355ApZ69xGj8ZQt/ZcHovpJuTk4N9+/ahffv2cHFxUXuxw4cPIywsDPb29sWvtRZJSUnw8vJS2VahQgW4u7sjKSlJ63Ps7OxUAiUA8Pb2VnnOxx9/jA4dOsDJyQn79u3D6NGjkZWVhXHjxmmtz5w5cxAZGVn8EyIiKoPCA33QKUBWZLCi6LZLSs/ROG5JkWqgOGOMuEAxmYLe3XArVqzA119/rRYoAYCrqysWL16MVatWGfTiU6dO1TjA+tnHpUuXDDqmoWbMmIGXX34ZzZo1wwcffIApU6Zg3rx5Op8zbdo0pKenKx+3b982aR2JiKyFIljp2bQqQmpX0diqwzFGZG30DpY2btyI8ePHa90/fvx4rFu3zqAXnzRpEuLi4nQ+atWqBZlMhpSUFJXn5ufnIy0tDTKZTOOxZTIZ8vLy8OjRI5XtycnJWp8DAMHBwbhz545KN9vz7O3t4erqqvIgIiL9cYwRWRO9u+GuXr2KJk2aaN3fuHFjXL161aAX9/T0hKenZ5HlQkJC8OjRI5w6dQotWrQAUDgmSS6XIzg4WONzWrRogYoVKyI6Ohp9+vQBAFy+fBm3bt1CSEiI1tc6e/YsKleubJLuRCIi+o++3XZE5qZ3sJSfn4/79+9rnfF2//595OfnG61iz2rQoAHCw8MxatQoLF++HE+fPsXYsWMxcOBA+Pr6AgDu3r2Ljh07Yt26dQgKCoJUKsWbb76JiRMnwt3dHa6urnj33XcREhKCl156CQCwZ88eJCcn46WXXoKDgwP279+Pzz//HJMnTzbJeRARkSqOMSJroHew1LBhQxw4cEDZsvO8ffv2KdeJM4WNGzdi7Nix6NixI2xsbNCnTx8sXrxYuf/p06e4fPkyHj9+rNy2cOFCZdnc3FyEhYXh22+/Ve6vWLEili5digkTJkAIgTp16mDBggUYNWqUyc6DiIrGGU1EZEn0Th2wcuVKTJw4EVu2bMGrr76qsm/Pnj0YNGgQFixYUGSiyLKIC+lSWWWOoEXf6edERCWl7/3boDxLQ4YMwaZNm+Dv74/69esDAC5duoQrV66gf//+2Lx5c8lrboUYLFFZZI6gRbFe2PMfSorwjAN/iciY9L1/G7Q23IYNG7BlyxbUrVsXV65cweXLl1G/fn1s3ry53AZKRGWRMRc51Zex1wsjIjIWvccsKfTv3x/9+/c3RV2IyAIUFbRIUBi0dAqQGbVLzpD1wjggmIhKk94tS3K5HHPnzsXLL7+MF198EVOnTsWTJ09MWTciMgNjL3KqL1OsF0ZEZAx6B0ufffYZpk+fDmdnZ1StWhVff/01xowZY8q6EZEZmCto8XLRbwFrfcsRERmL3sHSunXr8O233+L333
/Hzp07sWfPHmzcuBFyudyU9SOiUmauoEWxXpi2jj0JCgeYF2e9MCKiktA7WLp16xa6du2q/Dk0NBQSiQT37t0zScWo7CmQC8TEp2LX2buIiU81y0BdS6iDpTNX0ML1wojIUhmUwdvBQfWbZMWKFfH06VOjV4rKHkvInWMJdbAGiqAlYsNpSACVgd6mDloU64U9/3uS8fdERGakd54lGxsbdOnSRWXNtD179qBDhw6oVKmSctvPP/9s/FpaOOZZ0s0ScudYQh2sjTmDS2bwJqLSYPSklCNHjtTrhdesWaNfDcsQBkvaFcgFWs89qHV2lQSFrQZ/fdDBZDdDS6iDtWLQQkRlmb73b7274cpjEEQlZwm5cyyhDtaKi5wSERmYwZvIUJaQO8cS6kBERNaLwRKZlCXkzrGEOhARkfVisEQmZQm5cyyhDkREZL0YLJFJWULuHEuoAxERWS8GS2Ryitw5MqlqN5dM6lBqU/YtoQ5ERGSd9E4dQNoxdYB+LGEauiXUgYiILIPRUwcQlZQlTEO3hDoQEZF1YbBEVIaw5YyIyPgYLBGVEVz7jojINDjAm6gMUKx993ym8qT0HERsOI2oC4lmqhkRkfVjsERk5QrkApF7YtUWCQag3Ba5JxYFcs7lICIqDgZLRFbOkLXviIjIcAyWiKwc174jIjItBktEVo5r3xERmRaDJSIrx7XviIhMi8ESkZXj2ndERKbFYImoDODad0REpsOklERlRHigDzoFyJjBm4jIyBgsERmJJSw1wrXviIiMj8ESkRFwqREiorKLY5aISohLjRARlW0MlohKgEuNEBGVfQyWiEqAS40QEZV9HLNkoSxhsDAVjUuNEBGVfQyWLBAHC1sPLjVCRFT2sRvOwnCwsHXhUiNERGUfgyULwsHC1odLjRARlX0MliwIBwtbJy41QkRUtnHMkgXhYGHrxaVGiIjKLgZLFoSDha0blxohIiqb2A1nQThYmIiIyPIwWLIgHCxMRERkeRgsWRgOFiYiIrIsHLNkgThYmIiIyHIwWLJQHCxMRERkGdgNR0RERKQDgyUiIiIiHRgsEREREenAYImIiIhIBwZLRERERDpwNhwRURlVIBdMQUJkBAyWiIjKoKgLiYjcE4vE9P8W3vaROmBW9wAmtyUykNV0w6WlpWHw4MFwdXWFm5sb3nzzTWRlZel8zsqVK9GuXTu4urpCIpHg0aNHRjkuEZEli7qQiIgNp1UCJQBISs9BxIbTiLqQaKaaEVknqwmWBg8ejIsXL2L//v345Zdf8Mcff+Ctt97S+ZzHjx8jPDwc06dPN+pxiYgsVYFcIHJPLISGfYptkXtiUSDXVIKINJEIISz+HRMXF4eAgACcPHkSLVu2BABERUWha9euuHPnDnx9fXU+//Dhw2jfvj0ePnwINzc3ox1XISMjA1KpFOnp6XB1dS3eSRIRGUFMfCoGrTpWZLnNo17iKgFU7ul7/7aKlqWYmBi4ubkpAxoACA0NhY2NDY4fP25xxyUiMpeUzJyiCxlQjoisZIB3UlISvLy8VLZVqFAB7u7uSEpKKvXj5ubmIjc3V/lzRkZGsetARKSLoTPavFwc9DquvuWIyMzB0tSpUzF37lydZeLi4kqpNvqbM2cOIiMjzV0NIirjijOjLaimO3ykDkhKz9E4bkkCQCYtDLqISD9m7YabNGkS4uLidD5q1aoFmUyGlJQUlefm5+cjLS0NMpms2K9f3ONOmzYN6enpysft27eLXQciIk2KO6PN1kaCWd0DABQGRs9S/DyrewDzLREZwKwtS56envD09CyyXEhICB49eoRTp06hRYsWAICDBw9CLpcjODi42K9f3OPa29vD3t6+2K9LRKRLUTPaJCic0dYpQKYx6AkP9MGyIc3VWqVkzLNEVCxWMWapQYMGCA8Px6hRo7B8+XI8ffoUY8eOxcCBA5Uz1u7evYuOHTti3bp1CAoKAlA4JikpKQnXrl0DAJw/fx4uLi6oXr063N
3d9TouEVk3a8xifSIhTa1F6VkCQGJ6Dk4kpGmd0RYe6INOATKrO3ciS2QVwRIAbNy4EWPHjkXHjh1hY2ODPn36YPHixcr9T58+xeXLl/H48WPltuXLl6uMLWrTpg0AYM2aNRgxYoRexyUi62WtWayNNaPN1kbC9ABERmAVeZYsHfMsEVkexZif5z/gFO0q40Prwc/DySJbXJgriah06Hv/tpqWJSIifemTxXrhgSvKbZbW2sQZbUSWxSqSUhIRGaKoMT/Ps7Q10zijjciyMFgiojLH0OzUlrhmmmJGm0yqmjxSJnXAsiHNLaYVjKg8YDccEZU5xclOrc8Ms9LGGW1EloHBEhGVOUWN+dHF0tZM44w2IvNjNxwRlTm6xvwUhWumEdHzGCwRUZmkbcyPNhIUzorjDDMieh674YiozHp+zM+NB9lYeOAqJIBK9xxnmBGRLgyWiKhMe37MT32ZC9dMIyKDMFgionKFM8yIyFAMloio3OEMMyIyBAd4ExEREenAYImIiIhIB3bDkUUokAuOISEiIovEYInMLupCotrsJEtbBZ6IiMovdsORWUVdSETEhtNqK8Rb2irwRERUfjFYIrMpkAtE7onVuHaXJa4CT0RE5RODJTKbEwlpai1Kz3p2FXgiIiJzYbBEZqPv6u6Wtgo8ERGVLwyWyGz0Xd2dq8ATEZE5MVgiswmq6Q4fqQO0JQjgKvBERGQJGCyR2djaSDCrewAAqAVMXAWeiIgsBYMlMqvwQB8sG9IcMqlqV5tM6oBlQ5ozzxIREZkdk1KS2XEVeCIismQMlsgicBV4IiKyVOyGIyIiItKBwRIRERGRDgyWiIiIiHRgsERERESkA4MlIiIiIh0YLBERERHpwGCJiIiISAcGS0REREQ6MFgiIiIi0oEZvI1ACAEAyMjIMHNNiIiISF+K+7biPq4NgyUjyMzMBABUq1bNzDUhIiIiQ2VmZkIqlWrdLxFFhVNUJLlcjnv37sHFxQUSCRd/NaaMjAxUq1YNt2/fhqurq7mrUy7xd2BevP7mxetvfqb8HQghkJmZCV9fX9jYaB+ZxJYlI7CxscELL7xg7mqUaa6urvygMjP+DsyL19+8eP3Nz1S/A10tSgoc4E1ERESkA4MlIiIiIh0YLJFFs7e3x6xZs2Bvb2/uqpRb/B2YF6+/efH6m58l/A44wJuIiIhIB7YsEREREenAYImIiIhIBwZLRERERDowWCIiIiLSgcESmV1aWhoGDx4MV1dXuLm54c0330RWVpbO56xcuRLt2rWDq6srJBIJHj16ZJTjlkfFuU45OTkYM2YMqlSpAmdnZ/Tp0wfJyckqZSQSidpjy5YtpjwVq7F06VL4+fnBwcEBwcHBOHHihM7y27Ztg7+/PxwcHNCoUSPs3btXZb8QAjNnzoSPjw8cHR0RGhqKq1evmvIUrJqxr/+IESPU/tbDw8NNeQpWzZDrf/HiRfTp0wd+fn6QSCRYtGhRiY9ZLILIzMLDw0WTJk3EsWPHxJ9//inq1KkjBg0apPM5CxcuFHPmzBFz5swRAMTDhw+NctzyqDjX6Z133hHVqlUT0dHR4p9//hEvvfSSaNWqlUoZAGLNmjUiMTFR+Xjy5IkpT8UqbNmyRdjZ2YnVq1eLixcvilGjRgk3NzeRnJyssfzff/8tbG1txZdffiliY2PFRx99JCpWrCjOnz+vLPPFF18IqVQqdu7cKc6dOyd69OghatasyeutgSmu//Dhw0V4eLjK33paWlppnZJVMfT6nzhxQkyePFls3rxZyGQysXDhwhIfszgYLJFZxcbGCgDi5MmTym2//fabkEgk4u7du0U+/9ChQxqDpZIet7woznV69OiRqFixoti2bZtyW1xcnAAgYmJilNsAiB07dpis7tYqKChIjBkzRvlzQUGB8PX1FXPmzNFYvn///qJbt24q24KDg8Xbb78thBBCLpcLmUwm5s2bp9z/6NEjYW9vLzZv3myCM7Buxr7+QhQGSz
179jRJfcsaQ6//s2rUqKExWCrJMfXFbjgyq5iYGLi5uaFly5bKbaGhobCxscHx48ct7rhlTXGu06lTp/D06VOEhoYqt/n7+6N69eqIiYlRKTtmzBh4eHggKCgIq1evhijnad3y8vJw6tQplWtnY2OD0NBQtWunEBMTo1IeAMLCwpTlExISkJSUpFJGKpUiODhY6zHLK1Ncf4XDhw/Dy8sL9evXR0REBFJTU41/AlauONffHMfUhAvpklklJSXBy8tLZVuFChXg7u6OpKQkiztuWVOc65SUlAQ7Ozu4ubmpbPf29lZ5zscff4wOHTrAyckJ+/btw+jRo5GVlYVx48YZ/TysxYMHD1BQUABvb2+V7d7e3rh06ZLG5yQlJWksr7jWin91laFCprj+ABAeHo7XXnsNNWvWRHx8PKZPn44uXbogJiYGtra2xj8RK1Wc62+OY2rCYIlMYurUqZg7d67OMnFxcaVUm/LHEq7/jBkzlP9v1qwZsrOzMW/evHIdLFHZNHDgQOX/GzVqhMaNG6N27do4fPgwOnbsaMaakbEwWCKTmDRpEkaMGKGzTK1atSCTyZCSkqKyPT8/H2lpaZDJZMV+fVMd11qY8vrLZDLk5eXh0aNHKq1LycnJOq9tcHAwPvnkE+Tm5pbbdbY8PDxga2urNnNQ17WTyWQ6yyv+TU5Oho+Pj0qZpk2bGrH21s8U11+TWrVqwcPDA9euXWOw9IziXH9zHFMTjlkik/D09IS/v7/Oh52dHUJCQvDo0SOcOnVK+dyDBw9CLpcjODi42K9vquNaC1Ne/xYtWqBixYqIjo5Wbrt8+TJu3bqFkJAQrXU6e/YsKleuXG4DJQCws7NDixYtVK6dXC5HdHS01msXEhKiUh4A9u/fryxfs2ZNyGQylTIZGRk4fvy4zt9HeWSK66/JnTt3kJqaqhK8UvGuvzmOqZHRhooTFVN4eLho1qyZOH78uPjrr79E3bp1Vaau37lzR9SvX18cP35cuS0xMVGcOXNGrFq1SgAQf/zxhzhz5oxITU3V+7hUqDjX/5133hHVq1cXBw8eFP/8848ICQkRISEhyv27d+8Wq1atEufPnxdXr14V3377rXBychIzZ84s1XOzRFu2bBH29vZi7dq1IjY2Vrz11lvCzc1NJCUlCSGEGDp0qJg6daqy/N9//y0qVKgg5s+fL+Li4sSsWbM0pg5wc3MTu3btEv/++6/o2bMnUwdoYezrn5mZKSZPnixiYmJEQkKCOHDggGjevLmoW7euyMnJMcs5WjJDr39ubq44c+aMOHPmjPDx8RGTJ08WZ86cEVevXtX7mMbAYInMLjU1VQwaNEg4OzsLV1dXMXLkSJGZmancn5CQIACIQ4cOKbfNmjVLAFB7rFmzRu/jUqHiXP8nT56I0aNHi8qVKwsnJyfRu3dvkZiYqNz/22+/iaZNmwpnZ2dRqVIl0aRJE7F8+XJRUFBQmqdmsb755htRvXp1YWdnJ4KCgsSxY8eU+9q2bSuGDx+uUn7r1q2iXr16ws7OTjRs2FD8+uuvKvvlcrmYMWOG8Pb2Fvb29qJjx47i8uXLpXEqVsmY1//x48eic+fOwtPTU1SsWFHUqFFDjBo1yqg36rLGkOuv+Px5/tG2bVu9j2kMEiHK+VxeIiIiIh04ZomIiIhIBwZLRERERDowWCIiIiLSgcESERERkQ4MloiIiIh0YLBEREREpAODJSIiIiIdGCwRkdFJJBLs3LlTZ5kRI0agV69epVIfU/Hz88OiRYvMXQ2jmD17Nry9vfX63RGVNwyWiKzEiBEjIJFIIJFIYGdnhzp16uDjjz9Gfn6+sowQAitXrkRwcDCcnZ3h5uaGli1bYtGiRXj8+DEA4OLFi+jTpw/8/PwgkUhMcrNPTExEly5dAAA3btyARCLB2bNnjf465nby5Em89dZb5q5GicXFxSEyMhIrVqxQ+d0979atW+jWrRucnJzg5eWF999/X+XvT5MePXqgevXqcH
BwgI+PD4YOHYp79+4p9x8+fBg9e/aEj48PKlWqhKZNm2Ljxo1GPT+ikmKwRGRFwsPDkZiYiKtXr2LSpEmYPXs25s2bp9w/dOhQjB8/Hj179sShQ4dw9uxZzJgxA7t27cK+ffsAAI8fP0atWrXwxRdfGHVV7mfJZLIyvWBuXl4egMIFi52cnMxcm5KLj48HAPTs2VPr766goADdunVDXl4ejh49ih9++AFr167FzJkzdR67ffv22Lp1Ky5fvozt27cjPj4effv2Ve4/evQoGjdujO3bt+Pff//FyJEjMWzYMPzyyy/GPUmikjDq4ilEZDLDhw8XPXv2VNnWqVMn8dJLLwkhhPjxxx8FALFz506158rlcvHo0SO17TVq1BALFy7U+bpyuVx4eHiIbdu2Kbc1adJEyGQy5c9//vmnsLOzE9nZ2UIIIQCIHTt2KP8PDWs6Kc5n3rx5QiaTCXd3dzF69GiRl5entS7Xrl0TPXr0EF5eXqJSpUqiZcuWYv/+/VrLX758WQAQcXFxKtsXLFggatWqJYQQIj8/X7zxxhvCz89PODg4iHr16olFixaplFfU9dNPPxU+Pj7Cz89P4/X76quvRGBgoHBychIvvPCCiIiIUFlnb82aNUIqlYqoqCjh7+8vKlWqJMLCwsS9e/dUXu/7778XAQEBws7OTshkMjFmzBjlvocPH4o333xTeHh4CBcXF9G+fXtx9uxZrddACCH+/fdf0b59e+Hg4CDc3d3FqFGjlPXStM6iJnv37hU2NjYqa54tW7ZMuLq6itzcXJ2v/6xdu3YJiUSi8/fctWtXMXLkSL2PSWRqbFkismKOjo7KVo6NGzeifv366Nmzp1o5iUQCqVRarNeQSCRo06YNDh8+DAB4+PAh4uLi8OTJE1y6dAkAcOTIEbz44osaW1lOnDgBADhw4AASExPx888/K/cdOnQI8fHxOHTokLKlYu3atVrrkpWVha5duyI6OhpnzpxBeHg4unfvjlu3bmksX69ePbRs2VKtW2fjxo14/fXXAQByuRwvvPACtm3bhtjYWMycORPTp0/H1q1bVZ4THR2Ny5cvY//+/VpbPWxsbLB48WJcvHgRP/zwAw4ePIgpU6aolHn8+DHmz5+P9evX448//sCtW7cwefJk5f5ly5ZhzJgxeOutt3D+/Hns3r0bderUUe7v168fUlJS8Ntvv+HUqVNo3rw5OnbsiLS0NI11ys7ORlhYGCpXroyTJ09i27ZtOHDgAMaOHQsAmDx5MtasWQOgsPs0MTFR43FiYmLQqFEjeHt7K7eFhYUhIyMDFy9e1Pic56WlpWHjxo1o1aoVKlasqLVceno63N3d9TomUakwd7RGRPp5tmVJLpeL/fv3C3t7ezF58mQhhBANGjQQPXr0MOiY+rQsCSHE4sWLRcOGDYUQQuzcuVMEBweLnj17imXLlgkhhAgNDRXTp09XlsczLUuKVcPPnDmjdj41atQQ+fn5ym39+vUTAwYMMOgcGjZsKL755hut+xcuXChq166t/Flba9OzxowZI/r06aNSV29vb7UWlKKu37Zt20SVKlWUP69Zs0YAENeuXVNuW7p0qfD29lb+7OvrKz788EONx/vzzz+Fq6uryMnJUdleu3ZtsWLFCo3PWblypahcubLIyspSbvv1119VWol27NihtUVJYdSoUaJz584q27KzswUAsXfvXp3PnTJlinBychIAxEsvvSQePHigteyPP/4o7OzsxIULF3Qek6g0sWWJyIr88ssvcHZ2hoODA7p06YIBAwZg9uzZAAoHd5tK27ZtERsbi/v37+PIkSNo164d2rVrh8OHD+Pp06c4evQo2rVrZ/BxGzZsCFtbW+XPPj4+SElJ0Vo+KysLkydPRoMGDeDm5gZnZ2fExcVpbVkCgIEDB+LGjRs4duwYgMJWpebNm8Pf319ZZunSpWjRogU8PT3h7OyMlStXqh2zUaNGsLOz03k+Bw4cQMeOHVG1alW4uLhg6N
ChSE1NVQ6uBwAnJyfUrl1b4zmnpKTg3r176Nixo8bjnzt3DllZWahSpQqcnZ2Vj4SEBOW4o+fFxcWhSZMmqFSpknLbyy+/DLlcjsuXL+s8H2N5//33cebMGezbtw+2trYYNmyYxr/XQ4cOYeTIkVi1ahUaNmxYKnUj0kcFc1eAiPTXvn17LFu2DHZ2dvD19UWFCv+9hevVq6fsFjO2Ro0awd3dHUeOHMGRI0fw2WefQSaTYe7cuTh58iSePn2KVq1aGXzc57tiJBIJ5HK51vKTJ0/G/v37MX/+fNSpUweOjo7o27evsitSE5lMhg4dOmDTpk146aWXsGnTJkRERCj3b9myBZMnT8ZXX32FkJAQuLi4YN68eTh+/LjKcZ4NNjS5ceMGXn31VUREROCzzz6Du7s7/vrrL7z55pvIy8tTdlFqOmdF4ODo6KjzNbKysuDj46PsEn2Wm5ubzueWlEwmU3apKiQnJyv36eLh4QEPDw/Uq1cPDRo0QLVq1XDs2DGEhIQoyxw5cgTdu3fHwoULMWzYMOOfAFEJsGWJyIpUqlQJderUQfXq1VUCJQB4/fXXceXKFezatUvteUIIpKenF/t1JRIJXnnlFezatQsXL15E69at0bhxY+Tm5mLFihVo2bKl1mBC0RpTUFBQ7NdX+PvvvzFixAj07t0bjRo1gkwmw40bN4p83uDBg/Hjjz8iJiYG169fx8CBA1WO2apVK4wePRrNmjVDnTp1tLbS6HLq1CnI5XJ89dVXeOmll1CvXj2VKfL6cHFxgZ+fH6KjozXub968OZKSklChQgXUqVNH5eHh4aHxOQ0aNMC5c+eQnZ2t3Pb333/DxsYG9evX17tuISEhOH/+vErL3/79++Hq6oqAgAC9j6MIhnNzc5XbDh8+jG7dumHu3LllIhUDlT0MlojKiP79+2PAgAEYNGgQPv/8c/zzzz+4efMmfvnlF4SGhuLQoUMACqe9nz17FmfPnkVeXh7u3r2Ls2fP4tq1azqP365dO2zevBlNmzaFs7MzbGxs0KZNG2zcuBFt27bV+jwvLy84OjoiKioKycnJJQra6tati59//hlnz57FuXPn8Prrr+tsiVJ47bXXkJmZiYiICLRv3x6+vr4qx/znn3/w+++/48qVK5gxYwZOnjxpcN3q1KmDp0+f4ptvvsH169exfv16LF++3ODjzJ49G1999RUWL16Mq1ev4vTp0/jmm28AAKGhoQgJCUGvXr2wb98+3LhxA0ePHsWHH36If/75R+PxBg8eDAcHBwwfPhwXLlzAoUOH8O6772Lo0KEqg7WL0rlzZwQEBGDo0KE4d+4cfv/9d3z00UcYM2aMMtXAiRMn4O/vj7t37wIAjh8/jiVLluDs2bO4efMmDh48iEGDBqF27drKVqVDhw6hW7duGDduHPr06YOkpCQkJSVpHbBOZBbmHTJFRPrSlDrgeQUFBWLZsmXixRdfFE5OTsLV1VW0aNFCfP311+Lx48dCiP8GXD//UEzp1+bMmTMCgPjggw+U2xYuXCgAiKioKJWyeGaAtxBCrFq1SlSrVk3Y2NiopQ541nvvvaezHgkJCaJ9+/bC0dFRVKtWTSxZskS0bdtWvPfeezrrLoQQ/fv3FwDE6tWrVbbn5OSIESNGCKlUKtzc3ERERISYOnWqaNKkibKMtmv//ADvBQsWCB8fH+Ho6CjCwsLEunXrBADx8OFDIcR/qQOepWlw9fLly0X9+vVFxYoVhY+Pj3j33XeV+zIyMsS7774rfH19RcWKFUW1atXE4MGDxa1bt7Seu67UAdrqoMmNGzdEly5dhKOjo/Dw8BCTJk0ST58+Ve4/dOiQACASEhJUXtfd3V3Y29sLPz8/8c4774g7d+4onzN8+PBi/T0SlSaJECYcFUpERERk5dgNR0RERKQDgyUiIiIiHRgsEREREenAYImIiIhIBwZLRERERDowWCIiIiLSgcESERERkQ4MloiIiIh0YLBEREREpAODJS
IiIiIdGCwRERER6cBgiYiIiEiH/wOfzEf6BgryOAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", + " warnings.warn(\n", + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1899: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", + " warnings.warn(\n", + "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", + "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. 
`past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings shape: (68, 1024)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHHCAYAAACvJxw8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZLhJREFUeJzt3Xl8TOf+B/DPZI8sEyHJJIjYk9hSSySoNSRo7VtKlbrcKqVFi1ZL3KqtlG62W7T2qy2qVQRBEftSJFJL1JYFIYmQReb5/eE3U5PMTGaSmcnM5PN+vfJq55znnPmek8mcr2eVCCEEiIiIiEgtm/IOgIiIiMicMVkiIiIi0oLJEhEREZEWTJaIiIiItGCyRERERKQFkyUiIiIiLZgsEREREWnBZImIiIhICyZLRERERFowWSKqQCQSCWbOnFneYag4efIkWrduDRcXF0gkEpw7d668Q7Iq5vI7N1Yca9euRWBgIOzt7eHh4WHw8xMBTJaIdLZmzRpIJBLlj5OTE+rXr49x48YhLS2tWPm0tDRMnjwZgYGBqFSpElxcXNC8eXN8+umnePTokdr3CA0NhUQiwdKlS3WO68aNGypx2drawt/fH3369DFY4pGQkICZM2fixo0bBjmfQkFBAQYMGICMjAx88cUXWLt2LWrWrGnQ9yiNu3fvYubMmaW6f9ru1fDhw1V+V4qfwMDAsgddAV2+fBnDhw9HnTp1sHLlSqxYsQKAfvdZLpdj/vz5qFWrFpycnNCkSRNs3LjR1JdCZs6uvAMgsjSzZs1CrVq1kJubi8OHD2Pp0qXYuXMnLl68iEqVKgF4XlvSvXt3PH78GEOHDkXz5s0BAKdOncLcuXNx6NAh7NmzR+W8V65cwcmTJxEQEID169djzJgxesUVHR2N7t27o7CwEImJiVi6dCl+//13HDt2DCEhIWW65oSEBMTExKBDhw4ICAgo07ledO3aNfz9999YuXIl/vWvfxnsvGV19+5dxMTEICAgQO97V9K9cnR0xH//+1+VbVKptAzRWoanT5/Czs6wj5wDBw5ALpdjyZIlqFu3rso+Xe/zRx99hLlz52LUqFFo2bIltm/fjtdeew0SiQSDBw82aLxkuZgsEempW7duaNGiBQDgX//6F6pUqYJFixZh+/btiI6OxqNHj9CnTx/Y2tri7Nmzxf41O3v2bKxcubLYedetWwdvb28sXLgQ/fv3x40bN/RKTJo1a4ahQ4cqX7dp0wY9e/bE0qVLsXz58tJdrJGlp6cDgNGbT4QQyM3NhbOzs1HfRxd2dnYqv6eKwsnJqcQyOTk5cHFx0fmc2j4/utznO3fuYOHChRg7diy+/vprAM//ptu3b4/3338fAwYMgK2trc7xkPViMxxRGXXq1AkAkJycDABYvnw57ty5g0WLFqmt9vfx8cH06dOLbd+wYQP69++PV155BVKpFBs2bDBoXJqcPXsW3bp1g7u7O1xdXdG5c2ccO3ZMuX/NmjUYMGAAAKBjx47KJo0DBw5oPe/+/fvx8ssvw8XFBR4eHujVqxcSExOV+4cPH4727dsDAAYMGACJRIIOHTpoPJ+iGfTQoUP497//jSpVqsDd3R3Dhg3Dw4cPVcoGBATglVdewe7du9GiRQs4OzsrE8br169jwIAB8PT0RKVKlRAWFobffvtNeeyBAwfQsmVLAMCIESOU17tmzRqt16vPvSosLERWVlaJ59Nk9erVkEgkWLVqlcr2zz77DBKJBDt37iz1uQ8cOKA2ZkVz74v3Yfjw4XB
1dcWdO3fQu3dvuLq6wsvLC5MnT0ZhYaHK8UX7LM2cORMSiQQJCQl47bXXULlyZbRt21a5f926dWjevDmcnZ3h6emJwYMH49atW8r9AQEBmDFjBgDAy8tLbZ+oku7z9u3bUVBQgLffflslzjFjxuD27duIj48v6XZRBcFkiaiMrl27BgCoUqUKAOCXX36Bs7Mz+vfvr/M5jh8/jqtXryI6OhoODg7o27cv1q9fb9C41Ll06RJefvllnD9/Hh988AE+/vhjJCcno0OHDjh+/DgAoF27dhg/fjwA4MMPP8TatWuxdu1aBAUFaTzv3r17ERkZifT0dMycORMTJ07E0aNH0aZNG2Vfnn//+9/48MMPAQDjx4/H2rVr8dFHH5V4XePGjUNiYiJmzpyJYcOGYf369ejduzeEECrlkpKSEB0djS5dumDJkiUICQlBWloaWrdujd27d+Ptt9/G7NmzkZubi549e2Lr1q0AgKCgIMyaNQsAMHr0aOX1tmvXrsTYdLlXT548gbu7O6RSKTw9PTF27Fg8fvy4xHO/aMSIEXjllVcwceJEZQJx4cIFxMTEYOTIkejevbte5yuLwsJCREZGokqVKvj888/Rvn17LFy4UNl/qCQDBgzAkydP8Nlnn2HUqFEAnte+Dhs2DPXq1cOiRYvw7rvvYt++fWjXrp2yv9/ixYvRp08fAMDSpUuxdu1a9O3bV3leXe7z2bNn4eLiUuyzHBoaqtxPBAAQRKST1atXCwBi79694t69e+LWrVti06ZNokqVKsLZ2Vncvn1bCCFE5cqVRdOmTfU697hx40SNGjWEXC4XQgixZ88eAUCcPXu2xGOTk5MFABETEyPu3bsnUlNTxYEDB8RLL70kAIiffvpJWRaAmDFjhvJ17969hYODg7h27Zpy2927d4Wbm5to166dctuWLVsEABEXF6fT9YSEhAhvb2/x4MED5bbz588LGxsbMWzYMOW2uLg4AUBs2bKlxHMq7n/z5s1Ffn6+cvv8+fMFALF9+3bltpo1awoAYteuXSrnePfddwUA8ccffyi3ZWdni1q1aomAgABRWFgohBDi5MmTAoBYvXq1Ttf7Im33aurUqWLKlCli8+bNYuPGjeKNN94QAESbNm1EQUGBXu+TkpIiPD09RZcuXUReXp546aWXhL+/v8jMzFQpV/R3XhLF76Ro/IrP2Yv3RBH/rFmzVMq+9NJLonnz5lrjmDFjhgAgoqOjVcrduHFD2NraitmzZ6tsv3DhgrCzs1PZrjjHvXv3VMrqep979OghateuXewe5OTkCABi6tSpxW8QVUisWSLSU0REBLy8vFCjRg0MHjwYrq6u2Lp1K6pVqwYAyMrKgpubm87ne/bsGTZv3oxBgwZBIpEAeN6E5u3trVft0owZM+Dl5QWZTIYOHTrg2rVrmDdvnsq/tl9UWFiIPXv2oHfv3qhdu7Zyu6+vL1577TUcPny4VE1FKSkpOHfuHIYPHw5PT0/l9iZNmqBLly5laiICntf22NvbK1+PGTMGdnZ2xc5bq1YtREZGqmzbuXMnQkNDVZp7XF1dMXr0aNy4cQMJCQlliq0kc+bMwdy5czFw4EAMHjwYa9aswezZs3HkyBH8+OOPep1LJpPhm2++QWxsLF5++WWcO3cOq1atgru7u5Gi1+ytt95Sef3yyy/j+vXrpTr2559/hlwux8CBA3H//n3lj0wmQ7169RAXF1fiOXW9z0+fPoWjo2Ox4xX9q54+farTNZD1Y7JEpCfFAyouLg4JCQm4fv26ykPZ3d0d2dnZOp9vz549uHfvHkJDQ3H16lVcvXoVycnJ6NixIzZu3Ai5XK7TeUaPHo3Y2Fjs27cPp0+fRnp6Oj744AON5e/du4cnT56gQYMGxfYFBQVBLper9BHR1d9//w0AGs97//595OTk6H1ehXr16qm8dnV1ha+vb7Gh+rVq1VIbm6a4FPtN7b333oONjQ327t2r97GDBw9Gjx49cOLECYwaNQq
dO3c2QoTaOTk5wcvLS2Vb5cqVi/Uj06To7+nKlSsQQqBevXrw8vJS+UlMTFR26taXuvvs7OyMvLy8YmVzc3OV+4kAjoYj0ltoaKhyNJw6gYGBOHfuHPLz8+Hg4FDi+RS1RwMHDlS7/+DBg+jYsWOJ56lXrx4iIiJKLFdRWMqDztnZGVWqVEFGRobexz548ACnTp0C8HzKArlcDhubsv0bWFG7WVTRDtsKZR0tVvT3JJfLIZFI8Pvvv6s9t6ura6nfp+h99vX1RVxcHIQQKtedkpICAPDz8yvVe5H1Yc0SkYG9+uqrePr0KX766acSy+bk5GD79u0YNGgQtmzZUuzH19e3zB29NfHy8kKlSpWQlJRUbN/ly5dhY2ODGjVqAND8AFVHMamkpvNWrVpVr+HhRV25ckXl9ePHj5GSkqLTNAs1a9bUGJdiP6Df9Ral77HZ2dm4f/9+sdoZXYwdOxbZ2dmYM2cODh8+jMWLF+t9jqIqV64MAMUmTjVVrVudOnUghECtWrUQERFR7CcsLKxU51V3n0NCQvDkyROVUZoAlIMbyjo/GVkPJktEBvbWW2/B19cXkyZNwl9//VVsf3p6Oj799FMAwNatW5GTk4OxY8eif//+xX5eeeUV/PTTT2qbCsrK1tYWXbt2xfbt21WasNLS0rBhwwa0bdtW2f9Fkdxomnn8Rb6+vggJCcH333+vUv7ixYvYs2dPmUdqrVixAgUFBcrXS5cuxbNnz9CtW7cSj+3evTtOnDihMiQ8JycHK1asQEBAAIKDgwHod71FaTo2NzdXbfPsf/7zHwghEBUVpdf7/Pjjj9i8eTPmzp2LqVOnYvDgwZg+fbraz5w+atasCVtbWxw6dEhl+7ffflum8+qqb9++sLW1RUxMTLERjkIIPHjwQOvx+tznXr16wd7eXuXahBBYtmwZqlWrhtatW5fxashasBmOyMAqV66MrVu3onv37ggJCVGZwfvMmTPYuHEjwsPDATxvgqtSpYrGL+WePXti5cqV+O233zR21C6LTz/9FLGxsWjbti3efvtt2NnZYfny5cjLy8P8+fOV5UJCQmBra4t58+YhMzMTjo6Oyk7o6ixYsADdunVDeHg4Ro4ciadPn+Krr76CVCot8/pg+fn56Ny5MwYOHIikpCR8++23aNu2LXr27FnisVOnTsXGjRvRrVs3jB8/Hp6envj++++RnJyMn376SdmEVadOHXh4eGDZsmVwc3ODi4sLWrVqpbYfVFGa7tWTJ0/w0ksvITo6Wjn/1u7du7Fz505ERUWhV69eOt+D9PR0jBkzBh07dsS4ceMAAF9//TXi4uIwfPhwHD58uNTNcVKpFAMGDMBXX30FiUSCOnXq4Ndffy11XyF91alTB59++immTZuGGzduoHfv3nBzc0NycjK2bt2K0aNHY/LkyRqPT01N1fk+V69eHe+++y4WLFiAgoICtGzZEtu2bcMff/yB9evXc0JK+kf5DcQjsiyKoesnT57Uqfzdu3fFe++9J+rXry+cnJxEpUqVRPPmzcXs2bNFZmamSEtLE3Z2duL111/XeI4nT56ISpUqiT59+mgsoxjSvWDBghJjgpph5GfOnBGRkZHC1dVVVKpUSXTs2FEcPXq02LErV64UtWvXFra2tjpNI7B3717Rpk0b4ezsLNzd3cWrr74qEhISVMqUZuqAgwcPitGjR4vKlSsLV1dXMWTIEJUpCoR4PnVAjx491J7n2rVron///sLDw0M4OTmJ0NBQ8euvvxYrt337dhEcHCzs7Oz0nkZA3b16+PChGDp0qKhbt66oVKmScHR0FA0bNhSfffaZylQIuujbt69wc3MTN27cKBYzADFv3jzlNnW/85Lcu3dP9OvXT1SqVElUrlxZ/Pvf/xYXL15UO3WAi4tLseMVQ/pfVDQOTcP+FX766SfRtm1b4eLiIlxcXERgYKAYO3asSEpK0noOfe9zYWGh+Oyzz0TNmjWFg4ODaNiwoVi3bp2ut4o
qCIkQReo5iYjM0Jo1azBixAicPHlSawd7IiJDY58lIiIiIi3YZ4mISAdPnz5FZmam1jKenp46TRehTWpqqtb9zs7OkEqlep+3sLAQ9+7d01rG1dW11EPziawZkyUiIh1s3rwZI0aM0FomLi5O62LAuvD19dW6/4033tBpUd+ibt26VWIH9RkzZpS5Az6RNWKfJSIiHaSkpODSpUtayzRv3lw5T1FplTSTt5+fn3KKA33k5ubi8OHDWsvUrl1bZekbInqOyRIRERGRFuzgTURERKQF+ywZgFwux927d+Hm5lamZRKIiIjIdIQQyM7Ohp+fn9aJXJksGcDdu3eVa2gRERGRZbl16xaqV6+ucT+TJQNwc3MD8PxmK9bSIiIiIvOWlZWFGjVqKJ/jmjBZMgBF05u7uzuTJSIiIgtTUhcadvAmIiIi0oLJEhEREZEWTJaIiIiItGCyRERERKQFkyUiIiIiLZgsEREREWnBZImIiIhICyZLRERERFowWSIiIiLSgjN4E5FZKJQLnEjOQHp2LrzdnBBayxO2NlyYmojKH5MlIip3uy6mIGZHAlIyc5XbfKVOmPFqMKIa+ZZjZEREbIYjonK262IKxqw7o5IoAUBqZi7GrDuDXRdTyikyIqLnmCwRUbkplAvE7EiAULNPsS1mRwIK5epKEBGZBpMlIio3J5IzitUovUgASMnMxYnkDNMFRURUBJMlIio36dmaE6XSlCMiMgYmS0RUbrzdnAxajojIGJgsEVG5Ca3lCV+pEzRNECDB81FxobU8TRkWEZEKJktEVG5sbSSY8WowABRLmBSvZ7wazPmWiKhcMVkionIV1cgXS4c2g0yq2tQmkzph6dBmnGeJiModJ6UkonIX1cgXXYJlnMGbiMwSkyUiMgu2NhKE16lS3mEQERXDZjgiIiIiLZgsEREREWnBZjgiMqlCuWDfJCKyKEyWiMhkdl1MQcyOBJUlTnylTpjxajBHvRGR2WIzHBGZxK6LKRiz7kyxteBSM3MxZt0Z7LqYUk6RERFpx2SJiIyuUC4QsyMBQs0+xbaYHQkolKsrQURUvpgsEZHRnUjOKFaj9CIBICUzFyeSM0wXFBGRjpgsEZHRpWdrTpRKU46IyJSYLBGR0Xm7OZVcSI9yRESmxGSJiIwutJYnfKVOxRbLVZDg+ai40FqepgyLiEgnTJaIyOhsbSSY8WowABRLmBSvZ7wazPmWiMgsMVkiIpOIauSLpUObQSZVbWqTSZ2wdGgzzrNERGaLk1ISkclENfJFl2AZZ/AmIovCZImoAjCnJUZsbSQIr1NFJbb4aw/MIjYiInWYLBFZOXNeYsScYyMiUmCfJSIrZs5LjJhzbEREL2KyRGSlzHmJEXOOjYioKCZLRFbKnJcYMefYiIiKYrJEZKXMeYkRc46NiKgoJktEVsqclxgx59iIiIpiskRkpcx5iRFzjo2IqCgmS0RWypyXGDHn2IiIimKyRGTFzHmJEXOOjYjoRRIhBMfmllFWVhakUikyMzPh7u5e3uEQFWNOM3gXZc6xEZF10/X5zRm8iSqAokuMmBNzjo2ICGCyRERkcqxNI7IsTJaIiEyI6+ERWR528CayQoVygfhrD7D93B3EX3vAZUPMBNfDI7JMrFkisjJlqblg85DxlLQengTP18PrEizjPScyM0yWiKyIouai6ANZUXOhbUg+m4eMS5/18Njhnci8sBmOyEqUVHMBPK+5UNckZ07NQ9bahMj18IgsF2uWiMycrk1jpa25MKfmIWuu3eJ6eESWi8kSkRnTJ3kobc2FuTQPlaUJ0RKE1vKEzN0RqVl5avdL8Hz2cq6HR2R+LK4Z7ptvvkFAQACcnJzQqlUrnDhxQmv5LVu2IDAwEE5OTmjcuDF27typ3FdQUIApU6agcePGcHFxgZ+fH4YNG4a7d+8a+zKISqRv01hpay7MoXmoLE2IliI2IRW5z+Rq93E9PCLzZlHJ0ubNmzFx4kTMmDEDZ86cQdO
mTREZGYn09HS15Y8ePYro6GiMHDkSZ8+eRe/evdG7d29cvHgRAPDkyROcOXMGH3/8Mc6cOYOff/4ZSUlJ6Nmzpykvi6iY0iQPobU84St1KrYwrYIEz2ulitZcmEPzkD61W5ZIkfg+elKgdr+0kr3F15wRWTOLSpYWLVqEUaNGYcSIEQgODsayZctQqVIlrFq1Sm35JUuWICoqCu+//z6CgoLwn//8B82aNcPXX38NAJBKpYiNjcXAgQPRoEEDhIWF4euvv8bp06dx8+ZNU14akYrSJA+2NhLMeDUYAIolTNpqLkqbZBmSOdRuGYu2xFfB2d4WXYJlJouJiPRjMclSfn4+Tp8+jYiICOU2GxsbREREID4+Xu0x8fHxKuUBIDIyUmN5AMjMzIREIoGHh4dB4iYqjdImD1GNfLF0aDPIpKq1QDKpk8aai9ImWYZkDrVbxlJS4gtYdq2ZpbHW0ZZkXBbTwfv+/fsoLCyEj4+PynYfHx9cvnxZ7TGpqalqy6empqotn5ubiylTpiA6Olrr6sN5eXnIy/unk2ZWVpaul0Gkk7IkD1GNfNElWKbX5JKKJKtoZ3KZlpFohpzAUlG7lZqZq7YGxpI7P1tzrZmlsebRlmRcFpMsGVtBQQEGDhwIIQSWLl2qteycOXMQExNjosioIipr8mBrI9F75Jo+SZahHzqK2q0x685AAqhcs6V3frbmWjNLYu2jLcm4LKYZrmrVqrC1tUVaWprK9rS0NMhk6tv6ZTKZTuUVidLff/+N2NhYrbVKADBt2jRkZmYqf27dulWKKyLSrLyaxhRJVq+QagivU0VjomSMCSxL04RoCcyhT1hFVxFGW5JxWUyy5ODggObNm2Pfvn3KbXK5HPv27UN4eLjaY8LDw1XKA0BsbKxKeUWidOXKFezduxdVqpT8r3FHR0e4u7ur/BAZmjkmD8Z+6EQ18sXhKZ2wcVQYlgwOwcZRYTg8pZPO12qO/VHMoU9YRWftoy3J+CyqGW7ixIl444030KJFC4SGhmLx4sXIycnBiBEjAADDhg1DtWrVMGfOHADAhAkT0L59eyxcuBA9evTApk2bcOrUKaxYsQLA80Spf//+OHPmDH799VcUFhYq+zN5enrCwcGhfC6U6P+Vpv+RMZliAkt9mhBf7Dd14/4TbDxxE6lZqk2DH/cIRmUXh3K9f6XpE0aGw35jVFYWlSwNGjQI9+7dwyeffILU1FSEhIRg165dyk7cN2/ehI3NP5VlrVu3xoYNGzB9+nR8+OGHqFevHrZt24ZGjRoBAO7cuYNffvkFABASEqLyXnFxcejQoYNJrotIm9L0P9KkaKfs5jUr4/TfD3VOJMzpoaOu31RRKZm5eHvDGZVt5dWh19wS34qE/caorCRCiPKvp7ZwWVlZkEqlyMzMZJMcmS11yYWNBHixpaqkRCL+2gNErzxW4nttHBVWLkuj6EKRmlhyPyjST6FcoO28/SUOmDg8pROT1wpG1+e3xfRZIqLS09Qpu2iXnpI6aZtDZ2VdJnnUhh16Kx72G6OyYrJEZOX0SS5KSiTM4aGjyySPJWGH3orHHAdMkOWwqD5LRKQ/fZOLkjppl3dnZUP2h2KH3oqF/caotJgsEVm50iYE2o4rz4eOITvhskNvxWPIARNUcTBZIrJypU0ISjquvB46Jc1urgtLXj6FNDPkEjxEL2KyRGTl9E0uzD2R0LY0ii7Yodc6cd03MiZ28Caycto6ZRdlKYmExs667o54L6Kecvbvb197Cb7s0Gv1jLUED5EC51kyAM6zRJbAEPMsmRtdml3YNFM25n7/FHMoaRrEwDmUSBtdn99shiOqINR1ytZ3Bm9zo0u/KXboLT1LaNoyxRI8REyWiCoQdYkDHyCkjqZZ0hVNW+bSlGlOS/CQ9WKfJSIiUqFtIlNzmwGd676RKTBZIiIiFfo0bZU3c1iCh6wfkyUiMnuFcoH4aw+w/dwdxF97YBY1GtbMkpq2zGEJHrJ+7LNEVAr
mPkLImlhCJ2NrY2lNW+W9BA9ZPyZLRHriw9t0LKWTsbUpaSJTc5y4lOu+kTGxGY5ID5z8znBKalqzpE7G1sZSm7YUoz17hVRDeJ0qZhcfWS7WLBHpqKSHtwTPH95dgmX8ki6BLrVznD+nfLFpSzs2xVcsTJaIdMSHt2Ho2rRmSZ2MrRWbttRjU3zFw2Y4Ih3x4V12+jStWVonY2vFpi1VbIqvmJgsEemID++y06d2Tt/5czi9ABkb+9FVXGyGI9KRJY4QMjf61M4pOhmPWXcGEkDlnhftZMxmETIFNsVXXKxZItKRpY4QMif61s4pOhnLpKrHyaROyr5NbBYhU2FTfMXFmiUiPXCEUNmUpnZOWydjjlAkU2JTfMWld7JUWFgIW1tb5esTJ05ALpfjpZdegqOjo0GDIzJHHCFUevo0rRU9Tl2zBptFOITdlNgUX3Hp3Az3999/o0WLFnB0dES3bt2QlZWFLl26ICwsDK1bt0ZwcDD++usvY8ZKZDbKe4SQJXdm1qVpTVcVvVlk18UUtJ23H9Erj2HCpnOIXnkMbeftZ9OjkbApvuKSCCF0+pbt378/7t+/j8mTJ2Pt2rW4c+cO7O3tsW7dOtjY2GDEiBFwdnbG1q1bjR2z2cnKyoJUKkVmZibc3d3LOxyyctbSmdkQNSLx1x4geuWxEsttHBVmdTVLmuarUtxBdYkna6EMw1r+Bkn357fOyZK3tzf27NmDkJAQZGZmonLlyjh06BDatm0LADhz5gy6d++O1NRUw1yBBWGyRKZSmgekNSuUC7Sdt7/EZpHDUzpZVVKguG5NTZDqrruiPOBNlRAy8bQOuj6/de6zlJubC6lUCgBwc3ODra0t3NzclPvd3d3x5MmTMoRMRNqwM3Nxpe0DZen07atVURYkNmVCqKkfHVknnfssNWzYEKtWrQIAfP/996hSpQo2bdqk3L9x40bUr1/f8BESEQD9HpAVgaLfVt4zOd6NqAcfd9UBJqXpA2Up9OmrVVEmUuQUEmRMOtcszZw5E71798b8+fNhY2OD3bt3Y9SoUdi/fz9sbGxw8uRJbNiwwZixElVoFb0z84vU1SDI3J3wXkR9BFStZPXNIvoMYa8IIwZZ60rGpnPNUmRkJBITE7FhwwYkJiaiXbt2OHToECIjI9GmTRvExsZi4MCBxoyVqELjHC/PaapBSMvKxeK9f8HRzsbq1zDTZymYipBks9aVjE2veZYCAgIQEBCgfO3j44NZs2YZOiYiUoNzvBi/BsFSOu3q01erIiTZFSEhrKjM5W/SYDN4P3v2DHfv3oW/v7+hTklEL6ionZlfZMwmJUsbLabrbPIVIcmuCAlhRWROf5MGWxvu0qVLqFWrlqFOR0RqGHJCR0tkrBoES+0cHNXIF4endMLGUWFYMjgEG0eF4fCUTiqfg4owkaI+zZJkGcztb5JrwxFZmIq83IoxahAsvXOwLkPYrX1NQ9a6Whdz/JvUOVlq1qyZ1v1Pnz4tczBEpJuKOseLMZqUKsJoMcD6k2xrTwgrEnP8m9Q5WUpISMDgwYM1NrWlpKRwbTgiMipj1CBUpM7B1ppkKzoB5z2T4/MBTQEB3M/JM1pCaC6djq2VOf5N6pwsNWrUCK1atcKYMWPU7j937hxWrlxpsMCIiNQxdA0COwdbNm2dgI2RGJpTp2NrZY5/kzonS23atEFSUpLG/W5ubmjXrp1BgiIi0saQTUoVYbSYtTL1Mi4VZdmY8maOf5M6L6RLmnEhXSLLpngIAuqb9vgQND+lWUzYkt6vojPV36Suz2+DTR1ARGSpKvqUDJbI1LN2c5Zw0zK3v0lOHUBEBOsfLWZtTN0J2Bw7HVs7c/qbNFiyFBQUhL/++guFhYWGOiURkUlZ62gxa2TqTsDm2Om4IjCXv0mDJUtz5sxBZmamoU5HRCbEodBkaUzdCdgcOx2T6RgsWerdu7ehTkVEJsSh0GSJTD1rN2cJr9hK1cE7MzMTSUlJSEpKYm0SkQU
zt/WXiPRh6k7A5tbpmExHr6kD/vvf/2LRokXF5ltq0KABJk2ahJEjRxo8QEvAqQPIEnEoNFkLUzcjs9naeuj6/Na5GW7BggWYOXMmxo8fj8jISPj4+AAA0tLSsGfPHkyYMAEPHz7E5MmTyx49ERmdOa6/RFQapu4EbC6djsl0dE6Wvv76a6xevRoDBw5U2R4UFIQOHTqgadOmeP/995ksEVkIDoUmItKNzn2W0tPT0bhxY437GzdujPv37xskKCIyPg6FJiLSjc7JUsuWLTF37lw8e/as2L7CwkLMmzcPLVu2NGhwRGQ8iqHQmnpaSPB8VByHQhNRRadXM1xkZCRkMhnatWun0mfp0KFDcHBwwJ49e4wWKBEZFodCExHpRq/RcNnZ2Vi3bh2OHTuG1NRUAIBMJkN4eDhee+21CjsSjKPhyJJxniUiqqh0fX7rlSyRekyWyNKZYig0h1sTkbkx+NQBRGT+SpuQGHsoNGuviMiSMVkishLmmpAoZgkvWoWtmCWcMx8Tkbkr1XInRGRezHXZkkK5QMyOBLULjyq2xexIQKGcvQGIyHzplCz9+eefkMvlxo6FCMDzB2z8tQfYfu4O4q894IO0BOackOgzSzgRkbnSqRnupZdeQkpKCry9vVG7dm2cPHkSVapwqncyPHNtSjJn5rxsCWcJJyJroFPNkoeHB5KTkwEAN27cYC0TGYW5NiWZO3NOSHSd/fvG/SdGjoSIqPR0qlnq168f2rdvD19fX0gkErRo0QK2trZqy16/ft2gAVLFUFJTkgTPm5K6BMs43LwIc162JLSWJ2TujkjNytNabtPJmxjXqS5/t0RklnRKllasWIG+ffvi6tWrGD9+PEaNGgU3Nzdjx0YViDk3JZk7xbIlqZm5apNNCQBZOS1bYmsjQXSoP77Ye0VrOf5u/8H5qIjMj85TB0RFRQEATp8+jQkTJjBZIoMy56Ykc2fuy5YEVHXRqRx/t+yzR2Su9J46YPXq1cpE6fbt27h9+7bBg9Lmm2++QUBAAJycnNCqVSucOHFCa/ktW7YgMDAQTk5OaNy4MXbu3KmyXwiBTz75BL6+vnB2dkZERASuXNH+r2AyPHNuSrIEUY18sXRoM8ikqvdHJnUq93mM+LvVDfvsEZkvvZMluVyOWbNmQSqVombNmqhZsyY8PDzwn//8x+gdvzdv3oyJEydixowZOHPmDJo2bYrIyEikp6erLX/06FFER0dj5MiROHv2LHr37o3evXvj4sWLyjLz58/Hl19+iWXLluH48eNwcXFBZGQkcnP5r1xTUjQlaar7kOD5v7DLoynJUkQ18sXhKZ2wcVQYlgwOwcZRYTg8pVO510jwd1syc57+gYhKsTbctGnT8N133yEmJgZt2rQBABw+fBgzZ87EqFGjMHv2bKMECgCtWrVCy5Yt8fXXXwN4nrjVqFED77zzDqZOnVqs/KBBg5CTk4Nff/1VuS0sLAwhISFYtmwZhBDw8/PDpEmTMHnyZABAZmYmfHx8sGbNGgwePFinuLg2nGEo/mUNqG9KKu8aEio9/m61i7/2ANErj5VYbuOoMPbrIjIgXZ/fetcsff/99/jvf/+LMWPGoEmTJmjSpAnefvttrFy5EmvWrClLzFrl5+fj9OnTiIiIUG6zsbFBREQE4uPj1R4THx+vUh4AIiMjleWTk5ORmpqqUkYqlaJVq1YazwkAeXl5yMrKUvmhsjPnpiQqG/5utWOfPSLzpvfacBkZGQgMDCy2PTAwEBkZxpuF9/79+ygsLISPj4/Kdh8fH1y+fFntMampqWrLp6amKvcrtmkqo86cOXMQExOj9zVQyaIa+aJLsIyjgawQf7easV8XkXnTu2apadOmymawF3399ddo2rSpQYIyd9OmTUNmZqby59atW+UdklWxtZEgvE4V9AqphvA6VfgwtSL83arHfl1E5k3vmqX58+ejR48e2Lt3L8LDwwE8b+66detWsZFmhlS1alXY2toiLS1NZXt
aWhpkMpnaY2Qymdbyiv+mpaXB19dXpUxISIjGWBwdHeHo6FiayyAiKsbcp38gquj0rllq3749/vrrL/Tp0wePHj3Co0eP0LdvXyQlJeHll182RowAAAcHBzRv3hz79u1TbpPL5di3b58yaSsqPDxcpTwAxMbGKsvXqlULMplMpUxWVhaOHz+u8ZxERMbAfl1E5kvvmiUA8PPzM+qoN00mTpyIN954Ay1atEBoaCgWL16MnJwcjBgxAgAwbNgwVKtWDXPmzAEATJgwAe3bt8fChQvRo0cPbNq0CadOncKKFSsAABKJBO+++y4+/fRT1KtXD7Vq1cLHH38MPz8/9O7d2+TXR0QVh7qZus29XxdnF6eKqlTJUnkZNGgQ7t27h08++QSpqakICQnBrl27lB20b968CRubfyrLWrdujQ0bNmD69On48MMPUa9ePWzbtg2NGjVSlvnggw+Qk5OD0aNH49GjR2jbti127doFJyd2pCQi4yhppm5znB6As4tTRab3PEtUHOdZIiJdKeacKvrFa85zTllizES6MNo8S0REVDqWOFO3JcZMZGhMloiITOREckaxtd9eJACkZObiRLLx5qzTlyXGTGRopUqWnj17hr1792L58uXIzs4GANy9exePHz82aHBERNbEEmfqtsSYiQxN7w7ef//9N6KionDz5k3k5eWhS5cucHNzw7x585CXl4dly5YZI04iIotniTN1W2LMRIamd83ShAkT0KJFCzx8+BDOzs7K7X369Ck2pxEREf3DEmfqtsSYiQxN72Tpjz/+wPTp0+Hg4KCyPSAgAHfu3DFYYERE1kYxUzeAYsmHuc7UbYkxExma3smSXC5HYWFhse23b9+Gm5ubQYIiIrJWljhTtyXGTGRIes+zNGjQIEilUqxYsQJubm74888/4eXlhV69esHf3x+rV682Vqxmi/MsEZG+LHE2bEuMmUgbXZ/feidLt2/fRmRkJIQQuHLlClq0aIErV66gatWqOHToELy9vcscvKVhskSWhA88Mmf8fJIpGS1ZAp5PHbB582acP38ejx8/RrNmzTBkyBCVDt8VCZMlshRcsoLMGT+fZGpGTZZIFZMlsgRcsoLMGT+fVB6MttzJnDlzsGrVqmLbV61ahXnz5ul7OiIyAS5ZQeaMn08yd3onS8uXL0dgYGCx7Q0bNuSElERmiktWkDnj55PMnd4zeKempsLXt3hVqJeXF1JSUgwSFBEZFpesIG3Ku1M1P59k7vROlmrUqIEjR46gVq1aKtuPHDkCPz8/gwVGRIbDJStIE3PoVM3PJ5k7vZOlUaNG4d1330VBQQE6deoEANi3bx8++OADTJo0yeABElHZKZasSM3MVdsvRILnEwxyyYqKRVOn6tTMXIxZd8Zknar5+SRzp3ey9P777+PBgwd4++23kZ+fDwBwcnLClClTMG3aNIMHSERlp1iyYsy6M5AAKg8kLllhGuXd1KUuHm2dqiV43qm6S7DM6HHy80nmrtRTBzx+/BiJiYlwdnZGvXr14OjoaOjYLAanDiBLYQ5NLhWROd73+GsPEL3yWInlNo4KQ3idKiaIyDzvE1k3XZ/fetcsKbi6uqJly5alPZyIykFUI190CZaZVQ2HtTOXpq6izLFTNT+fZK70TpZycnIwd+5c7Nu3D+np6ZDL5Sr7r1+/brDgiMjwbG0kJqspqOh0mT/ow60X0CnQBw52es/kUibm2qman08yR3onS//6179w8OBBvP766/D19YVEwoyfiEidkuYPAoCMnAKEzdmHz/o0MmkNEztVE+lO72Tp999/x2+//YY2bdoYIx4islDm1oHZHOjahJWRk2/yJjl2qibSnd7JUuXKleHpyX9pENE/2DFXPX2bsEw1+kwhqpEvlg5tVux3J+PvjkiF3qPh1q1bh+3bt+P7779HpUqVjBWXReFoOKrIuACqZoVygbbz9mts6lLHlKPPFFgrSBWV0UbDLVy4ENeuXYOPjw8CAgJgb2+
vsv/MmTP6R0tEFsmc5uoxRy82demqPJb0YKdqIu30TpZ69+5thDCIyBLpswBqRX0YK5q6Ptx6ARk5BSWW55IeROZH72RpxowZxoiDiCyQOc7VY46iGvmiU6APwubsQ0ZOvsZyvhx9RmSWTDuxBxFZFXOdq8ccOdjZYEDzalrL9GzqaxXNlYVygfhrD7D93B3EX3uAQnmpFoqwSrw3lknvmqXCwkJ88cUX+N///oebN28q14dTyMjIMFhwRGTeOFeP7grlAr+cT9Fa5pfzKfggKsiiEyaOjNSM98Zy6V2zFBMTg0WLFmHQoEHIzMzExIkT0bdvX9jY2GDmzJlGCJGIzJWiAzPwz+g3Bc7Vo0qXCSoV/bsslWJkZNHrVCztsuui9mTRmvHeWDa9k6X169dj5cqVmDRpEuzs7BAdHY3//ve/+OSTT3DsWMmLMhKRdVF0YJZJVZvaZFKnCj1tQFHW3r9Ll6VdYnYkVMhmJ94by6d3M1xqaioaN24M4PliupmZmQCAV155BR9//LFhoyMii8AFUEtm7f27ODJSM94b/Znb3F96J0vVq1dHSkoK/P39UadOHezZswfNmjXDyZMn4ejoaIwYicgCcK4e7ay9f5e115yVBe+Nfsyxb5fezXB9+vTBvn37AADvvPMOPv74Y9SrVw/Dhg3Dm2++afAAiYisgbX377L2mrOy4L3Rnbn27dK7Zmnu3LnK/x80aBD8/f0RHx+PevXq4dVXXzVocERE1sSa12Kz9pqzsuC90Y05rwigd7JUVHh4OMLDww0RCxGR1bPW/l0vLu0iAVQeeNZQc1YWvDe6Mee+XTolS7/88gu6desGe3t7/PLLL1rL9uzZ0yCBERFZK2vt32XNNWdlxXtTMnPu2yURQpQ4VtHGxgapqanw9vaGjY3mbk4SiQSFhYUGDdAS6LpqMRFRRWBuI5nMCe+NZvHXHiB6ZclTEG0cFWawf2zo+vzWqWZJLper/X8iIqKirLXmzBB4bzQz575deo2GKygoQOfOnXHlyhVjxUNEREQVkDmPGNUrWbK3t8eff/5prFiIiIioAjPXFQF06rP0ovfeew+Ojo4qUwhUdOyzRGRe2C+EyLKZ6m/YoH2WXvTs2TOsWrUKe/fuRfPmzeHi4qKyf9GiRfpHS0SkJ01fpuY4+y8R6cfc+nbpnSxdvHgRzZo1AwD89ddfKvskEv7LjYiMT1NC1LOpL1YcSi7WOVQx+y8X9iWi0tC7GY6KYzMckekolkPQ94tLMZLm8JRObJIjIgC6P7/1XhuOiKi8aFsOoSQvzv5LRKSPUi13curUKfzvf//DzZs3kZ+fr7Lv559/NkhgRERFlbQcgi64sjsR6UvvmqVNmzahdevWSExMxNatW1FQUIBLly5h//79kEqlxoiRiAiAYRIdruxORPrSO1n67LPP8MUXX2DHjh1wcHDAkiVLcPnyZQwcOBD+/v7GiJGICEDZEh0JnncCr+gruxOR/vROlq5du4YePXoAABwcHJCTkwOJRIL33nsPK1asMHiAREQKiuUQSuqebW6z/xKRZdM7WapcuTKys7MBANWqVcPFixcBAI8ePcKTJ08MGx0R0QtKWg5BAuDf7WqZ3ey/RGTZ9O7g3a5dO8TGxqJx48YYMGAAJkyYgP379yM2NhadO3c2RoxEREqK5RCKzrMke2HiyQ+igjiDNxEZjN7zLGVkZCA3Nxd+fn6Qy+WYP38+jh49inr16mH69OmoXLmysWI1W5xnicj0uKQJEZWVrs9vTkppAEyWiIjKB5NmKgujrQ0XERGBoUOHom/fvkwMiIio3HAdQDIVvTt4N2zYENOmTYNMJsOAAQOwfft2FBQUGCM2IiIitRTL3hSdpFSxDuCuiynlFBlZI72TpSVLluDOnTvYtm0bXFxcMGzYMPj4+GD06NE4ePCgMWIkIiJS0rbsjWJbzI4EFMrZy4QMo1Rrw9nY2KBr165Ys2YN0tLSsHz5cpw4cQKdOnUydHx
EREQqSlr2husAkqGVam04hdTUVGzatAnr1q3Dn3/+idDQUEPFRUREpJauy95wHUAyFL2TpaysLPz000/YsGEDDhw4gNq1a2PIkCHYvHkz6tSpY4wYycxw9Anpi58Z62fK37Guy95wHUAyFL2TJR8fH1SuXBmDBg3CnDlz0KJFC2PERaVgii8rjj4hffEzY/1M/TtWLHuTmpmrtt+SBM8nKeU6gGQoes+zpJip28amVN2drJI5zLNkii8rxeiToh8YRTrG5SSoqLJ8ZlgbZRnK63tB8b4AVN6b30ekD12f33pnPF26dCmXRCkjIwNDhgyBu7s7PDw8MHLkSDx+/FjrMbm5uRg7diyqVKkCV1dX9OvXD2lpacr958+fR3R0NGrUqAFnZ2cEBQVhyZIlxr4UgzPFEFqOPiF9leUzs+tiCtrO24/olccwYdM5RK88hrbz9nM4uJkpz+8FxbI3XAeQTKFMHbxNaciQIUhJSUFsbCwKCgowYsQIjB49Ghs2bNB4zHvvvYfffvsNW7ZsgVQqxbhx49C3b18cOXIEAHD69Gl4e3tj3bp1qFGjBo4ePYrRo0fD1tYW48aNM9WllUlJX1YSPP+y6hIsK9O/yvUZfRJep0qp34esR2k/M5pqKhTJPx+E5qO8vxeiGvmiS7CMNZBkdBaRLCUmJmLXrl04efKkso/UV199he7du+Pzzz+Hn59fsWMyMzPx3XffYcOGDcopDVavXo2goCAcO3YMYWFhePPNN1WOqV27NuLj4/Hzzz9bTLJkqi8rjj4hfZXmM2Oq5J8Mwxy+F2xtJPwHGhmdRXQ8io+Ph4eHh0pn8oiICNjY2OD48eNqjzl9+jQKCgoQERGh3BYYGAh/f3/Ex8drfK/MzEx4elpOp0BTfVlx9AnpqzSfGc6fY1n4vUAVhUXULKWmpsLb21tlm52dHTw9PZGamqrxGAcHB3h4eKhs9/Hx0XjM0aNHsXnzZvz2229a48nLy0NeXp7ydVZWlg5XYRym+rLi6BPSV2k+M+ZQU0G64/cCVRSlqlnKycnBzp07sWzZMnz55ZcqP/qYOnUqJBKJ1p/Lly+XJkS9Xbx4Eb169cKMGTPQtWtXrWXnzJkDqVSq/KlRo4ZJYlRH8WWlqUFCguej4sr6ZWVrI8GMV4OV5yz6HgAw49VgNo2QUmk+M6ypsCz8XqCKQu+apbNnz6J79+548uQJcnJy4Onpifv376NSpUrw9vbG+PHjdT7XpEmTMHz4cK1lateuDZlMhvT0dJXtz549Q0ZGBmQymdrjZDIZ8vPz8ejRI5XapbS0tGLHJCQkoHPnzhg9ejSmT59eYtzTpk3DxIkTla+zsrLKLWFSfFmNWXcGEqgfQmuoLyvF6JOiUxTIOGcOaaDvZ4Y1FZaH3wtUEeg9z1KHDh1Qv359LFu2DFKpFOfPn4e9vT2GDh2KCRMmoG/fvgYPMjExEcHBwTh16hSaN28OANizZw+ioqJw+/ZtjR28vby8sHHjRvTr1w8AkJSUhMDAQMTHxyMsLAwAcOnSJXTq1AlvvPEG5s+fX6r4Kso8Swqc/4b0pc9nhvPnWCZ+L5Al0vX5rXey5OHhgePHj6NBgwbw8PBAfHw8goKCcPz4cbzxxhtGazbr1q0b0tLSsGzZMuXUAS1atFBOHXDnzh107twZP/zwg3KNujFjxmDnzp1Ys2YN3N3d8c477wB43jcJeN701qlTJ0RGRmLBggXK97K1tYWXl5fOsZlDsgTwy4qsB2f9JiJT0PX5rXcznL29vXJSSm9vb9y8eRNBQUGQSqW4detW6SMuwfr16zFu3Djl7OH9+vVT6SNVUFCApKQkPHnyRLntiy++UJbNy8tDZGQkvv32W+X+H3/8Effu3cO6deuwbt065faaNWvixo0bRrsWY+EQWrIWnD+HDIH/gCRD0btmqWvXrhg+fDhee+01jBo1Cn/++SfGjx+PtWvX4uHDhxqH8lszc6lZIiLd8CFq/Vg7SbowWjP
cqVOnkJ2djY4dOyI9PR3Dhg3D0aNHUa9ePaxatQpNmzYtc/CWhskSkeXgQ9T6cR1L0pXRkiUqjskSkWXgQ9T6FcoF2s7br3FyU8WIysNTOrE2kYy3kC4RkSXiYtAVA2eBJ2NgskREFQIfohUDZ4EnY2CyREQVAh+iFQNngSdjYLJERBUCH6IVg6mWgKKKhckSEVUIfIhWDFyvjoxB70kpAWDfvn3Yt28f0tPTIZfLVfatWrXKIIERERmSKddRpPLF9erI0PROlmJiYjBr1iy0aNECvr6+kEj4xUJEloEP0YqDs8CTIek9z5Kvry/mz5+P119/3VgxWRzOs0RkWTiDNxEBRlwbLj8/H61bty5TcERE5YnrKBKRPvTu4P2vf/0LGzZsMEYsREREOimUC8Rfe4Dt5+4g/toDTiZKRqVTzdLEiROV/y+Xy7FixQrs3bsXTZo0gb29vUrZRYsWGTZCIiKiF3B9PzI1nZKls2fPqrwOCQkBAFy8eNHgAREREWmiaX2/1MxcjFl3huv7kVHolCzFxcUZOw4iIiKtdFnf78OtF9Ap0AcOdpxGkAxH70/Tm2++iezs7GLbc3Jy8OabbxokKCIioqJKWt8PADJyChA2Zx92XUwxUVRUEeidLH3//fd4+vRpse1Pnz7FDz/8YJCgiIiIitJ13b6MnHyMWXeGCRMZjM5TB2RlZUEIASEEsrOz4eT0z/pJhYWF2LlzJ7y9vY0SJBERkb7r9sXsSECXYBnn0KIy0zlZ8vDwgEQigUQiQf369Yvtl0gkiImJMWhwRERECor1/VIzc9X2W3qRAJCSmYsTyRmcU4vKTOdkKS4uDkIIdOrUCT/99BM8Pf9ZbNLBwQE1a9aEn5+fUYIkIiJ6cX0/XenadEekjc7JUvv27QEAycnJ8Pf355pwRERkcor1/T7cegEZOQUllte36Y5IHZ2SpT///BONGjWCjY0NMjMzceHCBY1lmzRpYrDgiIiIiopq5ItOgT4Im7MPGTn5astI8HyB5NBanmr3E+lDp2QpJCQEqamp8Pb2RkhICCQSCdStvyuRSFBYWGjwIImIiF7kYGeDz/o0UjbJvfhEUrR7zHg1mJ27ySB0SpaSk5Ph5eWl/H8iIqLypmiSK7r0iYxLn5CBSYS6KiLSS1ZWFqRSKTIzM+Hu7l7e4RARVSiFcoETyRlIz86Ft9vzpjfWKJEudH1+69zBW8Hf3x8dOnRA+/bt0aFDB9SpU6dMgRIREZWFrY2E0wOQUek9g/dnn30GJycnzJs3D/Xq1UONGjUwdOhQrFy5EleuXDFGjEREZAEK5QLx1x5g+7k7iL/2AIVyNlyQdShTM1xKSgoOHjyIX3/9FZs3b4ZcLq+QHbzZDEdEFd2uiynF+g75su8QmTmjNcMBwJMnT3D48GEcOHAAcXFxOHv2LBo1aoQOHTqUNl4iIrJQuy6mYMy6M8Vm1U7NzMWYdWewdGgzJkxk0fROllq3bo2zZ88iKCgIHTp0wNSpU9GuXTtUrlzZGPEREZEZK5QLxOxIULv8iMDzYfxco40snd59li5fvgwXFxcEBgYiMDAQQUFBTJSIiCqoE8kZKk1vRb24RhuRpdI7WXrw4AH279+PsLAw7N69G23atEG1atXw2muvYeXKlcaIkYiIzJSua69xjTayZHonSxKJBE2aNMH48ePx448/4vfff0eXLl2wZcsWvPXWW8aIkYiIzJSua69xjTayZHr3WTpz5gwOHDiAAwcO4PDhw8jOzkbjxo3xzjvvKBfbJSKiiiG0lid8pU5IzcxV22+Ja7SRNdA7WQoNDcVLL72E9u3bY9SoUWjXrh2kUqkxYiMiIjNnayPBjFeDMWbdGUjANdrIOumdLGVkZHAuISIiUuIabWTt9E6WmCgREVFRUY180SVYxjXayCqValJKIiKiorhGG1krvUfDEREREVUkTJaIiIiItGCyRERERKSFXslSSkoK1q1bh507dyI/P19lX05ODmbNmmX
Q4IiIiIjKm0QIoW4esWJOnjyJrl27Qi6Xo6CgANWqVcO2bdvQsGFDAEBaWhr8/PxQWFho1IDNUVZWFqRSKTIzMzlakIiIyELo+vzWuWbpww8/RJ8+ffDw4UOkpaWhS5cuaN++Pc6ePWuQgImIiIjMkc5TB5w+fRrffPMNbGxs4Obmhm+//Rb+/v7o3Lkzdu/eDX9/f2PGSURERFQu9JpnKTdXddXoqVOnws7ODl27dsWqVasMGhgRERGROdA5WWrUqBGOHj2KJk2aqGyfPHky5HI5oqOjDR4cERERUXnTuc/SsGHDcPjwYbX7PvjgA8TExLApjoiIiKyOzqPhSDOOhiMiIrI8Bh8Nl5ubi19++QXZ2dlq3+yXX35BXl5e6aIlIiIiMlM6J0vLly/HkiVL4ObmVmyfu7s7vvzyS6xcudKgwRERERGVN52TpfXr1+Pdd9/VuP/dd9/FDz/8YIiYiIiIiMyGzsnSlStX0LRpU437mzRpgitXrhgkKCIiIiJzoXOy9OzZM9y7d0/j/nv37uHZs2cGCYqIiIjIXOicLDVs2BB79+7VuH/Pnj3KdeKIiIiIrIXOydKbb76J//znP/j111+L7duxYwdmz56NN99806DBEREREZU3nWfwHj16NA4dOoSePXsiMDAQDRo0AABcvnwZf/31FwYOHIjRo0cbLVAiIiKi8qBzzRIArFu3Dps2bUK9evXw119/ISkpCQ0aNMDGjRuxceNGY8VIREREVG44g7cBcAZvIiIiy2PwGbzlcjnmzZuHNm3aoGXLlpg6dSqePn1qkGCJiIiIzJXOydLs2bPx4YcfwtXVFdWqVcOSJUswduxYY8ZGREREVO50TpZ++OEHfPvtt9i9eze2bduGHTt2YP369ZDL5caMj4iIiKhc6Zws3bx5E927d1e+joiIgEQiwd27d40SGBEREZE50GsGbycnJ5Vt9vb2KCgoMHhQ6mRkZGDIkCFwd3eHh4cHRo4cicePH2s9Jjc3F2PHjkWVKlXg6uqKfv36IS0tTW3ZBw8eoHr16pBIJHj06JERroCIiIgskc7zLAkhMHz4cDg6Oiq35ebm4q233oKLi4ty288//2zYCP/fkCFDkJKSgtjYWBQUFGDEiBEYPXo0NmzYoPGY9957D7/99hu2bNkCqVSKcePGoW/fvjhy5EixsiNHjkSTJk1w584do8RPRERElknnqQNGjBih0wlXr15dpoDUSUxMRHBwME6ePIkWLVoAAHbt2oXu3bvj9u3b8PPzK3ZMZmYmvLy8sGHDBvTv3x/A8wk0g4KCEB8fj7CwMGXZpUuXYvPmzfjkk0/QuXNnPHz4EB4eHjrHx6kDiIiILI+uz2+da5aMkQTpKj4+Hh4eHspECXjeZ8rGxgbHjx9Hnz59ih1z+vRpFBQUICIiQrktMDAQ/v7+KslSQkICZs2ahePHj+P69es6xZOXl4e8vDzl66ysrNJeGhEREZk5vWbwLi+pqanw9vZW2WZnZwdPT0+kpqZqPMbBwaFYDZGPj4/ymLy8PERHR2PBggXw9/fXOZ45c+ZAKpUqf2rUqKHfBRERGUChXCD+2gNsP3cH8dceoFDOOYaJjEHnmiVjmDp1KubNm6e1TGJiotHef9q0aQgKCsLQoUP1Pm7ixInK11lZWUyYiMikdl1MQcyOBKRk5iq3+UqdMOPVYEQ18i3HyIisT7kmS5MmTcLw4cO1lqlduzZkMhnS09NVtj979gwZGRmQyWRqj5PJZMjPz8ejR49UapfS0tKUx+zfvx8XLlzAjz/+COB5J3YAqFq1Kj766CPExMSoPbejo6NKR3ciIlPadTEFY9adQdF6pNTMXIxZdwZLhzZjwkRkQOWaLHl5ecHLy6vEcuHh4Xj06BFOnz6N5s2bA3ie6MjlcrRq1UrtMc2bN4e9vT327duHfv36AQCSkpJw8+ZNhIeHAwB++uknlSVbTp48iTfffBN//PEH6tSpU9bLIyIyuEK5QMyOhGK
JEgAIABIAMTsS0CVYBlsbiYmjI7JO5Zos6SooKAhRUVEYNWoUli1bhoKCAowbNw6DBw9WjoS7c+cOOnfujB9++AGhoaGQSqUYOXIkJk6cCE9PT7i7u+Odd95BeHi4snN30YTo/v37yvfTZzQcEZGpnEjOUGl6K0oASMnMxYnkDITXqWK6wIismEUkSwCwfv16jBs3Dp07d4aNjQ369euHL7/8Urm/oKAASUlJePLkiXLbF198oSybl5eHyMhIfPvtt+URPhGRQaRna06USlOOiEqm8zxLpBnnWSIiU4m/9gDRK4+VWG7jqDDWLBGVQNfnt0VMHUBERM+F1vKEr9QJmnojSfB8VFxoLU9ThkVk1ZgsERFZEFsbCWa8GgwAxRImxesZrwazczeRATFZIiKyMFGNfLF0aDPIpKqLm8ukTpw2gMgILKaDNxER/SOqkS+6BMtwIjkD6dm58HZ73vTGGiUiw2OyRERkoWxtJOzETWQCbIYjIiIi0oI1S0RERlIoF2wmI7ICTJaIiIyAC90SWQ82wxERGZhioduiy5IoFrrddTGlnCIjotJgskT0gkK5QPy1B9h+7g7irz1AoZwT3JN+SlroFni+0C0/W0SWg81wRP+PzSZkCFzolsj6sGaJCGw2IcPhQrdE1ofJElV4bDYhQ/J2cyq5kB7liKj8MVmiCk+fZhOiknChWyLrw2SJKjw2m5AhcaFbIuvDZIkqPDabkKFxoVsi68LRcFThKZpNUjNz1fZbkuD5Q47NJqQPLnRLZD2YLFGFp2g2GbPuDCSASsLEZhMqCy50S2Qd2AxHBDabEBGRZqxZIvp/bDYhIiJ1mCwRvYDNJkREVBSb4YiIiIi0YLJEREREpAWTJSIiIiItmCwRERERacFkiYiIiEgLJktEREREWjBZIiIiItKCyRIRERGRFkyWiIiIiLRgskRERESkBZMlIiIiIi2YLBERERFpwWSJiIiISAsmS0RERERaMFkiIiIi0oLJEhEREZEWTJaIiIiItGCyRERERKQFkyUiIiIiLZgsEREREWnBZImIiIhICyZLRERERFowWSIiIiLSgskSERERkRZMloiIiIi0YLJEREREpAWTJSIiIiItmCwRERERacFkiYiIiEgLJktEREREWjBZIiIiItKCyRIRERGRFkyWiIiIiLRgskRERESkBZMlIiIiIi2YLBERERFpwWSJiIiISAsmS0RERERa2JV3AKReoVzgRHIG0rNz4e3mhNBanrC1kZR3WERERBUOkyUztOtiCmJ2JCAlM1e5zVfqhBmvBiOqkW85RkZERFTxsBnOzOy6mIIx686oJEoAkJqZizHrzmDXxZRyioyIiKhisphkKSMjA0OGDIG7uzs8PDwwcuRIPH78WOsxubm5GDt2LKpUqQJXV1f069cPaWlpxcqtWbMGTZo0gZOTE7y9vTF27FhjXYZWhXKBmB0JEGr2KbbF7EhAoVxdCSIiIjIGi0mWhgwZgkuXLiE2Nha//vorDh06hNGjR2s95r333sOOHTuwZcsWHDx4EHfv3kXfvn1VyixatAgfffQRpk6dikuXLmHv3r2IjIw05qVodCI5o1iN0osEgJTMXJxIzjBdUERERBWcRAhh9tUUiYmJCA4OxsmTJ9GiRQsAwK5du9C9e3fcvn0bfn5+xY7JzMyEl5cXNmzYgP79+wMALl++jKCgIMTHxyMsLAwPHz5EtWrVsGPHDnTu3LnU8WVlZUEqlSIzMxPu7u6lPs/2c3cwYdO5EsstGRyCXiHVSv0+REREpPvz2yJqluLj4+Hh4aFMlAAgIiICNjY2OH78uNpjTp8+jYKCAkRERCi3BQYGwt/fH/Hx8QCA2NhYyOVy3LlzB0FBQahevToGDhyIW7duGfeCNPB2czJoOSIiIio7i0iWUlNT4e3trbLNzs4Onp6eSE1N1XiMg4MDPDw8VLb7+Pgoj7l+/Trkcjk+++wzLF68GD/++CMyMjLQpUsX5Ofna4w
nLy8PWVlZKj+GEFrLE75SJ2iaIECC56PiQmt5GuT9iIiIqGTlmixNnToVEolE68/ly5eN9v5yuRwFBQX48ssvERkZibCwMGzcuBFXrlxBXFycxuPmzJkDqVSq/KlRo4ZB4rG1kWDGq8EAUCxhUrye8Wow51siIiIyoXKdZ2nSpEkYPny41jK1a9eGTCZDenq6yvZnz54hIyMDMplM7XEymQz5+fl49OiRSu1SWlqa8hhf3+dzFgUHByv3e3l5oWrVqrh586bGmKZNm4aJEycqX2dlZRksYYpq5IulQ5sVm2dJxnmWiIiIykW5JkteXl7w8vIqsVx4eDgePXqE06dPo3nz5gCA/fv3Qy6Xo1WrVmqPad68Oezt7bFv3z7069cPAJCUlISbN28iPDwcANCmTRvl9urVqwN4PkXB/fv3UbNmTY3xODo6wtHRUfcL1VNUI190CZZxBm8iIiIzYBGj4QCgW7duSEtLw7Jly1BQUIARI0agRYsW2LBhAwDgzp076Ny5M3744QeEhoYCAMaMGYOdO3dizZo1cHd3xzvvvAMAOHr0qPK8vXv3xtWrV7FixQq4u7tj2rRpuH79Os6dOwd7e3udYjPUaDgiIiIyHasaDQcA69evR2BgIDp37ozu3bujbdu2WLFihXJ/QUEBkpKS8OTJE+W2L774Aq+88gr69euHdu3aQSaT4eeff1Y57w8//IBWrVqhR48eaN++Pezt7bFr1y6dEyUiIiKybhZTs2TOWLNERERkeayuZomIiIioPDBZIiIiItKCyRIRERGRFkyWiIiIiLRgskRERESkBZMlIiIiIi2YLBERERFpUa7LnVgLxVRVWVlZ5RwJERER6Urx3C5pykkmSwaQnZ0NAAZbTJeIiIhMJzs7G1KpVON+zuBtAHK5HHfv3oWbmxskEtMudpuVlYUaNWrg1q1bnD1cA94j7Xh/SsZ7pB3vT8l4j7Qrr/sjhEB2djb8/PxgY6O5ZxJrlgzAxsYG1atXL9cY3N3d+QdYAt4j7Xh/SsZ7pB3vT8l4j7Qrj/ujrUZJgR28iYiIiLRgskRERESkBZMlC+fo6IgZM2bA0dGxvEMxW7xH2vH+lIz3SDven5LxHmln7veHHbyJiIiItGDNEhEREZEWTJaIiIiItGCyRERERKQFkyUiIiIiLZgsmbmMjAwMGTIE7u7u8PDwwMiRI/H48WOtx+Tm5mLs2LGoUqUKXF1d0a9fP6SlpRUrt2bNGjRp0gROTk7w9vbG2LFjjXUZRmXMewQADx48QPXq1SGRSPDo0SMjXIHxGeMenT9/HtHR0ahRowacnZ0RFBSEJUuWGPtSDOKbb75BQEAAnJyc0KpVK5w4cUJr+S1btiAwMBBOTk5o3Lgxdu7cqbJfCIFPPvkEvr6+cHZ2RkREBK5cuWLMSzA6Q96jgoICTJkyBY0bN4aLiwv8/PwwbNgw3L1719iXYTSG/gy96K233oJEIsHixYsNHLVpGeMeJSYmomfPnpBKpXBxcUHLli1x8+ZNY13CPwSZtaioKNG0aVNx7Ngx8ccff4i6deuK6Ohorce89dZbokaNGmLfvn3i1KlTIiwsTLRu3VqlzMKFC4Wfn59Yv369uHr1qjh//rzYvn27MS/FaIx1jxR69eolunXrJgCIhw8fGuEKjM8Y9+i7774T48ePFwcOHBDXrl0Ta9euFc7OzuKrr74y9uWUyaZNm4SDg4NYtWqVuHTpkhg1apTw8PAQaWlpassfOXJE2Nraivnz54uEhAQxffp0YW9vLy5cuKAsM3fuXCGVSsW2bdvE+fPnRc+ePUWtWrXE06dPTXVZBmXoe/To0SMREREhNm/eLC5fvizi4+NFaGioaN68uSkvy2CM8RlS+Pnnn0XTpk2Fn5+f+OKLL4x8JcZjjHt09epV4enpKd5//31x5swZcfXqVbF9+3aN5zQkJktmLCEhQQAQJ0+eVG77/fffhUQiEXfu3FF7zKNHj4S9vb3YsmWLclt
iYqIAIOLj44UQQmRkZAhnZ2exd+9e416ACRjrHil8++23on379mLfvn0WmywZ+x696O233xYdO3Y0XPBGEBoaKsaOHat8XVhYKPz8/MScOXPUlh84cKDo0aOHyrZWrVqJf//730IIIeRyuZDJZGLBggXK/Y8ePRKOjo5i48aNRrgC4zP0PVLnxIkTAoD4+++/DRO0CRnr/ty+fVtUq1ZNXLx4UdSsWdOikyVj3KNBgwaJoUOHGifgErAZzozFx8fDw8MDLVq0UG6LiIiAjY0Njh8/rvaY06dPo6CgABEREcptgYGB8Pf3R3x8PAAgNjYWcrkcd+7cQVBQEKpXr46BAwfi1q1bxr0gIzDWPQKAhIQEzJo1Cz/88IPWBRbNnTHvUVGZmZnw9PQ0XPAGlp+fj9OnT6tcl42NDSIiIjReV3x8vEp5AIiMjFSWT05ORmpqqkoZqVSKVq1aab1X5soY90idzMxMSCQSeHh4GCRuUzHW/ZHL5Xj99dfx/vvvo2HDhsYJ3kSMcY/kcjl+++031K9fH5GRkfD29karVq2wbds2o13Hiyz3CVABpKamwtvbW2WbnZ0dPD09kZqaqvEYBweHYl9APj4+ymOuX78OuVyOzz77DIsXL8aPP/6IjIwMdOnSBfn5+Ua5FmMx1j3Ky8tDdHQ0FixYAH9/f6PEbirGukdFHT16FJs3b8bo0aMNErcx3L9/H4WFhfDx8VHZru26UlNTtZZX/Fefc5ozY9yjonJzczFlyhRER0db3KKyxro/8+bNg52dHcaPH2/4oE3MGPcoPT0djx8/xty5cxEVFYU9e/agT58+6Nu3Lw4ePGicC3kBk6VyMHXqVEgkEq0/ly9fNtr7y+VyFBQU4Msvv0RkZCTCwsKwceNGXLlyBXFxcUZ7X32U9z2aNm0agoKCMHToUKO9R1mV9z160cWLF9GrVy/MmDEDXbt2Ncl7kmUqKCjAwIEDIYTA0qVLyzscs3D69GksWbIEa9asgUQiKe9wzJJcLgcA9OrVC++99x5CQkIwdepUvPLKK1i2bJnR39/O6O9AxUyaNAnDhw/XWqZ27dqQyWRIT09X2f7s2TNkZGRAJpOpPU4mkyE/Px+PHj1SqRVIS0tTHuPr6wsACA4OVu738vJC1apVTTOqQAflfY/279+PCxcu4McffwTwfLQTAFStWhUfffQRYmJiSnllhlPe90ghISEBnTt3xujRozF9+vRSXYupVK1aFba2tsVGPqq7LgWZTKa1vOK/aWlpyr8txeuQkBADRm8axrhHCopE6e+//8b+/fstrlYJMM79+eOPP5Cenq5Si11YWIhJkyZh8eLFuHHjhmEvwsiMcY+qVq0KOzs7lecWAAQFBeHw4cMGjF6DcukpRTpRdMw9deqUctvu3bt16pj7448/KrddvnxZpWNuUlKSAKDSwfvBgwfCxsZG7N6920hXYxzGukdXr14VFy5cUP6sWrVKABBHjx41ycgLQzLWPRJCiIsXLwpvb2/x/vvvG+8CDCw0NFSMGzdO+bqwsFBUq1ZNa8fTV155RWVbeHh4sQ7en3/+uXJ/ZmamxXfwNuQ9EkKI/Px80bt3b9GwYUORnp5unMBNxND35/79+yrfNxcuXBB+fn5iypQp4vLly8a7ECMyxmcoPDy8WAfv3r17lziy1xCYLJm5qKgo8dJLL4njx4+Lw4cPi3r16ql8MG7fvi0aNGggjh8/rtz21ltvCX9/f7F//35x6tQpER4eLsLDw1XO26tXL9GwYUNx5MgRceHCBfHKK6+I4OBgkZ+fb7JrMxRj3aMXxcXFWexoOCGMc48uXLggvLy8xNChQ0VKSoryx9wfhJs2bRKOjo5izZo1IiEhQYwePVp4eHiI1NRUIYQQr7/+upg6daqy/JEjR4SdnZ34/PPPRWJiopgxY4baqQM8PDzE9u3bxZ9//il69epl8VMHGPIe5efni549e4rq1auLc+fOqXxe8vLyyuUay8IYn6GiLH0
0nDHu0c8//yzs7e3FihUrxJUrV8RXX30lbG1txR9//GH062GyZOYePHggoqOjhaurq3B3dxcjRowQ2dnZyv3JyckCgIiLi1Nue/r0qXj77bdF5cqVRaVKlUSfPn1ESkqKynkzMzPFm2++KTw8PISnp6fo06ePuHnzpqkuy6CMdY9eZOnJkjHu0YwZMwSAYj81a9Y04ZWVzldffSX8/f2Fg4ODCA0NFceOHVPua9++vXjjjTdUyv/vf/8T9evXFw4ODqJhw4bit99+U9kvl8vFxx9/LHx8fISjo6Po3LmzSEpKMsWlGI0h75Hi86Xu58XPnCUx9GeoKEtPloQwzj367rvvRN26dYWTk5No2rSp2LZtm7EvQwghhESI/++MQURERETFcDQcERERkRZMloiIiIi0YLJEREREpAWTJSIiIiItmCwRERERacFkiYiIiEgLJktEREREWjBZIiKDk0gk2LZtm9Yyw4cPR+/evU0Sj7EEBARg8eLF5R2GQcycORM+Pj46/e6IKhomS0QWYvjw4ZBIJJBIJHBwcEDdunUxa9YsPHv2TFlGCIEVK1agVatWcHV1hYeHB1q0aIHFixfjyZMnAIBLly6hX79+CAgIgEQiMcrDPiUlBd26dQMA3LhxAxKJBOfOnTP4+5S3kydPYvTo0eUdRpklJiYiJiYGy5cvV/ndFXXz5k306NEDlSpVgre3N95//32Vz586PXv2hL+/P5ycnODr64vXX38dd+/eVSnz559/4uWXX4aTkxNq1KiB+fPnG+zaiAyByRKRBYmKikJKSgquXLmCSZMmYebMmViwYIFy/+uvv453330XvXr1QlxcHM6dO4ePP/4Y27dvx549ewAAT548Qe3atTF37lyNK4CXlUwmg6Ojo1HObQ7y8/MBAF5eXqhUqVI5R1N2165dAwD06tVL4++usLAQPXr0QH5+Po4ePYrvv/8ea9aswSeffKL13B07dsT//vc/JCUl4aeffsK1a9fQv39/5f6srCx07doVNWvWxOnTp7FgwQLMnDkTK1asMOxFEpWFSRZVIaIye+ONN0SvXr1UtnXp0kWEhYUJIYTYvHmzAKB2rSS5XC4ePXpUbLsu60/J5XJRtWpVsWXLFuW2pk2bCplMpnz9xx9/CAcHB5GTkyOEEAKA2Lp1q/L/X/xp3769yvUsWLBAyGQy4enpKd5++22tizlfvXpV9OzZU3h7ewsXFxfRokULERsbq7F8UlKSACASExNVti9atEjUrl1bCCHEs2fPxJtvvikCAgKEk5OTqF+/vli8eLFKeUWsn376qfD19RUBAQFq79/ChQtFo0aNRKVKlUT16tXFmDFjVNbgW716tZBKpWLXrl0iMDBQuLi4iMjISHH37l2V9/vuu+9EcHCwcHBwEDKZTIwdO1a57+HDh2LkyJGiatWqws3NTXTs2FGcO3dO4z0QQog///xTdOzYUTg5OQlPT08xatQoZVzq1vhTZ+fOncLGxka5EKoQQixdulS4u7vrtRju9u3bhUQiUf6ev/32W1G5cmWVc0yZMkU0aNBA53MSGRtrlogsmLOzs7KWY/369WjQoAF69epVrJxEIoFUKi3Ve0gkErRr1w4HDhwAADx8+BCJiYl4+vQpLl++DAA4ePAgWrZsqbaW5cSJEwCAvXv3IiUlBT///LNyX1xcHK5du4a4uDhlTcWaNWs0xvL48WN0794d+/btw9mzZxEVFYVXX30VN2/eVFu+fv36aNGiBdavX6+yff369XjttdcAAHK5HNWrV8eWLVuQkJCATz75BB9++CH+97//qRyzb98+JCUlITY2Fr/++qva97OxscGXX36JS5cu4fvvv8f+/fvxwQcfqJR58uQJPv/8c6xduxaHDh3CzZs3MXnyZOX+pUuXYuzYsRg9ejQuXLiAX375BXXr1lXuHzBgANLT0/H777/j9OnTaNasGTp37oyMjAy1MeXk5CAyMhKVK1fGyZMnsWXLFuzduxfjxo0
DAEyePBmrV68G8Lz5NCUlRe154uPj0bhxY/j4+Ci3RUZGIisrC5cuXVJ7TFEZGRlYv349WrduDXt7e+V527VrBwcHB5XzJiUl4eHDhzqdl8joyjtbIyLdvFizJJfLRWxsrHB0dBSTJ08WQggRFBQkevbsqdc5dV3Z/MsvvxQNGzYUQgixbds20apVK9GrVy+xdOlSIYQQERER4sMPP1SWxws1S4oV58+ePVvsemrWrCmePXum3DZgwAAxaNAgva6hYcOG4quvvtK4/4svvhB16tRRvtZU2/SisWPHin79+qnE6uPjU6wGpaT7t2XLFlGlShXl69WrVwsA4urVq8pt33zzjfDx8VG+9vPzEx999JHa8/3xxx/C3d1d5ObmqmyvU6eOWL58udpjVqxYISpXriweP36s3Pbbb7+p1BJt3bpVY42SwqhRo0TXrl1VtuXk5AgAYufOnVqP/eCDD0SlSpUEABEWFibu37+v3NelSxcxevRolfKXLl0SAERCQoLW8xKZCmuWiCzIr7/+CldXVzg5OaFbt24YNGgQZs6cCeB5525jad++PRISEnDv3j0cPHgQHTp0QIcOHXDgwAEUFBTg6NGj6NChg97nbdiwIWxtbZWvfX19kZ6errH848ePMXnyZAQFBcHDwwOurq5ITEzUWLMEAIMHD8aNGzdw7NgxAM9rlZo1a4bAwEBlmW+++QbNmzeHl5cXXF1dsWLFimLnbNy4sUrthzp79+5F586dUa1aNbi5ueH111/HgwcPlJ3rAaBSpUqoU6eO2mtOT0/H3bt30blzZ7XnP3/+PB4/fowqVarA1dVV+ZOcnKzsd1RUYmIimjZtChcXF+W2Nm3aQC6XIykpSev1GMr777+Ps2fPYs+ePbC1tcWwYcOM+nklMjS78g6AiHTXsWNHLF26FA4ODvDz84Od3T9/wvXr11c2ixla48aN4enpiYMHD+LgwYOYPXs2ZDIZ5s2bh5MnT6KgoACtW7fW+7yKphgFiUQCuVyusfzkyZMRGxuLzz//HHXr1oWzszP69++vbIpURyaToVOnTtiwYQPCwsKwYcMGjBkzRrl/06ZNmDx5MhYuXIjw8HC4ublhwYIFOH78uMp5Xkw21Llx4wZeeeUVjBkzBrNnz4anpycOHz6MkSNHIj8/X9lEqe6aFYmDs7Oz1vd4/PgxfH19lU2iL/Lw8NB6bFnJZDJlk6pCWlqacp82VatWRdWqVVG/fn0EBQWhRo0aOHbsGMLDwyGTyZTn0fe8RKbCmiUiC+Li4oK6devC399fJVECgNdeew1//fUXtm/fXuw4IQQyMzNL/b4SiQQvv/wytm/fjkuXLqFt27Zo0qQJ8vLysHz5crRo0UJjMqGojSksLCz1+yscOXIEw4cPR58+fdC4cWPIZDLcuHGjxOOGDBmCzZs3Iz4+HtevX8fgwYNVztm6dWu8/fbbeOmll1C3bl2NtTTanD59GnK5HAsXLkRYWBjq169fbIh8Sdzc3BAQEIB9+/ap3d+sWTOkpqbCzs4OdevWVfmpWrWq2mOCgoJw/vx55OTkKLcdOXIENjY2aNCggc6xhYeH48KFCyo1f7GxsXB3d0dwcLDO51Ekw3l5ecrzHjp0CAUFBSrnbdCgASpXrqzzeYmMickSkZUYOHAgBg0ahOjoaHz22Wc4deoU/v77b/z666+IiIhAXFwcgOfD3s+dO4dz584hPz8fd+7cwblz53D16lWt5+/QoQM2btyIkJAQuLq6wsbGBu3atcP69evRvn17jcd5e3vD2dkZu3btQlpaWpmStnr16uHnn3/GuXPncP78ebz22mtaa6IU+vbti+zsbIwZMwYdO3aEn5+fyjlPnTqF3bt346+//sLHH3+MkydP6h1b3bp1UVBQgK+++grXr1/H2rVrsWzZMr3PM3PmTCxcuBBffvklrly5gjNnzuCrr74CAERERCA8PBy9e/fGnj17cOPGDRw9ehQfffQRTp06pfZ8Q4YMgZO
TE9544w1cvHgRcXFxeOedd/D666+rdNYuSdeuXREcHIzXX38d58+fx+7duzF9+nSMHTtWOdXAiRMnEBgYiDt37gAAjh8/jq+//hrnzp3D33//jf379yM6Ohp16tRBeHg4gOdJvoODA0aOHIlLly5h8+bNWLJkCSZOnKj3vSMymvLtMkVEulI3dUBRhYWFYunSpaJly5aiUqVKwt3dXTRv3lwsWbJEPHnyRAjxT4froj+KIf2anD17VgAQU6ZMUW774osvBACxa9culbJ4oYO3EEKsXLlS1KhRQ9jY2BSbOuBFEyZM0BpHcnKy6Nixo3B2dhY1atQQX3/9tWjfvr2YMGGC1tiFEGLgwIECgFi1apXK9tzcXDF8+HAhlUqFh4eHGDNmjJg6dapo2rSpsoyme1+0g/eiRYuEr6+vcHZ2FpGRkeKHH34QAMTDhw+FEP9MHfAidZ2rly1bJho0aCDs7e2Fr6+veOedd5T7srKyxDvvvCP8/PyEvb29qFGjhhgyZIi4efOmxmvXNnWAphjUuXHjhujWrZtwdnYWVatWFZMmTRIFBQXK/XFxcQKASE5OVnlfT09P4ejoKAICAsRbb70lbt++rXLe8+fPi7Zt2wpHR0dRrVo1MXfu3BJjITIliRDsZUdERESkCZvhiIiIiLRgskRERESkBZMlIiIiIi2YLBERERFpwWSJiIiISAsmS0RERERaMFkiIiIi0oLJEhEREZEWTJaIiIiItGCyRERERKQFkyUiIiIiLZgsEREREWnxf5/q19F5ObZSAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", + " warnings.warn(\n", + "Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t6_8M_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1899: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings shape: (68, 320)\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAksAAAHHCAYAAACvJxw8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAbs5JREFUeJzt3XdcU9f7B/BPAFmyRZZFcaHiwgXiQisKDtxb66xa96i12tZV26KttdY6a61aR7V+q9ZVHCi2Km6wxS1uGSooIMiQnN8f/pIaSEICCRD4vF+vvNrce+7Nkwt6H8859zkSIYQAERERESllVNwBEBEREZVkTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRpMloi0JJFIMH/+/OIOQ8H58+fRokULlC9fHhKJBFFRUSrb3rp1Cx07doStrS0kEgn27NlTZHHmJpFIMHHixCL/3PDwcEgkEvzvf/8r8s8mIsPDZImK1MaNGyGRSOQvc3NzeHp6YuLEiUhISMjTPiEhATNmzEDt2rVhaWmJ8uXLo0mTJvjiiy/w4sULpZ/h4+MDiUSC1atXaxzXvXv3FOIyNjZG5cqV0bNnT7WJhzauXr2K+fPn4969ezo5n0x2djb69u2LpKQkfPfdd9i8eTOqVKmisv2wYcPw77//4ssvv8TmzZvRtGlTncZTGkmlUlSsWBFff/11cYeSR1hYGEaOHAlPT09YWlqiWrVqeP/99xEXF6f1uQ4ePKj2HwKpqamYOXMmqlatCjMzM1SqVAl9+vRBenq6Vp+TnJyMmTNnombNmrCwsECVKlUwatQoPHjwQKHd8OHDIZFIYGNjg1evXuU5z61bt+R/ZpcsWaJVDOoS9f/973+QSCQIDw/PE4vsZWZmBk9PT8ydOxcZGRl5zuHh4aHQXvb64IMP8rR98eIFxowZg4oVK6J8+fJo164dLl26pDRm2cvExAQODg5o0qQJpkyZgqtXr2r1/Uk7JsUdAJVNn3/+OapWrYqMjAycPHkSq1evxsGDBxEdHQ1LS0sAb3pLOnfujJcvX2LIkCFo0qQJAODChQtYtGgR/vrrLxw+fFjhvLdu3cL58+fh4eGBrVu3Yty4cVrFNXDgQHTu3Bk5OTm4du0aVq9ejT///BNnzpyBt7d3ob7z1atXsWDBArRt2xYeHh6FOtfbYmJicP/+faxbtw7vv/++2ravXr1CREQEPv3002Lp0TFU586dw7Nnz9ClS5fiDiWPjz/+GElJSejbty9q1qyJO3fuYMWKFdi/fz+ioqLg4uKi8bkOHjyIlStXKk2YkpOT4e/vj0ePHmHMmDGoUaMGnj59ir///huZmZnyP7f5kUql6NChA65evYrx48fD09MTt2/fxqpVq3Do0CFcu3YN1tbW8vYmJiZIT0/Hvn370K9fP4Vzbd26Febm5kqTFX0wMzPDTz/9BODN9fjjjz+wcOFCxMTEYOvWrXnae3t748MPP1TY5unpqfBeKpWiS5cuuHz5Mj766CM4Ojpi1apVaNu2LS5evIiaNWsqtO/QoQOGDh0KIQSSk5Nx+fJlbNq0CatWrcLixYsxffp0HX9rAgAIoiK0YcMGAUCcP39eYfv06dMFALFt2zYhhBDPnz8XlSpVEs7OzuLatWt5zhMfHy8WLlyYZ/vcuXOFk5OT+P3334VEIhF3797VKK67d+8KAOKbb75R2L53714BQIwZM0a+DYCYN2+eRud9286dOwUAcfz4ca2PVefEiRMCgNi5c2e+be/fv6/0exYXAGLChAlF/rnHjx/X+JoJIcScOXNElSpV9BtUAZ04cULk5OTk2QZAfPrpp1qda8KECULVbWHcuHHCzs5O3Llzp8CxCiHEqVOnBACxYsUKhe0///yzACB27dol3zZs2DBRvnx50bFjR9GjR48856pZs6bo3bt3gX6n1f3uKfuzKovlbVKpVDRv3lxIJBI
RHx+vsK9KlSqiS5cu+caxY8eOPL+LT548EXZ2dmLgwIEaxfzs2TPh5+cnAIgDBw7k+5mkPQ7DUYnw7rvvAgDu3r0LAFi7di0eP36MpUuXonbt2nnaOzs747PPPsuzfdu2bejTpw+6du0KW1tbbNu2TadxqRIZGYlOnTrBxsYGVlZWaN++Pc6cOSPfv3HjRvTt2xcA0K5dO3lX+tvd/MocO3YMrVu3Rvny5WFnZ4fu3bvj2rVr8v3Dhw+Hv78/AKBv376QSCRo27at0nPNnz9fPjz30UcfQSKRyHu47t+/j/Hjx6NWrVqwsLBAhQoV0LdvX6VDhi9evMC0adPg4eEBMzMzvPPOOxg6dCiePXsmb5OZmYl58+ahRo0aMDMzg7u7O2bOnInMzEylsW3duhW1atWCubk5mjRpgr/++itPm/yuscydO3fQt29fODg4wNLSEs2bN8eBAweUfu7bMjMz5b83p0+fVth34MCBPL1Kf/75p/xnY21tjS5duuDKlSsKbeLj4zFixAi88847MDMzg6urK7p3765wXT08PNC1a1eEh4ejadOmsLCwQP369eW/G7t27UL9+vXl1yYyMlLhM9q0aQMjI6M82xwcHBR+V/IzfPhwrFy5EoDicA/w5me+YcMGjBkzBlWrVkVWVpbKn2V+UlJSALz5M/w2V1dXAICFhUWeYwYNGoQ///xTYej9/PnzuHXrFgYNGlSgOHRBIpGgVatWEELgzp07SttkZWUhLS1N5Tn+97//wdnZGb169ZJvq1ixIvr164c//vhDo+tcoUIFbN++HSYmJvjyyy+1/yKULyZLVCLExMQAePOHHgD27t0LCwsL9OnTR+NznD17Frdv38bAgQNhamqKXr16Ke0aL0xcyly5cgWtW7fG5cuXMXPmTMyZMwd3795F27ZtcfbsWQBvbl6TJ08GAHzyySfYvHkzNm/ejDp16qg879GjRxEYGIgnT55g/vz5mD59Ok6fPo2WLVvKb7Zjx47FJ598AgCYPHkyNm/ejE8//VTp+Xr16oXvvvsOwJvhxs2bN2PZsmUA3tx4Tp8+jQEDBmD58uX44IMPEBYWhrZt2yrMR3n58iVat26NH374AR07dsT333+PDz74ANevX8ejR48AvBlW6NatG5YsWYLg4GD88MMP6NGjB7777jv0798/T1wnTpzA1KlTMWTIEHz++edITExEUFAQoqOjtbrGwJs5bi1atMChQ4cwfvx4fPnll8jIyEC3bt2we/duldf61atXCA4OxunTp3H06FG0aNFCvi8+Ph6RkZHo3LmzfNvmzZvRpUsXWFlZYfHixZgzZw6uXr2KVq1aKSRCvXv3xu7duzFixAisWrUKkydPRmpqap65Obdv38agQYMQHByMkJAQPH/+HMHBwdi6dSumTZuGIUOGYMGCBYiJiUG/fv0glUpVfhfZz+nly5dwdHRU2+5tY8eORYcOHeTfT/YCgJMnTyIjIwM1atRAnz59YGlpCQsLC7Rs2VLrOX1NmzZF+fLlMWfOHBw7dgyPHz/GiRMnMHPmTDRr1gwBAQF5junVqxckEgl27dol37Zt2zbUrl0bjRs31urzdU3287a3t8+z79ixY7C0tISVlRU8PDzw/fff52kTGRmJxo0b50l4fXx8kJ6ejps3b2oUR+XKleHv748zZ87IE1LSoeLu2qKyRTYMd/ToUfH06VPx8OFDsX37dlGhQgVhYWEhHj16JIQQwt7eXjRs2FCrc0+cOFG4u7sLqVQqhBDi8OHDAoCIjIzM91jZMNyCBQvE06dPRXx8vAgPDxeNGjUSAMTvv/8ub4tcw3A9evQQpqamIiYmRr4tNjZWWFtbizZt2si3aTsM5+3tLZycnERiYqJ82+XLl4WRkZEYOnSofJs2Q0qqhhvT09PztI2IiBAAxC+//CLfNnfu3DxDJTKy675582ZhZGQk/v77b4X9a9asEQDEqVOn5NsACADiwoUL8m33798X5ubmomfPnvJ
tml7jqVOnCgAKn52amiqqVq0qPDw85MNVb1+z1NRU4e/vLxwdHZX+rqxfv15YWFjIr1Fqaqqws7MTo0ePVmgXHx8vbG1t5dufP3+u0fBQlSpVBABx+vRp+bZDhw4JAMLCwkLcv39fvn3t2rUa/Q4tXLhQABBhYWFq2+Wmahhu6dKlAoCoUKGC8PHxEVu3bhWrVq0Szs7Owt7eXsTGxmr1Ofv37xeurq7ynz8AERgYKFJTUxXavT301adPH9G+fXshhBA5OTnCxcVFLFiwQOXvdH5QwGG4p0+fiqdPn4rbt2+LJUuWCIlEIurVqyf//ZcJDg4WixcvFnv27BHr168XrVu3FgDEzJkzFdqVL19ejBw5Mk8MBw4cEABEaGioRjELIcSUKVMEAHH58mVNLgFpgckSFSlZspT7VaVKFYW/FIyNjUWrVq00Pm92draoWLGimDFjhnzb69evhZOTk8I2VWR/4eZ+2djYiMWLFyu0fTtZev36tbC0tBT9+vXLc86xY8cKIyMjkZycLITQLlmKjY1V+herEEIEBgYKR0dH+XtdJEtvy8rKEs+ePRNPnz4VdnZ2YurUqfJ9devWzTeJ7datm6hbt678piJ73bx5UwAQX3zxhbwtAOHn55fnHP379xeWlpbi9evXWl1jT09P4ePjk6ddSEiIACD+/fdfIcR/1+ynn34Sfn5+wtnZWURHRyv9Pr179xadO3eWv9+1a5cAII4dO5bnO3bs2FHUqFFDCCFERkaGMDU1FV26dBFJSUkqr1eVKlWEl5eXwrYXL14IAHnmvERFRQkAYv369SrPd+LECWFiYqL0euVHVbL0+eefCwDC0dFRIaGRJdTazo06e/as6Ny5s/jyyy/Fnj17xPz584WlpaXo06ePQru3k6Vdu3YJY2NjERcXJ44cOSIAiFu3bhVpsqTs74hWrVopJPGqSKVSERgYKExMTMTDhw/l242MjMS4cePytA8LCxMAxO7duzWKWQghPv30UwFAnDx5Mt94SDt8Go6KxcqVK+Hp6QkTExM4OzujVq1aCt3QNjY2SE1N1fh8hw8fxtOnT+Hj44Pbt2/Lt7dr1w6//vorFi9enKebW5kxY8agb9++MDIygp2dHerWrQszMzOV7Z8+fYr09HTUqlUrz746depAKpXi4cOHqFu3rsbfBXgzhwiAyvMeOnQIaWlpKF++vFbnVeXVq1cICQnBhg0b8PjxYwgh5PuSk5Pl/x8TE4PevXurPdetW7dw7do1VKxYUen+J0+eKLzP/bQP8OaJofT0dDx9+hQANL7G9+/fh6+vr9J2wJvrWq9ePfn2qVOnIiMjA5GRkUp/RtnZ2Thy5AhCQkIUvh/w33y23GxsbAC8eXJq8eLF+PDDD+Hs7IzmzZuja9euGDp0aJ4n1CpXrqzw3tbWFgDg7u6udPvz58+Vfvb169fRs2dP1KtXT/7Uli7I5hEFBwfDyspKvr158+aoWrVqnjle6ty5cwft2rXDL7/8Iv9d6t69Ozw8PDB8+HD8+eef6NSpU57jOnfuDGtra+zYsQNRUVFo1qwZatSoofNSHG+TzdmSMTc3x759+wAAjx49wtdff40nT54onWel7FzTpk3DoUOHEB4ejiFDhgB4c22VzUuSPeGnybllXr58CQAKTxOSbjBZomLh4+Ojtr5P7dq1ERUVhaysLJiamuZ7PtncpNyPFsucOHEC7dq1y/c8NWvWVDpnorSbNGkSNmzYgKlTp8LPz09esHLAgAH5zo/JTSqVon79+li6dKnS/bkTgOLUvXt3bN++HYsWLcIvv/ySJ6E+efIkUlJSFOYrya7H5s2blT6Wb2Ly31+rU6dORXBwMPbs2YNDhw5hzpw5CAkJwbFjx9CoUSN5O2NjY6Xxqdr+djIr8/DhQ3mx0YMHD+r0hunm5gYg76RsAHByclKZvCmzceNGZGRkoGvXrgrbu3XrBgA4deqU0mTJzMwMvXr1wqZNm3Dnzp1CF4Y1MzNTWrs
JgHyenrm5ucJ2Y2Njhb8fAgMDUbt2bYwdOxZ79+7N9zNlv/tJSUnyba6urkprYsm2ya69JqKjo2FsbIyqVatqfAxphskSlUjBwcGIiIjA77//joEDB6ptm5aWhj/++AP9+/dXOiF88uTJ2Lp1q0bJkrYqVqwIS0tL3LhxI8++69evw8jISP4XZO5/paoje2pN1XkdHR111qsEvHkiZ9iwYfj222/l2zIyMvIU/qxevbrCxGtlqlevjsuXL6N9+/YafWdZT83bbt68CUtLS3nvlKbXuEqVKirbyfa/rUePHujYsSOGDx8Oa2vrPIVMDxw4AC8vL4W6WNWrVwfwJknQJLGuXr06PvzwQ3z44Ye4desWvL298e2332LLli35HqupxMREdOzYEZmZmQgLC5M/WaYtVT8vWY2zx48f59kXGxur9IlVVRISEiCEQE5OjsL27OxsAMDr169VHjto0CD8/PPPMDIywoABAzT+TGVU/a4A//25U1fcFXiT6EybNg0LFizAmTNn0Lx5c7XtZU/Mvd3r6u3tjb///htSqVQhWT979iwsLS3z1GVS5cGDBzhx4gT8/PzYs6QHfBqOSqQPPvgArq6u+PDDD5U+DfLkyRN88cUXAIDdu3cjLS0NEyZMQJ8+ffK8unbtit9//73AjzqrY2xsjI4dO+KPP/5QGA5ISEjAtm3b0KpVK/mwjCy5UVV5/G2urq7w9vbGpk2bFNpHR0fj8OHDCj0dumBsbJynt+KHH37Ic0Pr3bs3Ll++rPTJMtnx/fr1w+PHj7Fu3bo8bV69epXnMeqIiAiFasUPHz7EH3/8gY4dO8LY2Fira9y5c2ecO3cOERER8nZpaWn48ccf4eHhAS8vrzwxDR06FMuXL8eaNWvw8ccfK+w7ePBgnpIBgYGBsLGxwVdffSW/wb/t7aHD3MUSq1evDmtra53+LqalpaFz5854/PgxDh48qHRYU1Oqfkdr1aqFhg0b4o8//lAoEXH48GE8fPhQ/hSdJjw9PSGEwG+//aaw/ddffwUAhR633Nq1a4eFCxdixYoVWhXbVKZz5844c+YMLl68qLD9xYsX2Lp1K7y9vTX6jEmTJsHS0hKLFi2Sb0tKSlKaDC5atAimpqYK/3Dr06cPEhISFJ70e/bsGXbu3Ing4GC10wDe/ryBAwciJydH5dOwVDjsWaISyd7eHrt370bnzp3h7e2tUMH70qVL+PXXX+Hn5wfgzRBchQoVFB73flu3bt2wbt06HDhwQKGWia588cUXOHLkCFq1aoXx48fDxMQEa9euRWZmpsLyGN7e3jA2NsbixYuRnJwMMzMzvPvuu3ByclJ63m+++QadOnWCn58fRo0ahVevXuGHH36Ara2tztem69q1KzZv3gxbW1t4eXkhIiICR48ezVMy4aOPPsL//vc/9O3bFyNHjkSTJk2QlJSEvXv3Ys2aNWjYsCHee+89/Pbbb/jggw9w/PhxtGzZEjk5Obh+/Tp+++03HDp0SGEItl69eggMDMTkyZNhZmaGVatWAQAWLFggb6PpNZ41axZ+/fVXdOrUCZMnT4aDgwM2bdqEu3fv4vfff1c5b23ixIlISUnBp59+CltbW3zyySe4e/euvIr722xsbLB69Wq89957aNy4MQYMGICKFSviwYMHOHDgAFq2bIkVK1bg5s2baN++Pfr16wcvLy+YmJhg9+7dSEhIKHSvyNsGDx6Mc+fOYeTIkbh27ZpCbSUrKyv06NFD43PJ/oxNnjwZgYGBMDY2lsf63XffoUOHDmjVqhXGjh2L5ORkLF26FJ6enlpVyh8+fDiWLFmCsWPHyueKXbp0CT/99BPq1q2Lnj17qjzWyMhIaX21gpg1axZ27tyJNm3aYOzYsahduzZiY2OxceNGxMXFYcOGDRqdp0KFCvLSENeuXUOdOnWwd+9efPHFF+jTpw+qVq2KpKQkbNu2DdHR0fjqq68UkrA+ffqgefPmGDFiBK5evSqv4J2Tk6PwZ0D
m5s2b2LJlC4QQSElJweXLl7Fz5068fPkSS5cuRVBQkE6uD+VSjJPLqQxSVcFbldjYWDFt2jTh6ekpzM3NhaWlpWjSpIn48ssvRXJyskhISBAmJibivffeU3mO9PR0YWlpqfAoem7aPFEDJRW8L126JAIDA4WVlZWwtLQU7dq1U3gUXGbdunWiWrVqwtjYWKMn444ePSpatmwpLCwshI2NjQgODhZXr15VaKOLp+GeP38uRowYIRwdHYWVlZUIDAwU169fF1WqVBHDhg1TaJuYmCgmTpwoKlWqJExNTcU777wjhg0bJp49eyZvk5WVJRYvXizq1q0rzMzMhL29vWjSpIlYsGCB/Mk1If57umfLli2iZs2awszMTDRq1EjpddH0GsfExIg+ffoIOzs7YW5uLnx8fMT+/fs1umYzZ86UV5desWKFsLW1FdnZ2Uqv5fHjx0VgYKCwtbUV5ubmonr16mL48OHyMgjPnj0TEyZMELVr1xbly5cXtra2wtfXV/z2228K51FV6RlKnnxS9vOTlR5Q9tK26vjr16/FpEmTRMWKFYVEIsnzZNyRI0dE8+bNhbm5uXBwcBDvvfeeiIuL0+ozhBDi0aNHYuTIkaJq1arC1NRUuLq6itGjR4unT58qtFNWNTu3gj4NJ4vj/fffF5UqVRImJibCwcFBdO3aVZw5cyZPW3WxxMTECGNjY/mflQsXLojg4GD5nxErKyvRqlWrPD97maSkJDFq1ChRoUIFYWlpKfz9/ZX+Hfn2z9bIyEjY2dmJRo0aiSlTpogrV65o/f1JcxIhlMwUJCIq4zp37gwrK6s8w0VEVPZwGI6ISIm2bduidevWxR0GEZUA7FkiIirlkpOTVT4mL1PYCdNZWVkKj8QrY2trq1XdoIKIj49Xu9/CwkJer4pIU0yWiIhKueHDh2PTpk1q2xT2VhAeHp5veY4NGzZg+PDhhfqc/ORXrmLYsGHYuHGjXmOg0ofJEhFRKXf16lXExsaqbVPYYqzPnz/P8xh+bnXr1i1wDShNHT16VO1+Nzc3pSUkiNRhskRERESkBotSEhEREanBp+F0QCqVIjY2FtbW1lotaUFERETFRwiB1NRUuLm5qV1sncmSDsTGxpaoxUGJiIhIcw8fPsQ777yjcj+TJR2QLVr48OFD+RpVREREVLKlpKTA3d0938WHmSzpgGzozcbGhskSERGRgclvCg0neBMRERGpwWSJiIiISA0mS0RERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNQwuWVq5ciU8PDxgbm4OX19fnDt3TmXbdevWoXXr1rC3t4e9vT0CAgLytB8+fDgkEonCKygoSN9fg4iIiAyEQSVLO3bswPTp0zFv3jxcunQJDRs2RGBgIJ48eaK0fXh4OAYOHIjjx48jIiIC7u7u6NixIx4/fqzQLigoCHFxcfLXr7/+WhRfR60cqUBETCL+iHqMiJhE5EhFcYdERERUJkmEEAZzF/b19UWzZs2wYsUKAIBUKoW7uzsmTZqEWbNm5Xt8Tk4O7O3tsWLFCgwdOhTAm56lFy9eYM+ePQWOKyUlBba2tkhOTtbJcieh0XFYsO8q4pIz5Ntcbc0xL9gLQfVcC31+IiIi0vz+bTA9S1lZWbh48SICAgLk24yMjBAQEICIiAiNzpGeno7s7Gw4ODgobA8PD4eTkxNq1aqFcePGITExUe15MjMzkZKSovDSldDoOIzbckkhUQKA+OQMjNtyCaHRcTr7LCIiIsqfwSRLz549Q05ODpydnRW2Ozs7Iz4+XqNzfPzxx3Bzc1NIuIKCgvDLL78gLCwMixcvxokTJ9CpUyfk5OSoPE9ISAhsbW3lL3d394J9qVxypAIL9l2Fsq4+2bYF+65ySI6IiKgImRR3AEVl0aJF2L59O8LDw2Fubi7fPmDAAPn/169fHw0aNED16tURHh6O9u3bKz3X7NmzMX36dPn7lJQUnSRM5+4m5el
RepsAEJecgXN3k+BXvUKhP4+IiIjyZzA9S46OjjA2NkZCQoLC9oSEBLi4uKg9dsmSJVi0aBEOHz6MBg0aqG1brVo1ODo64vbt2yrbmJmZwcbGRuGlC09SVSdKBWlHREREhWcwyZKpqSmaNGmCsLAw+TapVIqwsDD4+fmpPO7rr7/GwoULERoaiqZNm+b7OY8ePUJiYiJcXYt+IrWTtXn+jbRoR0RERIVnMMkSAEyfPh3r1q3Dpk2bcO3aNYwbNw5paWkYMWIEAGDo0KGYPXu2vP3ixYsxZ84c/Pzzz/Dw8EB8fDzi4+Px8uVLAMDLly/x0Ucf4cyZM7h37x7CwsLQvXt31KhRA4GBgUX+/XyqOsDV1hwSFfslePNUnE9VBxUtiIiISNcMKlnq378/lixZgrlz58Lb2xtRUVEIDQ2VT/p+8OAB4uL+e1ps9erVyMrKQp8+feDq6ip/LVmyBABgbGyMf/75B926dYOnpydGjRqFJk2a4O+//4aZmVmRfz9jIwnmBXsBQJ6ESfZ+XrAXjI1UpVNERESkawZVZ6mkYp0lIiIiw6Pp/bvMPA1nSILquaKDlwvO3U3Ck9QMOFm/GXpjjxIREVHRY7JUQhkbSVgegIiIqAQwqDlLREREREWNyRIRERGRGkyWiIiIiNRgskRERESkBpMlIiIiIjWYLBERERGpwWSJiIiISA0mS0RERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRpMloiIiIjUYLJEREREpAaTJSIiIiI1mCwRERERqcFkiYiIiEgNJktEREREajBZIiIiIlKDyRIRERGRGkyWiIiIiNRgskRERESkBpMlIiIiIjWYLBERERGpwWSJiIiISA0mS0RERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREalhcMnSypUr4eHhAXNzc/j6+uLcuXMq265btw6tW7eGvb097O3tERAQkKe9EAJz586Fq6srLCwsEBAQgFu3bun7axAREZGBMKhkaceOHZg+fTrmzZuHS5cuoWHDhggMDMSTJ0+Utg8PD8fAgQNx/PhxREREwN3dHR07dsTjx4/lbb7++mssX74ca9aswdmzZ1G+fHkEBgYiIyOjqL4WERERlWASIYQo7iA05evri2bNmmHFihUAAKlUCnd3d0yaNAmzZs3K9/icnBzY29tjxYoVGDp0KIQQcHNzw4cffogZM2YAAJKTk+Hs7IyNGzdiwIABGsWVkpICW1tbJCcnw8bGpuBfkIiIiIqMpvdvg+lZysrKwsWLFxEQECDfZmRkhICAAERERGh0jvT0dGRnZ8PBwQEAcPfuXcTHxyuc09bWFr6+vmrPmZmZiZSUFIUXERERlU4Gkyw9e/YMOTk5cHZ2Vtju7OyM+Ph4jc7x8ccfw83NTZ4cyY7T9pwhISGwtbWVv9zd3bX5KkRERGRADCZZKqxFixZh+/bt2L17N8zNzQt1rtmzZyM5OVn+evjwoY6iJCIiopLGpLgD0JSjoyOMjY2RkJCgsD0hIQEuLi5qj12yZAkWLVqEo0ePokGDBvLtsuMSEhLg6uqqcE5vb2+V5zMzM4OZmVkBvgUREREZGoPpWTI1NUWTJk0QFhYm3yaVShEWFgY/Pz+Vx3399ddYuHAhQkND0bRpU4V9VatWhYuLi8I5U1JScPbsWbXnJCIiorLDYHqWAGD69OkYNmwYmjZtCh8fHyxbtgxpaWkYMWIEAGDo0KGoVKkSQkJCAACLFy/G3LlzsW3bNnh4eMjnIVlZWcHKygoSiQRTp07FF198gZo1a6Jq1aqYM2cO3Nzc0KNHj+L6mkRERFSCGFSy1L9/fzx9+hRz585FfHw8vL29ERoaKp+g/eDBAxgZ/ddZtnr1amRlZaFPnz4K55k3bx7mz58PAJg5cyb
S0tIwZswYvHjxAq1atUJoaGih5zURERFR6WBQdZZKKtZZIiIiMjylrs4SERERUXFgskRERESkBpMlIiIiIjWYLBERERGpwWSJiIiISA0mS0RERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRpMloiIiIjUYLJEREREpAaTJSIiIiI1NE6Wnjx5ovA+KioKw4YNQ8uWLdGnTx+Eh4frOjYiIiKiYqdxsuTq6ipPmE6fPg0fHx/cv38fLVu2REpKCjp06IC//vpLb4ESERERFQeJEEJo0tDIyAjx8fFwcnJCx44d4e7ujvXr18v3T506Ff/++y/CwsL0FmxJlZKSAltbWyQnJ8PGxqa4wyEiIiINaHr/LtCcpejoaIwePVph2+jRo/HPP/8U5HREREREJZaJNo1TU1Nhbm4Oc3NzmJmZKewzNzdHenq6ToMjIiIiKm5a9Sx5enrC3t4e9+7dw4ULFxT2XblyBW5ubjoNjoiIiKi4adyzdPz4cYX3rq6uCu/v3r2LMWPG6CYqIiIiohJC4wnepBoneBMRERkevU7wJiIiIiortEqWVq1ahYCAAPTr1y9PiYBnz56hWrVqOg2OiIiIqLhpnCwtX74cH330EWrXrg0zMzN07twZISEh8v05OTm4f/++XoIkIiIiKi4aT/Beu3Yt1q1bh0GDBgEAxo0bhx49euDVq1f4/PPP9RYgERERUXHSOFm6e/cuWrRoIX/fokULHDt2DAEBAcjOzsbUqVP1ER8RERFRsdI4WXJ0dMTDhw/h4eEh31avXj0cO3YM7777LmJjY/URHxEREVGx0njOUqtWrbBr16482728vBAWFoY///xTp4ERERERlQQa9yzNmjULFy9eVLqvbt26OHbsGH7//XedBUZERERUErAopQ6wKCUREZHhYVFKIiIiIh3QWbJUp04dGBsb6+p0RERERCWCxnOW8hMSEoLk5GRdnY6IiIioRNBZstSjRw9dnYqIiIioxCjQMFxycjJu3LiBGzduFHlv0sqVK+Hh4QFzc3P4+vri3LlzKtteuXIFvXv3hoeHByQSCZYtW5anzfz58yGRSBRetWvX1uM3ICIiIkOiVbL0008/wcvLCw4ODvDy8lL4//Xr1+srRrkdO3Zg+vTpmDdvHi5duoSGDRsiMDAQT548Udo+PT0d1apVw6JFi+Di4qLyvHXr1kVcXJz8dfLkSX19BSIiIjIwGg/DffPNN5g/fz4mT56MwMBAODs7AwASEhJw+PBhTJkyBc+fP8eMGTP0FuzSpUsxevRojBgxAgCwZs0aHDhwAD///DNmzZqVp32zZs3QrFkzAFC6X8bExERtMkVERERll8bJ0ooVK7Bhwwb069dPYXudOnXQtm1bNGzYEB999JHekqWsrCxcvHgRs2fPlm8zMjJCQEAAIiIiCnXuW7duwc3NDebm5vDz80NISAgqV66ssn1mZiYyMzPl71NSUgr1+URERFRyaTwM9+TJE9SvX1/l/vr16+PZs2c6CUqZZ8+eIScnR96jJePs7Iz4+PgCn9fX1xcbN25EaGgoVq9ejbt376J169ZITU1VeUxISAhsbW3lL3d39wJ/PhEREZVsGidLzZo1w6JFi/D69es8+3JycrB48WL5kJch6dSpE/r27YsGDRogMDAQBw8exIsXL/Dbb7+pPGb27NlITk6Wvx4+fFiEERMREVFR0moYLjAwEC4uLmjTpo3CnKW//voLpqamOHz4sN4CdXR0hLGxMRISEhS2JyQk6HS+kZ2dHTw9PXH79m2VbczMzGBmZqazzyQiIqKSS+OepQYNGuDmzZtYuHAhrK2tcefOHdy5cwfW1tb44osvcP36ddSrV09vgZqamqJJkyYICwuTb5NKpQgLC4Ofn5/OPufly5eIiYmBq6urzs5JREREhkuropTW1tYYN24cxo0
bp6941Jo+fTqGDRuGpk2bwsfHB8uWLUNaWpr86bihQ4eiUqVKCAkJAfBmUvjVq1fl///48WNERUXBysoKNWrUAADMmDEDwcHBqFKlCmJjYzFv3jwYGxtj4MCBxfIdiYiIqGTRWQXvotC/f388ffoUc+fORXx8PLy9vREaGiofEnzw4AGMjP7rLIuNjUWjRo3k75csWYIlS5bA398f4eHhAIBHjx5h4MCBSExMRMWKFdGqVSucOXMGFStWLNLvRkRERCWTRAghijsIQ5eSkgJbW1skJyfDxsamuMMhIiIiDWh6/y7QcidEREREZYVGydI///wDqVSq71iIiIiIShyNkqVGjRrJC05Wq1YNiYmJeg2KiIiIqKTQKFmys7PD3bt3AQD37t1jLxMRERGVGRo9Dde7d2/4+/vD1dUVEokETZs2hbGxsdK2d+7c0WmARERERMVJo2Tpxx9/RK9evXD79m1MnjwZo0ePhrW1tb5jIyIiojImRypw7m4SnqRmwMnaHD5VHWBsJCnWmDSusxQUFAQAuHjxIqZMmcJkiYiIiHQqNDoOC/ZdRVxyhnybq6055gV7Iahe8a2sUag6S48ePQIAvPPOOzoLyBCxzhIREVHhhEbHYdyWS8idlMj6lFYPaazzhElvdZakUik+//xz2NraokqVKqhSpQrs7OywcOFCTvwmIiIireVIBRbsu5onUQIg37Zg31XkSIunjrbWy518+umnWL9+PRYtWoSWLVsCAE6ePIn58+cjIyMDX375pc6DJCIiotLr3N0khaG33ASAuOQMnLubBL/qFYousP+ndbK0adMm/PTTT+jWrZt8W4MGDVCpUiWMHz+eyRIRERWZkjgZmLT3JFV1olSQdrqmdbKUlJSE2rVr59leu3ZtJCUl6SQoIiKi/JTUycCkPSdrc5220zWt5yw1bNgQK1asyLN9xYoVaNiwoU6CIiIiUkc2GTj30E18cgbGbbmE0Oi4YoqMCsKnqgNcbc2hqk9QgjeJsE9Vh6IMS07rnqWvv/4aXbp0wdGjR+Hn5wcAiIiIwMOHD3Hw4EGdB0hERPS2/CYDS/BmMnAHLxcOyRkIYyMJ5gV7YdyWS5AACj9b2U9wXrBXsf08te5Z8vf3x82bN9GzZ0+8ePECL168QK9evXDjxg20bt1aHzESERHJaTMZmAxHUD1XrB7SGC62ikNtLrbmeikboA2te5YAwM3NjRO5iYioWJT0ycBUcEH1XNHBy6XETdovULJERERUXEr6ZGAqHGMjSbGUB1BH62E4IiKi4pTfZGAAcLExK7bJwFT6MFkiIiKDIpsMDEBlwpTxWoojV+OLLigq1ZgsERGRwZFNBra1LKd0f3J6NksIkM4UKFl6/fo1jh49irVr1yI1NRUAEBsbi5cvX+o0OCIiIlU6eLnA3MRY6b6SsJ4YlR5aT/C+f/8+goKC8ODBA2RmZqJDhw6wtrbG4sWLkZmZiTVr1ugjTiIiIgXn7iYhPqXkridGpYfWPUtTpkxB06ZN8fz5c1hYWMi39+zZE2FhYToNjoiISBWWEKCionXP0t9//43Tp0/D1NRUYbuHhwceP36ss8CIiIjUYQkBKipa9yxJpVLk5OTk2f7o0SNYW1vrJCgiIqL8lPT1xKj00DpZ6tixI5YtWyZ/L5FI8PLlS8ybNw+dO3fWZWxEREQqqSshUBLWE6PSQyKE0OoxgUePHiEwMBBCCNy6dQtNmzbFrVu34OjoiL/++gtOTk76irXESklJga2tLZKTk2FjY1Pc4RARlSmh0XFYsO+qwnpxrrbmmBfsVazriVHJp+n9W+tkCXhTOmDHjh24fPkyXr58icaNG2Pw4MEKE77LEiZLRETFK0cqStx6YlTy6TVZIkVMloiIiAyPpvdvrecshYSE4Oeff86z/eeff8bixYu1PR0RERFRiaZ1srR27VrUrl07z/a6deuyICURERVKjlTg1K1nWHLoOpYcuoFTt5+xAjcVO63rLMXHx8P
VNe+EuYoVKyIujmvwEBFRwYRGx2HWrn/xIj1bvm3F8duwsyyHRb3qc7I2FRute5bc3d1x6tSpPNtPnToFNzc3nQRFRERlS2h0HD7YckkhUZJ5kZ6ND7goLhUjrXuWRo8ejalTpyI7OxvvvvsuACAsLAwzZ87Ehx9+qPMAiYiodMt6LcXsXf/k227Bvqvo4OXCp9yoyGmdLH300UdITEzE+PHjkZWVBQAwNzfHxx9/jNmzZ+s8QCIiKr1Co+Pwye5oPE9/nW9bLopLxUXrZEkikWDx4sWYM2cOrl27BgsLC9SsWRNmZmb6iI+IiEqp0Og4jNtyCdpM3+aiuFQctE6WZKysrNCsWTNdxkJERGVEjlRgwb6rWiVKABfFpeKhdbKUlpaGRYsWISwsDE+ePIFUKlXYf+fOHZ0FR0REpdO5u0kKy5No4u1FcVmxm4qS1snS+++/jxMnTuC9996Dq6srJBL+chIRkXYKMpwmWxSXa8FRUdM6Wfrzzz9x4MABtGzZUh/xEBFRGaDNcJq9ZTmE/H+dJVXznOKTMzBuyyWsHtKYCRPpnNZ1luzt7eHg4KCPWDSycuVKeHh4wNzcHL6+vjh37pzKtleuXEHv3r3h4eEBiUSCZcuWFfqcRERUeD5VHeBqaw51YxNWZsbYPNIHFz7rgKB6rmrnOcm2Ldh3lRW/See0TpYWLlyIuXPnIj09XR/xqLVjxw5Mnz4d8+bNw6VLl9CwYUMEBgbiyZMnStunp6ejWrVqWLRoEVxcXHRyTiIiKjxjIwnmBXsBQJ6ESfL/ryV9G6K1Z0X5XKT85jkJ/FdegEiXJEIIrVLwRo0aISYmBkIIeHh4oFy5cgr7L126pNMA3+br64tmzZphxYoVAACpVAp3d3dMmjQJs2bNUnush4cHpk6diqlTp+rsnDKarlpMRESKtJl/9EfUY0zZHpXvOb8f4I3u3pV0HSqVQprev7Wes9SjR4/CxFVgWVlZuHjxokLhSyMjIwQEBCAiIqJIz5mZmYnMzEz5+5SUlAJ9PhFRWRdUzxUdvFw0erJN03lOLC9AuqZ1sjRv3jx9xJGvZ8+eIScnB87OzgrbnZ2dcf369SI9Z0hICBYsWFCgzyQiIkXGRhKNqnLL5jnFJ2conbckAeDyVnkBIl3Res4SAbNnz0ZycrL89fDhw+IOiYio1MtvnhPwX3kBIl3SOlnKycnBkiVL4OPjAxcXFzg4OCi89MXR0RHGxsZISEhQ2J6QkKBy8ra+zmlmZgYbGxuFFxER6V9QPVesHtIYLraKQ20utuYsG0B6o3WytGDBAixduhT9+/dHcnIypk+fjl69esHIyAjz58/XQ4hvmJqaokmTJggLC5Nvk0qlCAsLg5+fX4k5JxER6VdQPVec/Phd/Dq6Ob4f4I1fRzfHyY/fZaJEeqP1nKWtW7di3bp16NKlC+bPn4+BAweievXqaNCgAc6cOYPJkyfrI04AwPTp0zFs2DA0bdoUPj4+WLZsGdLS0jBixAgAwNChQ1GpUiWEhIQAeDOB++rVq/L/f/z4MaKiomBlZYUaNWpodE4iIip5NJ3nRKQLWidL8fHxqF+/PoA3i+kmJycDALp27Yo5c+boNrpc+vfvj6dPn2Lu3LmIj4+Ht7c3QkND5RO0Hzx4ACOj/zrLYmNj0ahRI/n7JUuWYMmSJfD390d4eLhG5yQiIqKyTes6S7Vq1cIvv/wCX19ftGrVCl27dsWsWbOwY8cOTJo0qUwWc2SdJSIiIsOj6f1b6zlLPXv2lM/xmTRpEubMmYOaNWti6NChGDlyZMEjJiIiIiqBtO5Zyi0iIgIRERGoWbMmgoODdRWXQWHPEhERkeHRWwXv3Pz8/PjkGBEREZVaGiVLe/fuRadOnVCuXDns3btXbdtu3brpJDAiIiKikkCjYTgjIyPEx8fDyclJ4WmzPCeTSJCTk6PTAA0Bh+GIiIgMj06H4aRSqdL/JyIiIirttHoaLjs7G+3bt8etW7f0FQ8
RERFRiaJVslSuXDn8888/+oqFiIiIqMTRus7SkCFDsH79en3EQkRERFTiaF064PXr1/j5559x9OhRNGnSBOXLl1fYv3TpUp0FR0RERFTctE6WoqOj0bhxYwDAzZs3FfZJJBLdREVERERUQmidLB0/flwfcRARERGVSFrPWSIiIiIqSwq03MmFCxfw22+/4cGDB8jKylLYt2vXLp0ERkRERFQSaN2ztH37drRo0QLXrl3D7t27kZ2djStXruDYsWOwtbXVR4xERERExUbrZOmrr77Cd999h3379sHU1BTff/89rl+/jn79+qFy5cr6iJGIiIio2GidLMXExKBLly4AAFNTU6SlpUEikWDatGn48ccfdR4gERERUXHSOlmyt7dHamoqAKBSpUqIjo4GALx48QLp6em6jY6IiIiomGk9wbtNmzY4cuQI6tevj759+2LKlCk4duwYjhw5gvbt2+sjRiIiIqJio3WytGLFCmRkZAAAPv30U5QrVw6nT59G79698dlnn+k8QCIiIqLiJBFCiOIOwtClpKTA1tYWycnJsLGxKe5wiIiISAOa3r+1nrMUEBCAjRs3IiUlpVABEhEREamTIxWIiEnEH1GPERGTiBxp8fTvaD0MV7duXcyePRvjx49Hly5dMGTIEHTu3BnlypXTR3xERERUBoVGx2HBvquIS86Qb3O1Nce8YC8E1XMt0li07ln6/vvv8fjxY+zZswfly5fH0KFD4ezsjDFjxuDEiRP6iJGIiIjKkNDoOIzbckkhUQKA+OQMjNtyCaHRcUUaT6HnLGVkZGDfvn348ssv8e+//yInJ0dXsRkMzlkiIiLSjRypQKvFx/IkSjISAC625jj58bswNpIU6rM0vX8XaG04mfj4eGzfvh1btmzBP//8Ax8fn8KcjoiIiMq4c3eTVCZKACAAxCVn4NzdJPhVr1AkMWmdLKWkpOD333/Htm3bEB4ejmrVqmHw4MHYsWMHqlevro8YiYiIkCMVOHc3CU9SM+BkbQ6fqg6F7lmgkudJqupEqSDtdEHrZMnZ2Rn29vbo378/QkJC0LRpU33ERUREJFeSJvuSfjlZm+u0nS5onSzt3bsX7du3h5GR1nPDiYiItCab7Jt7gq1ssu/qIY2ZMJUiPlUd4GprjvjkjDw/c+C/OUs+VR2KLCatM54OHTowUSIioiKRIxVYsO+q0pumbNuCfVeLrf4O6Z6xkQTzgr0AvEmM3iZ7Py/Yq0iHYJn1EBFRiXUmJlHjyb5UegTVc8XqIY3hYqs41OZia14sPYmFehqOiIh0jxOZ3wiNjsOs3//VqG1RTvalohFUzxUdvFxKxJ8FJktERCUIJzK/oWqekipFOdmXio6xkaTIygOow2E4IqISoqRVLS4u6uYp5SbBm2SyKCf7UtlToJ6ltLQ0nDhxAg8ePEBWVpbCvsmTJ+skMCKisiS/icwSvJnI3MHLpdQPyeVXlDC3op7sS2WP1slSZGQkOnfujPT0dKSlpcHBwQHPnj2DpaUlnJycmCwRERVASaxaXFw0nX9kZ1EOi3rXL1PDk1Q8tB6GmzZtGoKDg/H8+XNYWFjgzJkzuH//Ppo0aYIlS5boI0YiolKvJFYtLi6azj9aOZj1lahoaJ0sRUVF4cMPP4SRkRGMjY2RmZkJd3d3fP311/jkk0/0ESMRUalXEqsWFxdZUUJVA2uyeUrNq5XuHjYqObROlsqVKycvSunk5IQHDx4AAGxtbfHw4UPdRkdEVEZomiCUxonMOVKBiJhE/BH1GBExiQBQ4ooSUtmm9ZylRo0a4fz586hZsyb8/f0xd+5cPHv2DJs3b0a9evX0ESMRUaknq1o8bsslSACFid6lOUEIjY7D/L1XEJ+SKd/mYmOG+d3qYvWQxnnKKLiUwTIKVPy07ln66quv4Or65pf0yy+/hL29PcaNG4enT5/ixx9/1HmAua1cuRIeHh4wNzeHr68vzp07p7b9zp07Ubt2bZibm6N+/fo4ePC
gwv7hw4dDIpEovIKCgvT5FYiIlCppVYv1LTQ6Dh9suaSQKAFAfEomPthyCQBw8uN38evo5vh+gDd+Hd0cJz9+t9RdByr5JEIIg1lQZ8eOHRg6dCjWrFkDX19fLFu2DDt37sSNGzfg5OSUp/3p06fRpk0bhISEoGvXrti2bRsWL16MS5cuyXvBhg8fjoSEBGzYsEF+nJmZGezt7TWOKyUlBba2tkhOToaNjU3hvygRlWlloYJ3jlSgyRdH8CI9W2UbO8tyuPhZh1L33ank0PT+bVDJkq+vL5o1a4YVK1YAAKRSKdzd3TFp0iTMmjUrT/v+/fsjLS0N+/fvl29r3rw5vL29sWbNGgBvkqUXL15gz549BY6LyRIRkXZO3XqGwevP5ttu6yhftKzpWAQRUVmk6f3bYCp4Z2Vl4eLFiwgICJBvMzIyQkBAACIiIpQeExERodAeAAIDA/O0Dw8Ph5OTE2rVqoVx48YhMTFRbSyZmZlISUlReBERkea2nr2nUbuIO8/0GwiRBgwmWXr27BlycnLg7OyssN3Z2Rnx8fFKj4mPj8+3fVBQEH755ReEhYVh8eLFOHHiBDp16oScnByVsYSEhMDW1lb+cnd3L8Q3IyIqW0Kj43AwOkHD1hyCMxS5n2rMkRrMwFW+yvxCugMGDJD/f/369dGgQQNUr14d4eHhaN++vdJjZs+ejenTp8vfp6SkMGEiItKAbFkXTb1drbwszOUyVKV9AWiDSZYcHR1hbGyMhATFf40kJCTAxcVF6TEuLi5atQeAatWqwdHREbdv31aZLJmZmcHMzEzLb0BERNqs+2ZvWU5eeLK034wNmWwB6Nz9SLIFoEvDk5wFSpbCwsIQFhaGJ0+eQCqVKuz7+eefdRJYbqampmjSpAnCwsLQo0cPAG8meIeFhWHixIlKj/Hz80NYWBimTp0q33bkyBH4+fmp/JxHjx4hMTFRXh6BiIh016ujzXItIb3qw9hIUiZuxoaqrCwArXWytGDBAnz++edo2rQpXF1dIZEU3ZefPn06hg0bhqZNm8LHxwfLli1DWloaRowYAQAYOnQoKlWqhJCQEADAlClT4O/vj2+//RZdunTB9u3bceHCBXk9qJcvX2LBggXo3bs3XFxcEBMTg5kzZ6JGjRoIDAwssu9FRFSS6bJXR9PlWqYFeCKonmuZuRkbqrKyALTWydKaNWuwceNGvPfee/qIR63+/fvj6dOnmDt3LuLj4+Ht7Y3Q0FD5JO4HDx7Il2IBgBYtWmDbtm347LPP8Mknn6BmzZrYs2ePvMaSsbEx/vnnH2zatAkvXryAm5sbOnbsiIULF3KYjYgIuh9ikS3rEp+coTQBAt5U8J74bg0AZedmbKjKygLQWidLWVlZaNGihT5i0cjEiRNVDruFh4fn2da3b1/07dtXaXsLCwscOnRIl+EREZUa+ujV0WRZl/nd6srPV1ZuxoaqrCwArXXpgPfffx/btm3TRyxERFSCaNOrow1tlnUpKzfjkkbTMgBlZQFojXqW3n5MXiqV4scff8TRo0fRoEEDlCtXTqHt0qVLdRshEREVC3326gTVc0UHL5d8J43nN2wnwZsky9BvxiWJNnPUysoC0BolS5GRkQrvvb29AQDR0dE6D4iIiEoGfffqGBtJ8p1nVFZuxiVFQeaoyXoKcydYLqWotINBrQ1XUnFtOCIqjXKkAq0WH8u3V+fkx+/qPVlhnSX9k/28VQ295vfzNsSioZrev7We4D1y5Eh8//33sLa2VtielpaGSZMm6a3OEhERFa2S1Kuj6bAdFVxhnzzUpKfQUGk9wXvTpk149epVnu2vXr3CL7/8opOgiIioZNBmMra+yW7G3b0rwa96BSZKOsYnD1XTuGcpJSUFQggIIZCamgpz8//+4OTk5ODgwYNwcnLSS5BERFR82KtTNmg69+xZaiZypKJM/fw1Tpbs7OwgkUggkUjg6emZZ79EIsGCBQt0GhwREZUMpXmIhd7QpGAoACw
8cA0/nbxbpuaLaTzB+8SJExBC4N1338Xvv/8OB4f/HtM0NTVFlSpV4ObmprdASzJO8CYiotJA9jQcALUJk6xPydDX5dP0/q3103D3799H5cqVi3RNuJKOyRIREZUWyp48VKYon4bUF50+DffPP/+gXr16MDIyQnJyMv7991+VbRs0aKB9tERERFQiyOaobTx1FwsPXFPZriyty6dRsuTt7Y34+Hg4OTnB29sbEokEyjqkJBIJcnJydB4kERERFR1jIwkcrTVbUP7U7WelfuK/RsnS3bt3UbFiRfn/ExERkX4Vd5FHTZ+OW3H8tvz/S2uhUFbw1gHOWSIiIl0qCRXL86vgroyhTfzW9P6tdVHKypUrY+jQoVi/fj1iYmIKFSQREREpkj2RlnuCtWx9ttDouCKJQ1bBHfgvCcqPLKlasO8qcqSlpy9G62Tpq6++grm5ORYvXoyaNWvC3d0dQ4YMwbp163Dr1i19xEhERFQm5EgFFuy7qrQnpzgSEVUV3NV5e+J3aaH12nBDhgzBkCFDAABxcXE4ceIE9u/fj/Hjx0MqlXKCNxERUQEVdn02fchdwf1WQipWHM9/ZKk0LYuidbIEAOnp6Th58iTCw8Nx/PhxREZGol69emjbtq2OwyMiIio7Sur6bG9XcI+ISdQoWdJ0grgh0DpZatGiBSIjI1GnTh20bdsWs2bNQps2bWBvb6+P+IiIiMoMTROM4kxE8lsWRVas0qeqg5K9hknrOUvXr19H+fLlUbt2bdSuXRt16tRhokRERKQDskRE1YRqCd48FVeciYi6id+y9/OCvUpVvSWtk6XExEQcO3YMzZs3x6FDh9CyZUtUqlQJgwYNwrp16/QRIxERUZlgKImIqonfLrbmBlM2QBuFqrMkhMDFixexYsUKbN26tcxO8GadJSIyNMVd8JDUKwl1ljRh6L9HOl0b7m2XLl1CeHg4wsPDcfLkSaSmpqJ+/fqYNGkS/P39CxU0ERHpn6HciMuy3E+gldRE5O2J36WZ1j1LJiYmaNSoEfz9/eHv7482bdrA1tZWX/EZBPYsEZGhkBU8zP0Xv6FVXi7pDL3HpazQW89SUlISEwIiIgOUX8FDCd4UPOzg5cIbeyGw56700XqCNxMlIiLDdCYmUeOCh1QwJWWpEtItrZMlIiIyPKHRcZiw7ZJGbUtT5eWiVNKWKiHdYbJERFTKyXo7XrzK1qh9aaq8XJS0WaqEDEuBljshIiLDoK63I7fclZc5SVk7JXWpEio8JktERKVYfr0duckKHnKSsvYMYakSKhithuHi4uKwZcsWHDx4EFlZWQr70tLS8Pnnn+s0OCIiKhxNezHsLMrJywZwknLBGMJSJVQwGidL58+fh5eXFyZMmIA+ffqgbt26uHLlinz/y5cvsWDBAr0ESUREBaNpL8bKwW8SJU5SLjhDWaqEtKdxsvTJJ5+gZ8+eeP78ORISEtChQwf4+/sjMjJSn/EREVEhaNrb0bzamyrMnKRcOGVtzbSyQuM5SxcvXsTKlSthZGQEa2trrFq1CpUrV0b79u1x6NAhVK5cWZ9xEhFRAch6O8ZtuQQJoNBjpKy3g5OUC89QliohzWk1wTsjQ/EPx6xZs2BiYoKOHTvi559/1mlgRESkG7LejtwTtl2UTNjmJGXdKCtrppUVGidL9erVw+nTp9GgQQOF7TNmzIBUKsXAgQN1HhwREemGpr0dsmG7+OQMpfOWcpcXICoLNJ6zNHToUJw8eVLpvpkzZ2LBggUciiMiKsFkvR3dvSvBr3oFpcNCnKRcduVIBSJiEvFH1GNExCRyEv9bJEIIXo1C0nTVYiIiQ8E6S2VLWf15a3r/1jhZysjIwOHDh9GuXTtYW1vn+bDw8HAEBgbCzMyscJEbICZLRFQaq12Xxu9EecnqauVOBmQ/6dL8FJ+m92+N5yytXbsWe/fuRbdu3fLss7GxwfLly/HgwQNMnDixYBETERmo0vqvck5SLv3yq6slwZu6Wh2
8XMp0oqzxnKWtW7di6tSpKvdPnToVv/zyiy5iUmvlypXw8PCAubk5fH19ce7cObXtd+7cidq1a8Pc3Bz169fHwYMHFfYLITB37ly4urrCwsICAQEBuHXrlj6/AhGVIqW52jXnsJR+rKulGY2TpVu3bqFhw4Yq9zdo0EDvScaOHTswffp0zJs3D5cuXULDhg0RGBiIJ0+eKG1/+vRpDBw4EKNGjUJkZCR69OiBHj16IDo6Wt7m66+/xvLly7FmzRqcPXsW5cuXR2BgYJ4yCUREuZXGateyBGnhvito9uURDFx3BlO2R2HgujNotfiYQSd/lBframlG42Tp9evXePr0qcr9T58+xevXr3USlCpLly7F6NGjMWLECHh5eWHNmjWwtLRUWePp+++/R1BQED766CPUqVMHCxcuROPGjbFixQoAb3qVli1bhs8++wzdu3dHgwYN8MsvvyA2NhZ79uzR63chIsNXmv5VniMV+P7oLTRZ+CZBWn/qHpLSshXalIbeMlLEulqa0ThZqlu3Lo4ePapy/+HDh1G3bl2dBKVMVlYWLl68iICAAPk2IyMjBAQEICIiQukxERERCu0BIDAwUN7+7t27iI+PV2hja2sLX19flecEgMzMTKSkpCi8iKjsKS3/Kg+NjkOTL47gu6M38eJVtsp2htpbRqpx8V/NaJwsjRw5EgsXLsT+/fvz7Nu3bx++/PJLjBw5UqfBve3Zs2fIycmBs7OzwnZnZ2fEx8crPSY+Pl5te9l/tTknAISEhMDW1lb+cnd31/r7EJHhKw3/Kg+NjsMHWy7hRbrqJOlthtRbRvljXS3NaJwsjRkzBj169EC3bt3g5eWFnj17omfPnqhTpw569OiB4OBgjBkzRp+xlhizZ89GcnKy/PXw4cPiDomIioGh/6tcNueqIEp6bxlpjov/5k+rteG2bNmCbt26YevWrbh58yaEEKhVqxYWLFiAfv366StGAICjoyOMjY2RkJCgsD0hIQEuLi5Kj3FxcVHbXvbfhIQEuLq6KrTx9vZWGYuZmVmZrCdFRIq0XaRWG0VR4yi/OVfqyHrLWIupdODiv+pplSwBQL9+/fSeGCljamqKJk2aICwsDD169AAASKVShIWFqazt5Ofnh7CwMIWSB0eOHIGfnx8AoGrVqnBxcUFYWJg8OUpJScHZs2cxbtw4fX4dIioltFmkVlNFVbepIL1Db68NV1rrS5VVrKulmsbJklQqxTfffIO9e/ciKysL7du3x7x582BhYaHP+BRMnz4dw4YNQ9OmTeHj44Nly5YhLS0NI0aMAPBm/bpKlSohJCQEADBlyhT4+/vj22+/RZcuXbB9+3ZcuHABP/74IwBAIpFg6tSp+OKLL1CzZk1UrVoVc+bMgZubmzwhIyLKjy7/Va6qmrLsSTRdDosUdC7VvGAvHLkaX2RxEhU3jZOlL7/8EvPnz0dAQAAsLCzw/fff48mTJyof29eH/v374+nTp5g7dy7i4+Ph7e2N0NBQ+QTtBw8ewMjov2lYLVq0wLZt2/DZZ5/hk08+Qc2aNbFnzx7Uq1dP3mbmzJlIS0vDmDFj8OLFC7Rq1QqhoaEwNy+5EzKJqOTRxb/KNanbNH/vFZ1VU5bNuYpPzlD6mbnJeo06eLmg1eJjrPpMZYbGa8PVrFkTM2bMwNixYwEAR48eRZcuXfDq1SuFBKUs4tpwRKQLETGJGLjuTL7tpgV4YkpATZ18pqqeLBnzckYY5FMZHbxc5L1lmsb56+jmHNahEk3T+7fGWc6DBw/QuXNn+fuAgABIJBLExsYWLlIiIgKg+Ryi747e1FlhyKB6rhjTpqrK/RnZUvhUdYBf9QryXqLSUl+KSFNaVfDOPTRVrlw5ZGdrVpuDiIjU02YOUUELQ+Ze7y3rtRR7L6tOvGRDam9/VmmoL0WkDY3nLAkhMHz4cIVH5jMyMvDBBx+gfPny8m27du3SbYRERGWEbA6RJo/zywpDajPMpezpNYfypkhKy1J
5zNtFKGWfld9cp7efmCMqDTTuWRo2bBicnJwUKlcPGTIEbm5uCtuIiKhg3q6mrAlthrlkc5NyJ2LqEiVVn8Wqz1TWaNyztGHDBn3GQUREeDOHaFpATXx39Fa+bTUd5lL3lJ2mcn+WPupLEZVUWhelJCIi/Zr4bk38eu4h4lOU9xxpO8xVmErdAOBQvhyaVLHPs51Vn6msKNvP/BMRlUDGRhLM7+YFCXQzzFXYp9KS0rLh/81xpU/gyepLdfeupPDEHFFpwmSJiKgE0uXippoO1zmUL6dyX9z/V+bWVckCIkPCYTgiohJKV8Ncmj69duzDtmi5+JjKSd8CrMxNZRN7loiISjBdDHNp+vRa1MMX+T4dJysjQFSWMFkiIirlcqQCthamGNnSA/a5htreHtaLT36l0fk0bUdUWnAYjoioFFNViLKHt5vCem+A5jWXNG1HVFqwZ4mIqJRSVYjyeVoWNpy6h+RXWQrDeg5WZrlPoZSm7YhKCyZLRESlkLpClLJtudd8c7HR7Kk5TdsRlRZMloiISqH8ClG+veabjOypOXVcueYblUFMloiISiFNC1EqW/NNVTFMCbjmG5VNTJaIiEohTQtRqlrzTRfFMIlKCz4NR0RUCmlaiFLZkBrXfCNSxGSJiKgUkg2pjdtyCRJAIWHSZH05WTFMIuIwHBFRqcUhNcOSIxWIiEnEH1GPERGTqPCkIhUv9iwREZViHFIzDMqKh7rammNesBeT2hJAIoRg6lpIKSkpsLW1RXJyMmxsbIo7HCIiMiCy4qG5b8aydJa9gPqj6f2bw3BERETFpCDFQ6noMVkiIiIqJgUpHkpFj8kSERFRMSlI8VAqekyWiIiIiklBi4dS0WKyREREVExkxUNVPZsoAdfjKwmYLBERERUTWfFQQPl6fADX4ysJWGeJiKgMyZEKnLubhPjkV0hKy4KDlRlcbFh7qTjJiofmrrPkwjpLJQaTJSKiMkJZ4UMZFkAsXiweWrKxKKUOsCglEZV0qgofvk0CFkCksoVFKYmICMCbobf5e6+oTZRkWACRKC8mS0REpdyKY7cRn5KZbzsWQCRSjskSEVEpFhodh++O3tTqGBZAJFLEZImIqJTKkQrM2vWv1sexACKRIj4NR0RUCshKArz9JNWZO4l4kZ6t8TkkePO4OgsgEiliskREZOCUlQRwtTVHw3dstTqPAAsgEinDZImIyICpKgkQl5yhdjV7Zewsy6GDl4vugiMqJThniYjIQOVIBRbsu6pRSQBNvEjP5pNwREqwZ4mIyECdu5ukde9Rfk7dfsYK0kS5GEzPUlJSEgYPHgwbGxvY2dlh1KhRePnypdpjMjIyMGHCBFSoUAFWVlbo3bs3EhISFNpIJJI8r+3bt+vzqxAR6YQ+HvFfcfw2pmyPwsB1Z9Bq8TGERsfp/DOIDI3BJEuDBw/GlStXcOTIEezfvx9//fUXxowZo/aYadOmYd++fdi5cydOnDiB2NhY9OrVK0+7DRs2IC4uTv7q0aOHnr4FEZHu6PsR//jkDIzbcokJE5V5BrE23LVr1+Dl5YXz58+jadOmAIDQ0FB07twZjx49gpubW55jkpOTUbFiRWzbtg19+vQBAFy/fh116tRBREQEmjdvDuBNz9Lu3bsLlSBxbTgiKg45UoFWi48hPjlDZ/OWcpOVEzj58bsckqNSp1StDRcREQE7Ozt5ogQAAQEBMDIywtmzZ5Uec/HiRWRnZyMgIEC+rXbt2qhcuTIiIiIU2k6YMAGOjo7w8fHBzz//jPzyx8zMTKSkpCi8iIiKw4BmlfWWKAFcAoUIMJAJ3vHx8XByclLYZmJiAgcHB8THx6s8xtTUFHZ2dgrbnZ2dFY75/PPP8e6778LS0hKHDx/G+PHj8fLlS0yePFllPCEhIViwYEHBvxARUSEpq62kT1wChcqyYu1ZmjVrltIJ1m+/rl+/rtcY5syZg5YtW6JRo0b4+OOPMXPmTHzzzTdqj5k9ezaSk5Plr4c
PH+o1RiIyfDlSgYiYRPwR9RgRMYnIkRa8P0hWW0lVojQtoCZWDWoMh/KmBf6M3O49S9fZuYgMTbH2LH344YcYPny42jbVqlWDi4sLnjx5orD99evXSEpKgouL8gJqLi4uyMrKwosXLxR6lxISElQeAwC+vr5YuHAhMjMzYWZmprSNmZmZyn1ERLmpqrA9L9gLQfVctTqXJrWVNkXcw5yudfF9P29M3hGJ5yqWPJHNRxJCICElU+05lx29iVouVujg5ZJnWRXOZaLSrliTpYoVK6JixYr5tvPz88OLFy9w8eJFNGnSBABw7NgxSKVS+Pr6Kj2mSZMmKFeuHMLCwtC7d28AwI0bN/DgwQP4+fmp/KyoqCjY29szGSIinVBVYVv2pNnqIY21Spg0qa2UlJaNaTuiALypyg28SYzejkGW3swL9gIAfLDlUr6fPXvXv5i/9wriUzLl27RN+pStYcdki0o6g5izVKdOHQQFBWH06NFYs2YNsrOzMXHiRAwYMED+JNzjx4/Rvn17/PLLL/Dx8YGtrS1GjRqF6dOnw8HBATY2Npg0aRL8/PzkT8Lt27cPCQkJaN68OczNzXHkyBF89dVXmDFjRnF+XSIqJdT1Agm8SVgW7LuKDl4uGicM2s4dSv7/XiVby3IKi+q65EpypgXUxHdHb6k8jwCU9lDFJWfggy2XMC2gJia+W1Pt91DWw2ZnUQ4jWlbFxHdrMGmiEssgkiUA2Lp1KyZOnIj27dvDyMgIvXv3xvLly+X7s7OzcePGDaSn/zeu/t1338nbZmZmIjAwEKtWrZLvL1euHFauXIlp06ZBCIEaNWpg6dKlGD16dJF+NyIqnfLrBXr7STO/6hU0Oqe2tZVkSZm5iRG2vu+LZy8zlfboeDiW1+q8uX139BZ+PfcQ87sp72VS1cP24lU2vjt6ExtO38WiXvW1HpYsDdjbVvIZRJ2lko51lohImT+iHmPK9qh8230/wBvdvStpdM7C1Fb6dXRzlUlZREwiBq47o+UZ85IAeYYWZTFr8uTeGi2HJQ2dLuezkfZKVZ0lIiJDpGkvkDa9RcZGEvk8I237HtQN4flUdYCrrbnW51Rmwb6rCk/7abOGXe5jSzNVTzWycnrJw2SJiEhP8ktAJHjTi+BT1UGr8wbVc8XqIY3hYqvdkJy6pOztJKww3h5alJVL+FOLm35ZKYCZ33w2oGwljiUdkyUiIj1R1wv09tNoBZmfElTPFSc/fhe/jm6O7/o1hEN500InZUH1XLFyUGPoYrrMkavxaLX4GAauO4NfIu5rdWxZKICpzXw2Kn5MloiI9EhVL5CLrbnWZQNyMzaSwK96BfRs/A6+6lkPgPKhOQGgcz0XeW+POvblTaGLzoyfT90rcHVxfS8QXBJomhCWhcTREBjM03BERIYqqJ6r3os5ypKy3JOFjSSAVADrT93D+lP38p08rIubs+wztSUrkqntsKQh0sd8NtIfJktEREVA1gukT28nZUeuxuPnU/fyJC35FcMszM1ZVviyoIkSUPBhSUMjm8+m6qnGspQ4GgIOwxERlSLGRhL4VHXAn9HKFxnPb/KwppPSVw1qDFclQ4ujWnpoFKdlOeM8xxZ2WNKQ6HM+G+kee5aIiEqZwhTDlN3EVS1/IgD5MF5gvbxDi+fuJmH9qXv5xrhuWFMYSSRluhCjqqHT3NXVqfgxWSIiKmWKavKwsqHF/IaXAMDeshyaV6tQ5pIjZYpiPhsVHofhiIhKmcJMHpbV/1FFtp6dqqfqZD1T6qYtPU/PxpGryocJyyJZ0tnduxL8qjOJLImYLBERlTKFKYapi/o/HbxcYGdZTuX+/BIuopKGyRIRUSlTmMnDuhjCO3c3CS/Ss1XuZ8FFMjRMloiISqGCFsPURf0fFlyk0oYTvImISqmCTB7WRf0fFlyk0obJEhFRKaZtMUzZEN64LZfkRSZlNK3/w4KLhi1HKvh0Xi5MloiICMB/N8nM11JMDaiJX889QHxKpny
/pvV/dJFwUfEIjY7LU/cpvyVyygKJEIKPIxRSSkoKbG1tkZycDBsbm+IOh4hIa8puki425hjoUxkejpYF6mHgjdewhEbHYdyWS3l6A2U/8dJYYV3T+zeTJR1gskREhiy/m+TKQY1hX960QMMyHNIxDDlSgVaLj6ksGyEbOj358bul6uen6f2bw3BERGWYrAilsn81y7ZN/PWSwuK42vQOFcUCwlR4hVkipyxg6QAiojIsv5skAOSuHRmfnIFxWy4hNDpOq8/KkQpExCTij6jHiIhJZFHKEoTlHtRjzxIRURlWkJufwH9VuDt4uWg0LKNs/pJD+XL4ons9dG7gpnUMpFss96Aee5aIiMqwgt78tKnCLZsTlbsHKyktG+O3RSLkoOq16KhoFGaJnLKAyRIRURmW300yP/n1TKmbEyWz9q+7OPiPdkN6pFuFWSKnLGCyRERUhqm7SWoiv54pTeZEAcCcP6I5h6mYFXSJnLKAc5aIiMo42U0y95wiI0neyd0ymlbh1nROVGJaVpl90qokKcgSOWUBkyUiIlJ6k3yelokJ2yIBFLwKtzZzosrqk1YlDcs95MVkiYiIACi/Sa42kuSt7K1FnSWfqg5wKF8OSWnZ+bYtq09aUcnHZImIiFQq7LCMsZEEX3Svh/H/30OlSll+0opKPiZLRESkVmGHZTo3cMPYRy+w9q+7SvdLULaftKKSj0/DERGR3s3u7IVVgxrDobypwnZXPmlFBoA9S0REVCQ6N3BFYD0+aUWGh8kSEREVGT5pRYaIw3BEREREajBZIiIiIlKDyRIRERGRGkyWiIiIiNRgskRERESkBpMlIiIiIjWYLBERERGpwWSJiIiISA0mS0RERERqsIK3DgghAAApKSnFHAkRERFpSnbflt3HVWGypAOpqakAAHd392KOhIiIiLSVmpoKW1tblfslIr90ivIllUoRGxsLa2trSCSGsSBkSkoK3N3d8fDhQ9jY2BR3OCUKr416vD6q8dqox+ujGq+Nevq6PkIIpKamws3NDUZGqmcmsWdJB4yMjPDOO+8UdxgFYmNjwz+YKvDaqMfroxqvjXq8Pqrx2qinj+ujrkdJhhO8iYiIiNRgskRERESkBpOlMsrMzAzz5s2DmZlZcYdS4vDaqMfroxqvjXq8Pqrx2qhX3NeHE7yJiIiI1GDPEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNZgslSFJSUkYPHgwbGxsYGdnh1GjRuHly5caHSuEQKdOnSCRSLBnzx79BloMCnJtxo4di+rVq8PCwgIVK1ZE9+7dcf369SKKuGhpe32SkpIwadIk1KpVCxYWFqhcuTImT56M5OTkIoy6aBTkd+fHH39E27ZtYWNjA4lEghcvXhRNsEVg5cqV8PDwgLm5OXx9fXHu3Dm17Xfu3InatWvD3Nwc9evXx8GDB4so0qKnzbW5cuUKevfuDQ8PD0gkEixbtqzoAi0m2lyfdevWoXXr1rC3t4e9vT0CAgLy/V0rDCZLZcjgwYNx5coVHDlyBPv378dff/2FMWPGaHTssmXLDGYpl4IoyLVp0qQJNmzYgGvXruHQoUMQQqBjx47IyckpoqiLjrbXJzY2FrGxsViyZAmio6OxceNGhIaGYtSoUUUYddEoyO9Oeno6goKC8MknnxRRlEVjx44dmD59OubNm4dLly6hYcOGCAwMxJMnT5S2P336NAYOHIhRo0YhMjISPXr0QI8ePRAdHV3EkeufttcmPT0d1apVw6JFi+Di4lLE0RY9ba9PeHg4Bg4ciOPHjyMiIgLu7u7o2LEjHj9+rJ8ABZUJV69eFQDE+fPn5dv+/PNPIZFIxOPHj9UeGxkZKSpVqiTi4uIEALF79249R1u0CnNt3nb58mUBQNy+fVsfYRYbXV2f3377TZiamors7Gx9hFksCnttjh8/LgCI58+f6zHKouPj4yMmTJggf5+TkyPc3NxESEiI0vb9+vUTXbp0Udjm6+srxo4
dq9c4i4O21+ZtVapUEd99950eoyt+hbk+Qgjx+vVrYW1tLTZt2qSX+NizVEZERETAzs4OTZs2lW8LCAiAkZERzp49q/K49PR0DBo0CCtXriy1/7op6LV5W1paGjZs2ICqVavC3d1dX6EWC11cHwBITk6GjY0NTExKz5KUuro2pUFWVhYuXryIgIAA+TYjIyMEBAQgIiJC6TEREREK7QEgMDBQZXtDVZBrU5bo4vqkp6cjOzsbDg4OeomRyVIZER8fDycnJ4VtJiYmcHBwQHx8vMrjpk2bhhYtWqB79+76DrHYFPTaAMCqVatgZWUFKysr/Pnnnzhy5AhMTU31GW6RK8z1kXn27BkWLlyo8bCvodDFtSktnj17hpycHDg7Oytsd3Z2Vnkt4uPjtWpvqApybcoSXVyfjz/+GG5ubnmSb11hsmTgZs2aBYlEovZV0EnHe/fuxbFjxwx2YqE+r43M4MGDERkZiRMnTsDT0xP9+vVDRkaGjr6BfhXF9QGAlJQUdOnSBV5eXpg/f37hAy8CRXVtiKjwFi1ahO3bt2P37t0wNzfXy2eUnv7wMurDDz/E8OHD1bapVq0aXFxc8kyUe/36NZKSklQOrx07dgwxMTGws7NT2N67d2+0bt0a4eHhhYhc//R5bWRsbW1ha2uLmjVronnz5rC3t8fu3bsxcODAwoavd0VxfVJTUxEUFARra2vs3r0b5cqVK2zYRaIork1p4+joCGNjYyQkJChsT0hIUHktXFxctGpvqApybcqSwlyfJUuWYNGiRTh69CgaNGigvyD1MhOKShzZRNQLFy7Itx06dEjtRNS4uDjx77//KrwAiO+//17cuXOnqELXu4JcG2UyMjKEhYWF2LBhgx6iLD4FvT7JycmiefPmwt/fX6SlpRVFqEWusL87pXGC98SJE+Xvc3JyRKVKldRO8O7atavCNj8/v1I7wVuba/O2sjLBW9vrs3jxYmFjYyMiIiL0Hh+TpTIkKChINGrUSJw9e1acPHlS1KxZUwwcOFC+/9GjR6JWrVri7NmzKs+BUvg0nBDaX5uYmBjx1VdfiQsXLoj79++LU6dOieDgYOHg4CASEhKK62vojbbXJzk5Wfj6+or69euL27dvi7i4OPnr9evXxfU19KIgf67i4uJEZGSkWLdunQAg/vrrLxEZGSkSExOL4yvozPbt24WZmZnYuHGjuHr1qhgzZoyws7MT8fHxQggh3nvvPTFr1ix5+1OnTgkTExOxZMkSce3aNTFv3jxRrlw58e+//xbXV9Abba9NZmamiIyMFJGRkcLV1VXMmDFDREZGilu3bhXXV9Arba/PokWLhKmpqfjf//6n8PdLamqqXuJjslSGJCYmioEDBworKythY2MjRowYofCLdffuXQFAHD9+XOU5SmuypO21efz4sejUqZNwcnIS5cqVE++8844YNGiQuH79ejF9A/3S9vrIekyUve7evVs8X0JPCvLnat68eUqvTWnolfzhhx9E5cqVhampqfDx8RFnzpyR7/P39xfDhg1TaP/bb78JT09PYWpqKurWrSsOHDhQxBEXHW2ujez3JvfL39+/6AMvItpcnypVqii9PvPmzdNLbBIhhNDfIB8RERGRYePTcERERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIdE4ikWDPnj1q2wwfPhw9evQoknj0xcPDw2AXms5t/vz5cHZ21uhnR1TWMFkiMhDDhw+Xr3hvamqKGjVq4PPPP8fr16/lbYQQ+PHHH+Hr6wsrKyvY2dmhadOmWLZsGdLT0wEAV65cQe/eveHh4QGJRKKXm31cXBw6deoEALh37x4kEgmioqJ0/jnF7fz58xgzZkxxh1Fo165dw4IFC7B27VqFn11uDx48QJcuXWBpaQknJyd89NFHCr9/6mRmZsLb21vp74IQAkuWLIGnpyfMzMxQqVIlfPnll4X9WkQ6Y1LcARC
R5oKCgrBhwwZkZmbi4MGDmDBhAsqVK4fZs2cDAN577z3s2rULn332GVasWIGKFSvi8uXLWLZsGTw8PNCjRw+kp6ejWrVq6Nu3L6ZNm6aXOEv7SupZWVkwNTVFxYoVizsUnYiJiQEAdO/eHRKJRGmbnJwcdOnSBS4uLjh9+jTi4uIwdOhQlCtXDl999VW+nzFz5ky4ubnh8uXLefZNmTIFhw8fxpIlS1C/fn0kJSUhKSmpcF+KSJf0sogKEencsGHDRPfu3RW2dejQQTRv3lwIIcSOHTsEALFnz548x0qlUvHixYs82zVZzVwqlQpHR0exc+dO+baGDRsKFxcX+fu///5bmJqairS0NCGE4hqCULG2lez7fPPNN8LFxUU4ODiI8ePHi6ysLJWx3L59W3Tr1k04OTmJ8uXLi6ZNm4ojR46obH/jxg0BQFy7dk1h+9KlS0W1atWEEEK8fv1ajBw5Unh4eAhzc3Ph6ekpli1bptBeFusXX3whXF1dhYeHh9Lr9+2334p69eoJS0tL8c4774hx48YprBO3YcMGYWtrK0JDQ0Xt2rVF+fLlRWBgoIiNjVX4vPXr1wsvLy9hamoqXFxcxIQJE+T7nj9/LkaNGiUcHR2FtbW1aNeunYiKilJ5DYQQ4p9//hHt2rUT5ubmwsHBQYwePVoel7J16pQ5ePCgMDIyki9sKoQQq1evFjY2NiIzM1Pt5x88eFDUrl1bXLlyRQAQkZGR8n1Xr14VJiYmpXZdRSodOAxHZMAsLCyQlZUFANi6dStq1aqF7t2752knkUhga2tboM+QSCRo06YNwsPDAQDPnz/HtWvX8OrVK1y/fh0AcOLECTRr1gyWlpZ5jj937hwA4OjRo4iLi8OuXbvk+44fP46YmBgcP34cmzZtwsaNG7Fx40aVsbx8+RKdO3dGWFgYIiMjERQUhODgYDx48EBpe09PTzRt2hRbt25V2L5161YMGjQIACCVSvHOO+9g586duHr1KubOnYtPPvkEv/32m8IxYWFhuHHjBo4cOYL9+/cr/TwjIyMsX74cV65cwaZNm3Ds2DHMnDlToU16ejqWLFmCzZs346+//sKDBw8wY8YM+f7Vq1djwoQJGDNmDP7991/s3bsXNWrUkO/v27cvnjx5gj///BMXL15E48aN0b59e5U9MWlpaQgMDIS9vT3Onz+PnTt34ujRo5g4cSIAYMaMGdiwYQOAN8OncXFxSs8TERGB+vXrw9nZWb4tMDAQKSkpuHLlitJjACAhIQGjR4/G5s2blf5+7Nu3D9WqVcP+/ftRtWpVeHh44P3332fPEpUsxZ2tEZFm3u5Zkkql4siRI8LMzEzMmDFDCCFEnTp1RLdu3bQ6pyY9S0IIsXz5clG3bl0hhBB79uwRvr6+onv37mL16tVCCCECAgLEJ598Im+Pt3qWZKunv92bIPs+VapUEa9fv5Zv69u3r+jfv79W36Fu3brihx9+ULn/u+++E9WrV5e/V9Xb9LYJEyaI3r17K8Tq7Oycpwclv+u3c+dOUaFCBfn7DRs2CADi9u3b8m0rV64Uzs7O8vdubm7i008/VXq+v//+W9jY2IiMjAyF7dWrVxdr165VesyPP/4o7O3txcuXL+XbDhw4oNBLtHv3bpU9SjKjR48WHTt2VNiWlpYmAIiDBw8qPUYqlYqgoCCxcOFCIYTy34WxY8cKMzMz4evrK/766y9x/Phx4e3tLdq1a6c2HqKixJ4lIgOyf/9+WFlZwdzcHJ06dUL//v0xf/58AG8myeqLv78/rl69iqdPn+LEiRNo27Yt2rZti/DwcGRnZ+P06dNo27at1uetW7cujI2N5e9dXV3x5MkTle1fvnyJGTNmoE6dOrCzs4OVlRWuXbumsmcJAAYMGIB79+7hzJkzAN70KjVu3Bi1a9eWt1m5ciWaNGmCihUrwsrKCj/++GOec9avXx+mpqZqv8/Ro0fRvn17VKpUCdbW1njvvfeQmJgon1wPAJaWlqh
evbrS7/zkyRPExsaiffv2Ss9/+fJlvHz5EhUqVICVlZX8dffuXfm8o9yuXbuGhg0bonz58vJtLVu2hFQqxY0bN9R+n8L64YcfkJqaKp9Tp4xUKkVmZiZ++eUXtG7dGm3btsX69etx/PhxvcdHpCkmS0QGpF27doiKisKtW7fw6tUrbNq0SX4T9PT0lA+L6Vr9+vXh4OCAEydOKCRLJ06cwPnz55GdnY0WLVpofd5y5copvJdIJJBKpSrbz5gxA7t378ZXX32Fv//+G1FRUahfv758KFIZFxcXvPvuu9i2bRsAYNu2bRg8eLB8//bt2zFjxgyMGjUKhw8fRlRUFEaMGJHnnG8nG8rcu3cPXbt2RYMGDfD777/j4sWLWLlyJQAonEvZd5YluhYWFmo/4+XLl3B1dUVUVJTC68aNG/joo4/UHltYLi4uSEhIUNgme69qQv+xY8cQEREBMzMzmJiYyIcTmzZtimHDhgF4kyyamJjA09NTflydOnUAQG0STFSUmCwRGZDy5cujRo0aqFy5MkxMFB9mHTRoEG7evIk//vgjz3FCCCQnJxf4cyUSCVq3bo0//vgDV65cQatWrdCgQQNkZmZi7dq1aNq0qcpkQtYbk5OTU+DPlzl16hSGDx+Onj17on79+nBxccG9e/fyPW7w4MHYsWMHIiIicOfOHQwYMEDhnC1atMD48ePRqFEj1KhRQ2UvjToXL16EVCrFt99+i+bNm8PT0xOxsbFancPa2hoeHh4ICwtTur9x48aIj4+XJx5vvxwdHZUeU6dOHVy+fBlpaWnybadOnYKRkRFq1aqlcWx+fn74999/FXr+jhw5AhsbG3h5eSk9Zvny5bh8+bI8qTt48CAAYMeOHfLSAC1btsTr168VrvnNmzcBAFWqVNE4PiJ9YrJEVEr069cP/fv3x8CBA/HVV1/hwoULuH//Pvbv34+AgAAcP34cwJteDtnNKysrC48fP0ZUVBRu376t9vxt27bFr7/+Cm9vb1hZWcHIyAht2rTB1q1b4e/vr/I4JycnWFhYIDQ0FAkJCYVK2mrWrIldu3YhKioKly9fxqBBg9T2RMn06tULqampGDduHNq1awc3NzeFc164cAGHDh3CzZs3MWfOHJw/f17r2GrUqIHs7Gz88MMPuHPnDjZv3ow1a9ZofZ758+fj22+/xfLly3Hr1i1cunQJP/zwAwAgICAAfn5+6NGjBw4fPox79+7h9OnT+PTTT3HhwgWl5xs8eDDMzc0xbNgwREdH4/jx45g0aRLee+89hcna+enYsSO8vLzw3nvv4fLlyzh06BA+++wzTJgwAWZmZgDeTOavXbs2Hj9+DACoXLky6tWrJ3/Jeo+qV6+Od955R/6dGjdujJEjRyIyMhIXL17E2LFj0aFDB4XeJqLixGSJqJSQSCTYtm0bli5dij179sDf3x8NGjTA/Pnz0b17dwQGBgIAYmNj0ahRIzRq1AhxcXFYsmQJGjVqhPfff1/t+f39/ZGTk6MwN6lt27Z5tuVmYmKC5cuXY+3atXBzc1P6tJ6mli5dCnt7e7Ro0QLBwcEIDAxE48aN8z3O2toawcHBuHz5ssIQHACMHTsWvXr1Qv/+/eHr64vExESMHz9e69gaNmyIpUuXYvHixahXrx62bt2KkJAQrc8zbNgwLFu2DKtWrULdunXRtWtX3Lp1C8Cbn/HBgwfRpk0bjBgxAp6enhgwYADu37+vMvGxtLTEoUOHkJSUhGbNmqFPnz5o3749VqxYoVVcxsbG2L9/P4yNjeHn54chQ4Zg6NCh+Pzzz+Vt0tPTcePGDWRnZ2t8XiMjI+zbtw+Ojo5o06YNunTpgjp16mD79u1axUekTxKhz1mhRERERAaOPUtEREREajBZIiIiIlKDyRIRERGRGkyWiIiIiNRgskRERESkBpMlIiIiIjWYLBERERGpwWSJiIiISA0mS0RERERqMFkiIiIiUoPJEhEREZEaTJaIiIiI1Pg/HDZSp2V
Gy88AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "processor = get_processor()\n", + "\n", + "for model_name in model_names:\n", + " embeddings_per_residue = processor.calculate_batch_embeddings(data=data, model_name=model_name, embedding_type=\"last_hidden_state\", num_gpus=1)\n", + " if embeddings_per_residue is None:\n", + " continue\n", + "\n", + " # convert mean embeddings to numpy array\n", + " embeddings = np.mean(np.array(embeddings_per_residue), axis=1)\n", + " print(f\"Embeddings shape: {embeddings.shape}\")\n", + " \n", + " # create PCA Plot from embeddings\n", + " pca = PCA(n_components=2)\n", + " pca.fit(embeddings)\n", + " embeddings_pca = pca.transform(embeddings)\n", + " plt.title(f\"PCA Plot of {model_name}\")\n", + " plt.xlabel(f\"PC1 with a variance of {pca.explained_variance_ratio_[0]:.2f}\")\n", + " plt.ylabel(f\"PC2 with a variance of {pca.explained_variance_ratio_[1]:.2f}\")\n", + " plt.scatter(embeddings_pca[:, 0], embeddings_pca[:, 1])\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyeed_niklas_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb index 
49c1dc22..40f46af1 100644 --- a/docs/usage/embeddings_analysis.ipynb +++ b/docs/usage/embeddings_analysis.ipynb @@ -25,22 +25,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + "\u001b[32m2025-05-29 12:00:51.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.embeddings.processor\u001b[0m:\u001b[36m_initialize_devices\u001b[0m:\u001b[36m44\u001b[0m - \u001b[1mInitialized 3 GPU device(s): [device(type='cuda', index=0), device(type='cuda', index=1), device(type='cuda', index=2)]\u001b[0m\n" ] } ], "source": [ "import sys\n", "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "import pandas as pd\n", "from loguru import logger\n", "\n", "from pyeed import Pyeed\n", "from pyeed.analysis.embedding_analysis import EmbeddingTool\n", "\n", + "\n", "logger.remove()\n", - "level = logger.add(sys.stderr, level=\"INFO\")" + "level = logger.add(sys.stderr, level=\"ERROR\")" ] }, { @@ -63,18 +64,19 @@ "name": "stdout", "output_type": "stream", "text": [ + "Pyeed Graph Object Mapping constraints not defined. 
Use _install_labels() to set up model constraints.\n", "📡 Connected to database.\n", "All data has been wiped from the database.\n" ] } ], "source": [ - "uri = \"bolt://129.69.129.130:7687\"\n", + "uri = \"bolt://129.69.129.130:7688\"\n", "user = \"neo4j\"\n", "password = \"12345678\"\n", "\n", "eedb = Pyeed(uri, user=user, password=password)\n", - "eedb.db.wipe_database(date='2025-03-26')" + "eedb.db.wipe_database(date='2025-05-29')" ] }, { @@ -122,85 +124,7 @@ "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2025-03-26 11:37:31.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:31.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:31.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:32.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:32.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:32.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - 
\u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:32.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.212\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.440\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.464\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:33.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 
11:37:34.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in 
database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:34.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.021\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated 
NCBI protein ACJ43254.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m 
- \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.427\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.893\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:35.983\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:36.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n", - "\u001b[32m2025-03-26 11:37:36.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "# now fecth all of the proteins from the database\n", "eedb.fetch_from_primary_db(df[\"protein_id_database\"].tolist(), db=\"ncbi_protein\")" @@ -220,22 +144,50 @@ "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", - " warnings.warn(\n", - "Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 3.69it/s]\n", - "Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t36_3B_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1899: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. 
Please use `token` instead.\n", - " warnings.warn(\n", - "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "970aaf779c9142a09ca258b16ca07fd3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 4 files: 0%| | 0/4 [00:00" ] @@ -361,7 +313,7 @@ "output_type": "stream", "text": [ "Resulst for index AAP20891.1 are:\n", - "[('AAP20891.1', 0.0), ('ADL13944.1', 1.2696941380951898e-05), ('AGQ50511.1', 2.3084859425925863e-05), ('CBX53726.1', 2.3443578533011156e-05), ('AAL29433.1', 3.0809776502382924e-05), ('CAA76796.1', 3.2400445545976986e-05), ('CAC67290.1', 4.856582147116928e-05), ('AFN21551.1', 4.953471590429803e-05), ('CAA74912.2', 5.021707417551813e-05), ('CTA52364.1', 6.113568903631794e-05)]\n" + "[('AAP20891.1', 0.0), ('AGQ50511.1', 0.00016200621801287785), ('ABB97007.1', 0.0001810048295400879), ('AFN21551.1', 0.00018909362988450695), ('CAC67290.1', 0.00021654775310264718), ('ADL13944.1', 0.0002567003210336427), ('AAK30619.1', 0.0002616398020808264), ('AAL29433.1', 0.0002646931927183793), ('ACJ43254.1', 0.0002669990760338914), ('ACB22021.1', 0.0002755243601859636)]\n" ] } ], @@ -458,25 +410,34 @@ "metadata": {}, "outputs": [ { - "ename": "ClientError", - "evalue": "{code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 2560 dimensions, but indexed vectors have 960.}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# here we use the vector index to find the closest 
matches\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43met\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_nearest_neighbors_based_on_vector_index\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mdb\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meedb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_protein_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprotein_id_database\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtolist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvector_index_Protein_embedding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mnumber_of_neighbors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(results)\n", - "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/analysis/embedding_analysis.py:415\u001b[0m, in \u001b[0;36mEmbeddingTool.find_nearest_neighbors_based_on_vector_index\u001b[0;34m(self, db, query_protein_id, index_name, number_of_neighbors)\u001b[0m\n\u001b[1;32m 406\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIndex \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mindex_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is populated, finding nearest 
neighbors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 408\u001b[0m query_find_nearest_neighbors \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;124mMATCH (source:Protein \u001b[39m\u001b[38;5;130;01m{{\u001b[39;00m\u001b[38;5;124maccession_id: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquery_protein_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m}}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\n\u001b[1;32m 410\u001b[0m \u001b[38;5;124mWITH source.embedding AS embedding\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;124mRETURN fprotein.accession_id, score\u001b[39m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m--> 415\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery_find_nearest_neighbors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 416\u001b[0m neighbors: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mfloat\u001b[39m]] \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 417\u001b[0m (\u001b[38;5;28mstr\u001b[39m(record[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfprotein.accession_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]), \u001b[38;5;28mfloat\u001b[39m(record[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m]))\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m results\n\u001b[1;32m 419\u001b[0m ]\n\u001b[1;32m 420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m neighbors\n", - "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:45\u001b[0m, in \u001b[0;36mDatabaseConnector.execute_read\u001b[0;34m(self, query, parameters)\u001b[0m\n\u001b[1;32m 34\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03mExecutes a read (MATCH) query using the Neo4j driver.\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124;03m list[dict]: The result of the query as a list of dictionaries.\u001b[39;00m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdriver\u001b[38;5;241m.\u001b[39msession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 45\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_read\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/session.py:661\u001b[0m, in \u001b[0;36mSession.execute_read\u001b[0;34m(self, transaction_function, *args, **kwargs)\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[38;5;129m@NonConcurrentMethodChecker\u001b[39m\u001b[38;5;241m.\u001b[39mnon_concurrent_method\n\u001b[1;32m 593\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mexecute_read\u001b[39m(\n\u001b[1;32m 594\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;241m*\u001b[39margs: _P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: _P\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m 599\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m _R:\n\u001b[1;32m 600\u001b[0m \u001b[38;5;250m 
\u001b[39m\u001b[38;5;124;03m\"\"\"Execute a unit of work in a managed read transaction.\u001b[39;00m\n\u001b[1;32m 601\u001b[0m \n\u001b[1;32m 602\u001b[0m \u001b[38;5;124;03m .. note::\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;124;03m .. versionadded:: 5.0\u001b[39;00m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 661\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_transaction\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 662\u001b[0m \u001b[43m \u001b[49m\u001b[43mREAD_ACCESS\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mTelemetryAPI\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTX_FUNC\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 663\u001b[0m \u001b[43m \u001b[49m\u001b[43mtransaction_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 664\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/session.py:552\u001b[0m, in \u001b[0;36mSession._run_transaction\u001b[0;34m(self, access_mode, api, transaction_function, args, kwargs)\u001b[0m\n\u001b[1;32m 550\u001b[0m tx \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transaction\n\u001b[1;32m 551\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 552\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mtransaction_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 553\u001b[0m 
\u001b[38;5;28;01mexcept\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mCancelledError:\n\u001b[1;32m 554\u001b[0m \u001b[38;5;66;03m# if cancellation callback has not been called yet:\u001b[39;00m\n\u001b[1;32m 555\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transaction \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:222\u001b[0m, in \u001b[0;36mDatabaseConnector._run_query\u001b[0;34m(tx, query, parameters)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Executes a Cypher query in the provided transaction.\"\"\"\u001b[39;00m\n\u001b[1;32m 221\u001b[0m result \u001b[38;5;241m=\u001b[39m tx\u001b[38;5;241m.\u001b[39mrun(query, parameters)\n\u001b[0;32m--> 222\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [record\u001b[38;5;241m.\u001b[39mdata() \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m result]\n", - "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:222\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Executes a Cypher query in the provided transaction.\"\"\"\u001b[39;00m\n\u001b[1;32m 221\u001b[0m result \u001b[38;5;241m=\u001b[39m tx\u001b[38;5;241m.\u001b[39mrun(query, parameters)\n\u001b[0;32m--> 222\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [record\u001b[38;5;241m.\u001b[39mdata() \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m result]\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/result.py:270\u001b[0m, in \u001b[0;36mResult.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_record_buffer\u001b[38;5;241m.\u001b[39mpopleft()\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_streaming:\n\u001b[0;32m--> 270\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfetch_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_discarding:\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_discard()\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_common.py:178\u001b[0m, in \u001b[0;36mConnectionErrorHandler.__getattr__..outer..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 178\u001b[0m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (Neo4jError, ServiceUnavailable, SessionExpired) \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39miscoroutinefunction(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__on_error)\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt.py:850\u001b[0m, 
in \u001b[0;36mBolt.fetch_message\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[38;5;66;03m# Receive exactly one message\u001b[39;00m\n\u001b[1;32m 847\u001b[0m tag, fields \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minbox\u001b[38;5;241m.\u001b[39mpop(\n\u001b[1;32m 848\u001b[0m hydration_hooks\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponses[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mhydration_hooks\n\u001b[1;32m 849\u001b[0m )\n\u001b[0;32m--> 850\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfields\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 851\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39midle_since \u001b[38;5;241m=\u001b[39m monotonic()\n\u001b[1;32m 852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt5.py:369\u001b[0m, in \u001b[0;36mBolt5x0._process_message\u001b[0;34m(self, tag, fields)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_server_state_manager\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbolt_states\u001b[38;5;241m.\u001b[39mFAILED\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 369\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mon_failure\u001b[49m\u001b[43m(\u001b[49m\u001b[43msummary_metadata\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ServiceUnavailable, 
DatabaseUnavailable):\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool:\n", - "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_common.py:245\u001b[0m, in \u001b[0;36mResponse.on_failure\u001b[0;34m(self, metadata)\u001b[0m\n\u001b[1;32m 243\u001b[0m handler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandlers\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_summary\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 244\u001b[0m Util\u001b[38;5;241m.\u001b[39mcallback(handler)\n\u001b[0;32m--> 245\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Neo4jError\u001b[38;5;241m.\u001b[39mhydrate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmetadata)\n", - "\u001b[0;31mClientError\u001b[0m: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 2560 dimensions, but indexed vectors have 960.}" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9f3354c532c147a383f2da89937a1132", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[('AAP20891.1', 1.0), ('AGQ50511.1', 0.9999189376831055), ('ABB97007.1', 0.999909520149231), ('AFN21551.1', 0.9999054670333862), ('CAC67290.1', 0.9998918771743774), ('ADL13944.1', 0.9998717904090881), ('AAK30619.1', 0.9998692274093628), ('AAL29433.1', 0.9998676776885986), ('ACJ43254.1', 0.9998666048049927), ('CBX53726.1', 0.9998624920845032)]\n"
      ]
     }
    ],
@@ -484,7 +445,7 @@
     "# here we use the vector index to find the closest matches\n",
     "results = et.find_nearest_neighbors_based_on_vector_index(\n",
     "    db=eedb.db,\n",
-    "    query_protein_id=df[\"protein_id_database\"].tolist()[0],\n",
+    "    query_id=df[\"protein_id_database\"].tolist()[0],\n",
     "    index_name=\"vector_index_Protein_embedding\",\n",
     "    number_of_neighbors=10,\n",
     ")\n",
diff --git a/pyproject.toml b/pyproject.toml
index 94635913..bf00381f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ SPARQLWrapper = "2.0.0"
 pysam = "0.23.0"
 types-requests = "2.32.0.20250328"
 ipywidgets = "^8.1.7"
+sentencepiece = "^0.2.0"
 
 [tool.poetry.group.dev.dependencies]
 mkdocstrings = {extras = ["python"], version = "^0.26.2"}
diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index a45ce85c..d5b933b0 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -1,5 +1,6 @@
 import gc
 import os
+import re
 from typing import Any, Tuple, Union
 
 import numpy as np
@@ -11,7 +12,7 @@
 from loguru import logger
 from numpy.typing import NDArray
 from torch.nn import DataParallel, Module
-from transformers import EsmModel, EsmTokenizer
+from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
 
 from pyeed.dbconnect import DatabaseConnector
 
@@ -36,8 +37,8 @@ def get_hf_token() -> str:
 def process_batches_on_gpu(
     data: list[tuple[str, str]],
     batch_size: int,
-    model: Module,
-    tokenizer: EsmTokenizer,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
     db: DatabaseConnector,
     device: torch.device,
 ) -> None:
@@ -97,7 +98,7 @@ def process_batches_on_gpu(
 def load_model_and_tokenizer(
     model_name: str,
     device: torch.device = torch.device("cuda:0"),
-) -> Tuple[Any, Union[Any, None], torch.device]:
+) -> Tuple[Union[EsmModel, ESMC, ESM3, T5Model], Union[EsmTokenizer, T5Tokenizer, None], torch.device]:
     """
     Loads the model and assigns it to a specific GPU.
 
@@ -113,8 +114,20 @@ def load_model_and_tokenizer(
 
     if "esmc" in model_name.lower():
         model = ESMC.from_pretrained(model_name)
+        model = model.to(device)
     elif "esm3-sm-open-v1" in model_name.lower():
         model = ESM3.from_pretrained("esm3_sm_open_v1")
+        model = model.to(device)
+    elif "prot_t5" in model_name.lower() or "prott5" in model_name.lower():
+        # ProtT5 models
+        full_model_name = (
+            model_name
+            if model_name.startswith("Rostlab/")
+            else f"Rostlab/{model_name}"
+        )
+        model = T5Model.from_pretrained(full_model_name, use_auth_token=token)
+        tokenizer = T5Tokenizer.from_pretrained(full_model_name, use_auth_token=token, do_lower_case=False)
+        model = model.to(device)
     else:
         full_model_name = (
             model_name
@@ -123,27 +136,42 @@ def load_model_and_tokenizer(
         )
         model = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
         tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)
+        model = model.to(device)
 
-    model = model.to(device)
     return model, tokenizer, device
 
 
+def preprocess_sequence_for_prott5(sequence: str) -> str:
+    """
+    Preprocesses a protein sequence for ProtT5 models.
+    
+    Args:
+        sequence: Raw protein sequence
+        
+    Returns:
+        Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
+    """
+    # Map rare amino acids to X and add spaces between amino acids
+    sequence = re.sub(r"[UZOB]", "X", sequence.upper())
+    return " ".join(list(sequence))
+
+
 def get_batch_embeddings(
     batch_sequences: list[str],
     model: Union[
         EsmModel,
         ESMC,
         DataParallel[Module],
-        ESM3InferenceClient,
         ESM3,
+        T5Model,
     ],
-    tokenizer_or_alphabet: Union[EsmTokenizer, None],
+    tokenizer_or_alphabet: Union[EsmTokenizer, T5Tokenizer, None],
     device: torch.device,
     pool_embeddings: bool = True,
 ) -> list[NDArray[np.float64]]:
     """
     Generates mean-pooled embeddings for a batch of sequences.
-    Supports ESM++, ESM-2 and ESM-3 models.
+    Supports ESM++, ESM-2, ESM-3 and ProtT5 models.
 
     Args:
         batch_sequences (list[str]): List of sequence strings.
@@ -198,14 +226,64 @@ def get_batch_embeddings(
                     embeddings = embeddings.mean(axis=0)
                 embedding_list.append(embeddings)
         return embedding_list
+    elif isinstance(base_model, T5Model):
+        # For ProtT5 models
+        assert tokenizer_or_alphabet is not None, "Tokenizer required for ProtT5 models"
+        assert isinstance(tokenizer_or_alphabet, T5Tokenizer), "T5Tokenizer required for ProtT5 models"
+        
+        # Preprocess sequences for ProtT5
+        processed_sequences = [preprocess_sequence_for_prott5(seq) for seq in batch_sequences]
+        
+        inputs = tokenizer_or_alphabet.batch_encode_plus(
+            processed_sequences, 
+            add_special_tokens=True, 
+            padding="longest",
+            return_tensors="pt"
+        )
+        
+        # Move inputs to device
+        input_ids = inputs['input_ids'].to(device)
+        attention_mask = inputs['attention_mask'].to(device)
+        
+        with torch.no_grad():
+            # For ProtT5, use encoder embeddings for feature extraction
+            # Create dummy decoder inputs (just the pad token)
+            batch_size = input_ids.shape[0]
+            decoder_input_ids = torch.full(
+                (batch_size, 1), 
+                tokenizer_or_alphabet.pad_token_id or 0, 
+                dtype=torch.long,
+                device=device
+            )
+            
+            outputs = base_model(input_ids=input_ids, 
+                          attention_mask=attention_mask,
+                          decoder_input_ids=decoder_input_ids)
+            
+            # Get encoder last hidden state (encoder embeddings)
+            hidden_states = outputs.encoder_last_hidden_state.cpu().numpy()
+
+        if pool_embeddings:
+            # Mean pooling across sequence length, excluding padding tokens
+            embedding_list = []
+            for i, hidden_state in enumerate(hidden_states):
+                # Get actual sequence length (excluding padding)
+                attention_mask_np = attention_mask[i].cpu().numpy()
+                seq_len = attention_mask_np.sum()
+                # Pool only over actual sequence tokens
+                pooled_embedding = hidden_state[:seq_len].mean(axis=0)
+                embedding_list.append(pooled_embedding)
+            return embedding_list
+        return list(hidden_states)
     else:
         # ESM-2 logic
         assert tokenizer_or_alphabet is not None, "Tokenizer required for ESM-2 models"
+        assert isinstance(tokenizer_or_alphabet, EsmTokenizer), "EsmTokenizer required for ESM-2 models"
         inputs = tokenizer_or_alphabet(
             batch_sequences, padding=True, truncation=True, return_tensors="pt"
         ).to(device)
         with torch.no_grad():
-            outputs = model(**inputs, output_hidden_states=True)
+            outputs = base_model(**inputs, output_hidden_states=True)
 
         # Get last hidden state for each sequence
         hidden_states = outputs.last_hidden_state.cpu().numpy()
@@ -294,15 +372,38 @@ def get_single_embedding_last_hidden_state(
             embedding = (
                 logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
             )
+        elif isinstance(model, T5Model):
+            # ProtT5 logic
+            processed_sequence = preprocess_sequence_for_prott5(sequence)
+            inputs = tokenizer.encode_plus(
+                processed_sequence,
+                add_special_tokens=True,
+                return_tensors="pt"
+            )
+            
+            input_ids = inputs['input_ids'].to(device)
+            attention_mask = inputs['attention_mask'].to(device)
+            
+            # Create dummy decoder inputs
+            decoder_input_ids = torch.full(
+                (1, 1), 
+                tokenizer.pad_token_id or 0, 
+                dtype=torch.long,
+                device=device
+            )
+            
+            outputs = model(input_ids=input_ids, 
+                          attention_mask=attention_mask,
+                          decoder_input_ids=decoder_input_ids)
+            
+            # Get encoder last hidden state including special tokens
+            embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
         else:
             # ESM-2 logic
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
             outputs = model(**inputs)
             embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
 
-    # normalize the embedding
-    embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
-
     return embedding  # type: ignore
 
 
@@ -315,6 +416,7 @@ def get_single_embedding_all_layers(
     For ESM-3 (ESMC) models, it assumes that passing
     LogitsConfig(return_hidden_states=True) returns a collection of layer embeddings.
     For ESM-2 models, it sets output_hidden_states=True.
+    For ProtT5 models, it gets encoder hidden states.
 
     Args:
         sequence (str): The protein sequence to embed.
@@ -354,6 +456,39 @@ def get_single_embedding_all_layers(
                 emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
                 embeddings_list.append(emb)
 
+        elif isinstance(model, T5Model):
+            # For ProtT5: Get encoder hidden states
+            processed_sequence = preprocess_sequence_for_prott5(sequence)
+            inputs = tokenizer.encode_plus(
+                processed_sequence,
+                add_special_tokens=True,
+                return_tensors="pt"
+            )
+            
+            input_ids = inputs['input_ids'].to(device)
+            attention_mask = inputs['attention_mask'].to(device)
+            
+            # Create dummy decoder inputs
+            decoder_input_ids = torch.full(
+                (1, 1), 
+                tokenizer.pad_token_id or 0, 
+                dtype=torch.long,
+                device=device
+            )
+            
+            outputs = model(input_ids=input_ids, 
+                          attention_mask=attention_mask,
+                          decoder_input_ids=decoder_input_ids,
+                          output_hidden_states=True)
+            
+            # Get all encoder hidden states
+            encoder_hidden_states = outputs.encoder_hidden_states
+            for layer_tensor in encoder_hidden_states:
+                # Remove batch dimension but keep special tokens
+                emb = layer_tensor[0].detach().cpu().numpy()
+                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
+                embeddings_list.append(emb)
+
         else:
             # For ESM-2: Get hidden states with output_hidden_states=True
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
@@ -379,13 +514,11 @@ def calculate_single_sequence_embedding_first_layer(
     return get_single_embedding_first_layer(sequence, model, tokenizer, device)
 
 
-# The rest of your existing functions will need to be adapted in a similar way
-# if they interact with the model or tokenizer directly
 def get_single_embedding_first_layer(
     sequence: str, model: Any, tokenizer: Any, device: torch.device
 ) -> NDArray[np.float64]:
     """
-    Generates normalized embeddings for each token in the sequence across all layers.
+    Generates normalized embeddings for each token in the sequence using the first layer.
     """
     embeddings_list = []
 
@@ -426,6 +559,34 @@ def get_single_embedding_first_layer(
                 raise ValueError("Model did not return embeddings")
             embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy()
 
+        elif isinstance(model, T5Model):
+            # ProtT5 logic - get first layer embedding
+            processed_sequence = preprocess_sequence_for_prott5(sequence)
+            inputs = tokenizer.encode_plus(
+                processed_sequence,
+                add_special_tokens=True,
+                return_tensors="pt"
+            )
+            
+            input_ids = inputs['input_ids'].to(device)
+            attention_mask = inputs['attention_mask'].to(device)
+            
+            # Create dummy decoder inputs
+            decoder_input_ids = torch.full(
+                (1, 1), 
+                tokenizer.pad_token_id or 0, 
+                dtype=torch.long,
+                device=device
+            )
+            
+            outputs = model(input_ids=input_ids, 
+                          attention_mask=attention_mask,
+                          decoder_input_ids=decoder_input_ids,
+                          output_hidden_states=True)
+            
+            # Get first encoder hidden state including special tokens
+            embedding = outputs.encoder_hidden_states[0][0].detach().cpu().numpy()
+
         else:
             # ESM-2 logic
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
diff --git a/src/pyeed/embedding_refactored.py b/src/pyeed/embedding_refactored.py
new file mode 100644
index 00000000..8ce5deff
--- /dev/null
+++ b/src/pyeed/embedding_refactored.py
@@ -0,0 +1,251 @@
+"""
+Refactored embedding module that maintains original function signatures.
+
+This module provides the same interface as the original embedding.py while
+using the new organized structure with model classes, factory, and processor.
+"""
+
+import gc
+import os
+import re
+from typing import Any, Tuple, Union
+
+import numpy as np
+import torch
+from esm.models.esm3 import ESM3
+from esm.models.esmc import ESMC
+from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig
+from huggingface_hub import HfFolder, login
+from loguru import logger
+from numpy.typing import NDArray
+from torch.nn import DataParallel, Module
+from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
+
+from pyeed.dbconnect import DatabaseConnector
+from pyeed.embeddings.processor import get_processor
+from pyeed.embeddings.factory import ModelFactory
+from pyeed.embeddings.database import update_protein_embeddings_in_db as _update_protein_embeddings_in_db
+from pyeed.embeddings.utils import get_hf_token as _get_hf_token, preprocess_sequence_for_prott5 as _preprocess_sequence_for_prott5, free_memory as _free_memory
+
+
+# ============================================================================
+# Original function signatures maintained for backward compatibility
+# ============================================================================
+
+def get_hf_token() -> str:
+    """Get or request Hugging Face token."""
+    return _get_hf_token()
+
+
+def process_batches_on_gpu(
+    data: list[tuple[str, str]],
+    batch_size: int,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
+    db: DatabaseConnector,
+    device: torch.device,
+) -> None:
+    """
+    Splits data into batches and processes them on a single GPU.
+
+    Args:
+        data (list): List of (accession_id, sequence) tuples.
+        batch_size (int): Size of each batch.
+        model: The model instance for this GPU.
+        tokenizer: The tokenizer for the model.
+        db: Database connection.
+        device (torch.device): The assigned GPU device.
+    """
+    processor = get_processor()
+    processor.process_batches_on_gpu(data, batch_size, model, tokenizer, db, device)
+
+
+def load_model_and_tokenizer(
+    model_name: str,
+    device: torch.device = torch.device("cuda:0"),
+) -> Tuple[Union[EsmModel, ESMC, ESM3, T5Model], Union[EsmTokenizer, T5Tokenizer, None], torch.device]:
+    """
+    Loads the model and assigns it to a specific GPU.
+
+    Args:
+        model_name (str): The model name.
+        device (torch.device): The specific GPU device.
+
+    Returns:
+        Tuple: (model, tokenizer, device)
+    """
+    return ModelFactory.load_model_and_tokenizer(model_name, device)
+
+
+def preprocess_sequence_for_prott5(sequence: str) -> str:
+    """
+    Preprocesses a protein sequence for ProtT5 models.
+    
+    Args:
+        sequence: Raw protein sequence
+        
+    Returns:
+        Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
+    """
+    return _preprocess_sequence_for_prott5(sequence)
+
+
+def get_batch_embeddings(
+    batch_sequences: list[str],
+    model: Union[
+        EsmModel,
+        ESMC,
+        DataParallel[Module],
+        ESM3,
+        T5Model,
+    ],
+    tokenizer_or_alphabet: Union[EsmTokenizer, T5Tokenizer, None],
+    device: torch.device,
+    pool_embeddings: bool = True,
+) -> list[NDArray[np.float64]]:
+    """
+    Generates mean-pooled embeddings for a batch of sequences.
+    Supports ESM++, ESM-2, ESM-3 and ProtT5 models.
+
+    Args:
+        batch_sequences (list[str]): List of sequence strings.
+        model: Loaded model (could be wrapped in DataParallel).
+        tokenizer_or_alphabet: Tokenizer if needed.
+        device: Inference device (CPU/GPU).
+        pool_embeddings (bool): Whether to average embeddings across the sequence length.
+
+    Returns:
+        List of embeddings as NumPy arrays.
+    """
+    processor = get_processor()
+    return processor.get_batch_embeddings_unified(
+        batch_sequences, model, tokenizer_or_alphabet, device, pool_embeddings
+    )
+
+
+def calculate_single_sequence_embedding_last_hidden_state(
+    sequence: str,
+    device: torch.device = torch.device("cuda:0"),
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+) -> NDArray[np.float64]:
+    """
+    Calculates an embedding for a single sequence.
+
+    Args:
+        sequence: Input protein sequence
+        model_name: Name of the ESM model to use
+
+    Returns:
+        NDArray[np.float64]: Normalized embedding vector for the sequence
+    """
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_last_hidden_state(
+        sequence, device, model_name
+    )
+
+
+def calculate_single_sequence_embedding_all_layers(
+    sequence: str,
+    device: torch.device,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+) -> NDArray[np.float64]:
+    """
+    Calculates embeddings for a single sequence across all layers.
+
+    Args:
+        sequence: Input protein sequence
+        model_name: Name of the ESM model to use
+
+    Returns:
+        NDArray[np.float64]: A numpy array containing layer embeddings for the sequence.
+    """
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_all_layers(
+        sequence, device, model_name
+    )
+
+
+def get_single_embedding_last_hidden_state(
+    sequence: str, model: Any, tokenizer: Any, device: torch.device
+) -> NDArray[np.float64]:
+    """Generate embeddings for a single sequence using the last hidden state.
+
+    Args:
+        sequence (str): The protein sequence to embed
+        model (Any): The transformer model to use
+        tokenizer (Any): The tokenizer for the model
+        device (torch.device): The device to run the model on (CPU/GPU)
+
+    Returns:
+        np.ndarray: Normalized embeddings for each token in the sequence
+    """
+    processor = get_processor()
+    return processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
+
+
+def get_single_embedding_all_layers(
+    sequence: str, model: Any, tokenizer: Any, device: torch.device
+) -> NDArray[np.float64]:
+    """
+    Generates normalized embeddings for each token in the sequence across all layers.
+
+    For ESM-3 (ESMC) models, it assumes that passing
+    LogitsConfig(return_hidden_states=True) returns a collection of layer embeddings.
+    For ESM-2 models, it sets output_hidden_states=True.
+    For ProtT5 models, it gets encoder hidden states.
+
+    Args:
+        sequence (str): The protein sequence to embed.
+        model (Any): The transformer model to use.
+        tokenizer (Any): The tokenizer for the model (None for ESMC).
+        device (torch.device): The device to run the model on (CPU/GPU).
+
+    Returns:
+        NDArray[np.float64]: A numpy array containing the normalized token embeddings
+        concatenated across all layers.
+    """
+    processor = get_processor()
+    return processor.get_single_embedding_all_layers(sequence, model, tokenizer, device)
+
+
+def calculate_single_sequence_embedding_first_layer(
+    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D", device: torch.device = torch.device("cuda:0"),
+) -> NDArray[np.float64]:
+    """
+    Calculates an embedding for a single sequence using the first layer.
+    """
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_first_layer(sequence, model_name, device)
+
+
+def get_single_embedding_first_layer(
+    sequence: str, model: Any, tokenizer: Any, device: torch.device
+) -> NDArray[np.float64]:
+    """
+    Generates normalized embeddings for each token in the sequence using the first layer.
+    """
+    processor = get_processor()
+    return processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+
+
+def free_memory() -> None:
+    """
+    Frees up memory by invoking garbage collection and clearing GPU caches.
+    """
+    _free_memory()
+
+
+def update_protein_embeddings_in_db(
+    db: DatabaseConnector,
+    accessions: list[str],
+    embeddings_batch: list[NDArray[np.float64]],
+) -> None:
+    """
+    Updates the embeddings for a batch of proteins in the database.
+
+    Args:
+        db (DatabaseConnector): The database connector.
+        accessions (list[str]): The accessions of the proteins to update.
+        embeddings_batch (list[NDArray[np.float64]]): The embeddings to update.
+    """
+    _update_protein_embeddings_in_db(db, accessions, embeddings_batch) 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/__init__.py b/src/pyeed/embeddings/__init__.py
new file mode 100644
index 00000000..9d13238c
--- /dev/null
+++ b/src/pyeed/embeddings/__init__.py
@@ -0,0 +1,106 @@
+"""
+Organized embedding module for protein language models.
+
+This module provides both the new organized structure and backward compatibility
+with the original embedding.py interface.
+"""
+
+# New organized structure
+from .base import BaseEmbeddingModel, ModelType, normalize_embedding
+from .factory import ModelFactory
+from .processor import EmbeddingProcessor, get_processor
+from .utils import get_hf_token, preprocess_sequence_for_prott5, free_memory, determine_model_type
+from .database import update_protein_embeddings_in_db
+from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
+
+# Backward compatibility imports from old embedding.py
+try:
+    from ..embedding import (
+        load_model_and_tokenizer,
+        process_batches_on_gpu,
+        get_batch_embeddings,
+        calculate_single_sequence_embedding_last_hidden_state,
+        calculate_single_sequence_embedding_all_layers,
+        calculate_single_sequence_embedding_first_layer,
+        get_single_embedding_last_hidden_state,
+        get_single_embedding_all_layers,
+        get_single_embedding_first_layer
+    )
+except ImportError:
+    # If old embedding.py is not available, use processor methods for compatibility
+    _processor = get_processor()
+    
+    def load_model_and_tokenizer(model_name: str, device=None):
+        """Backward compatibility function."""
+        # This is handled internally by the processor now
+        return None, None, device
+    
+    def process_batches_on_gpu(data, batch_size, model, tokenizer, db, device):
+        """Backward compatibility function."""
+        return _processor.process_batches_on_gpu(data, batch_size, model, tokenizer, db, device)
+    
+    def get_batch_embeddings(batch_sequences, model, tokenizer, device, pool_embeddings=True):
+        """Backward compatibility function."""
+        return _processor.get_batch_embeddings_unified(batch_sequences, model, tokenizer, device, pool_embeddings)
+    
+    def calculate_single_sequence_embedding_last_hidden_state(sequence, device=None, model_name="facebook/esm2_t33_650M_UR50D"):
+        """Backward compatibility function."""
+        return _processor.calculate_single_embedding(sequence, model_name, "last_hidden_state", device)
+    
+    def calculate_single_sequence_embedding_all_layers(sequence, device, model_name="facebook/esm2_t33_650M_UR50D"):
+        """Backward compatibility function."""
+        return _processor.calculate_single_embedding(sequence, model_name, "all_layers", device)
+    
+    def calculate_single_sequence_embedding_first_layer(sequence, model_name="facebook/esm2_t33_650M_UR50D", device=None):
+        """Backward compatibility function."""
+        return _processor.calculate_single_embedding(sequence, model_name, "first_layer", device)
+    
+    def get_single_embedding_last_hidden_state(sequence, model, tokenizer, device):
+        """Backward compatibility function."""
+        return _processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
+    
+    def get_single_embedding_all_layers(sequence, model, tokenizer, device):
+        """Backward compatibility function."""
+        return _processor.get_single_embedding_all_layers(sequence, model, tokenizer, device)
+    
+    def get_single_embedding_first_layer(sequence, model, tokenizer, device):
+        """Backward compatibility function."""
+        return _processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+
+__all__ = [
+    # Base classes and types
+    'BaseEmbeddingModel',
+    'ModelType',
+    'normalize_embedding',
+    
+    # Factory and processor
+    'ModelFactory',
+    'EmbeddingProcessor',
+    'get_processor',
+    
+    # Utilities
+    'get_hf_token',
+    'preprocess_sequence_for_prott5',
+    'free_memory',
+    'determine_model_type',
+    
+    # Database operations
+    'update_protein_embeddings_in_db',
+    
+    # Model implementations
+    'ESM2EmbeddingModel',
+    'ESMCEmbeddingModel',
+    'ESM3EmbeddingModel',
+    'ProtT5EmbeddingModel',
+    
+    # Backward compatibility functions
+    'load_model_and_tokenizer',
+    'process_batches_on_gpu',
+    'get_batch_embeddings',
+    'calculate_single_sequence_embedding_last_hidden_state',
+    'calculate_single_sequence_embedding_all_layers',
+    'calculate_single_sequence_embedding_first_layer',
+    'get_single_embedding_last_hidden_state',
+    'get_single_embedding_all_layers',
+    'get_single_embedding_first_layer',
+] 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/base.py b/src/pyeed/embeddings/base.py
new file mode 100644
index 00000000..745fd2cf
--- /dev/null
+++ b/src/pyeed/embeddings/base.py
@@ -0,0 +1,121 @@
+"""
+Base classes for protein embedding models.
+
+Defines the common interface that all embedding model implementations should follow.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, List, Union, Tuple, Optional
+import torch
+import numpy as np
+from numpy.typing import NDArray
+
+
+class BaseEmbeddingModel(ABC):
+    """Abstract base class for protein embedding models."""
+    
+    def __init__(self, model_name: str, device: torch.device):
+        self.model_name = model_name
+        self.device = device
+        self._model: Optional[Any] = None
+        self._tokenizer: Optional[Any] = None
+        
+    @property
+    def model(self) -> Optional[Any]:
+        """Get the model instance."""
+        return self._model
+    
+    @model.setter
+    def model(self, value: Any) -> None:
+        """Set the model instance."""
+        self._model = value
+    
+    @property
+    def tokenizer(self) -> Optional[Any]:
+        """Get the tokenizer instance."""
+        return self._tokenizer
+    
+    @tokenizer.setter
+    def tokenizer(self, value: Any) -> None:
+        """Set the tokenizer instance."""
+        self._tokenizer = value
+    
+    @abstractmethod
+    def load_model(self) -> Tuple[Any, Optional[Any]]:
+        """Load and return the model and tokenizer."""
+        pass
+    
+    @abstractmethod
+    def preprocess_sequence(self, sequence: str) -> Union[str, Any]:
+        """Preprocess a sequence for the specific model type."""
+        pass
+    
+    @abstractmethod
+    def get_batch_embeddings(
+        self, 
+        sequences: List[str], 
+        pool_embeddings: bool = True
+    ) -> List[NDArray[np.float64]]:
+        """Get embeddings for a batch of sequences."""
+        pass
+    
+    @abstractmethod
+    def get_single_embedding_last_hidden_state(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get embedding from the last hidden state for a single sequence."""
+        pass
+    
+    @abstractmethod
+    def get_single_embedding_all_layers(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get embeddings from all layers for a single sequence."""
+        pass
+    
+    @abstractmethod
+    def get_single_embedding_first_layer(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get embedding from the first layer for a single sequence."""
+        pass
+    
+    def get_final_embeddings(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """
+        Get final embeddings for a single sequence.
+        
+        This method provides a robust embedding option that works across all models.
+        The default implementation simply delegates to the last-hidden-state
+        embedding; subclasses may override it with a model-specific strategy.
+        """
+        return self.get_single_embedding_last_hidden_state(sequence)
+    
+    def move_to_device(self) -> None:
+        """Move model to the specified device."""
+        if self.model is not None:
+            self.model = self.model.to(self.device)
+    
+    def cleanup(self) -> None:
+        """Clean up model resources."""
+        if self._model is not None:
+            self._model = None
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+
+class ModelType:
+    """Constants for different model types."""
+    ESM2 = "esm2"
+    ESMC = "esmc"
+    ESM3 = "esm3"
+    PROTT5 = "prott5"
+
+
+def normalize_embedding(embedding: NDArray[np.float64]) -> NDArray[np.float64]:
+    """Normalize embeddings using L2 normalization."""
+    return embedding / np.linalg.norm(embedding, axis=1, keepdims=True) 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/database.py b/src/pyeed/embeddings/database.py
new file mode 100644
index 00000000..f1536878
--- /dev/null
+++ b/src/pyeed/embeddings/database.py
@@ -0,0 +1,41 @@
+"""
+Database operations for protein embeddings.
+
+Handles storing and updating protein embeddings in the database.
+"""
+
+from typing import List
+import numpy as np
+from numpy.typing import NDArray
+from pyeed.dbconnect import DatabaseConnector
+
+
+def update_protein_embeddings_in_db(
+    db: DatabaseConnector,
+    accessions: List[str],
+    embeddings_batch: List[NDArray[np.float64]],
+) -> None:
+    """
+    Updates the embeddings for a batch of proteins in the database.
+
+    Args:
+        db (DatabaseConnector): The database connector.
+        accessions (List[str]): The accessions of the proteins to update.
+        embeddings_batch (List[NDArray[np.float64]]): The embeddings to update.
+    """
+    # Prepare the data for batch update
+    updates = []
+    for acc, emb in zip(accessions, embeddings_batch):
+        # Flatten the embedding array and convert to list
+        flat_embedding = emb.flatten().tolist()
+        updates.append({"accession": acc, "embedding": flat_embedding})
+
+    # Cypher query for batch update
+    query = """
+    UNWIND $updates AS update
+    MATCH (p:Protein {accession_id: update.accession})
+    SET p.embedding = update.embedding
+    """
+
+    # Execute the update query with parameters
+    db.execute_write(query, {"updates": updates}) 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/factory.py b/src/pyeed/embeddings/factory.py
new file mode 100644
index 00000000..66b7f7c5
--- /dev/null
+++ b/src/pyeed/embeddings/factory.py
@@ -0,0 +1,67 @@
+"""
+Factory for creating embedding model instances.
+
+Provides a centralized way to create different types of embedding models
+based on model names and automatically handles device assignment.
+"""
+
+from typing import Union, Tuple, Any
+import torch
+from torch.nn import DataParallel, Module
+
+from .base import BaseEmbeddingModel
+from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
+from .utils import determine_model_type
+
+
+class ModelFactory:
+    """Factory for creating embedding model instances."""
+    
+    @staticmethod
+    def create_model(
+        model_name: str, 
+        device: torch.device = torch.device("cuda:0")
+    ) -> BaseEmbeddingModel:
+        """
+        Create an embedding model instance based on the model name.
+        
+        Args:
+            model_name: Name of the model to create
+            device: Device to run the model on
+            
+        Returns:
+            BaseEmbeddingModel instance
+        """
+        model_type = determine_model_type(model_name)
+        
+        if model_type == "esmc":
+            return ESMCEmbeddingModel(model_name, device)
+        elif model_type == "esm3":
+            return ESM3EmbeddingModel(model_name, device)
+        elif model_type == "prott5":
+            return ProtT5EmbeddingModel(model_name, device)
+        else:  # Default to ESM-2
+            return ESM2EmbeddingModel(model_name, device)
+    
+    @staticmethod
+    def load_model_and_tokenizer(
+        model_name: str,
+        device: torch.device = torch.device("cuda:0"),
+    ) -> Tuple[Union[Any, DataParallel[Module]], Union[Any, None], torch.device]:
+        """
+        Load model and tokenizer using the factory pattern.
+        
+        This method maintains compatibility with the original function signature
+        while using the new OOP structure internally.
+        
+        Args:
+            model_name: The model name
+            device: The specific GPU device
+            
+        Returns:
+            Tuple: (model, tokenizer, device)
+        """
+        embedding_model = ModelFactory.create_model(model_name, device)
+        model, tokenizer = embedding_model.load_model()
+        
+        return model, tokenizer, device 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/__init__.py b/src/pyeed/embeddings/models/__init__.py
new file mode 100644
index 00000000..f2f8908f
--- /dev/null
+++ b/src/pyeed/embeddings/models/__init__.py
@@ -0,0 +1,17 @@
+"""
+Model implementations for different protein language models.
+
+Contains specific implementations for ESM-2, ESMC, ESM-3, and ProtT5 models.
+"""
+
+from .esm2 import ESM2EmbeddingModel
+from .esmc import ESMCEmbeddingModel
+from .esm3 import ESM3EmbeddingModel
+from .prott5 import ProtT5EmbeddingModel
+
# Public re-exports: one embedding-model class per supported architecture.
__all__ = [
    'ESM2EmbeddingModel',
    'ESMCEmbeddingModel',
    'ESM3EmbeddingModel',
    'ProtT5EmbeddingModel',
]
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
new file mode 100644
index 00000000..1edeea97
--- /dev/null
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -0,0 +1,172 @@
+"""
+ESM-2 model implementation for protein embeddings.
+"""
+
+from typing import List, Tuple, Optional, Any, cast
+import torch
+import numpy as np
+from numpy.typing import NDArray
+from transformers import EsmModel, EsmTokenizer
+from loguru import logger
+
+from ..base import BaseEmbeddingModel, normalize_embedding
+from ..utils import get_hf_token
+
+
class ESM2EmbeddingModel(BaseEmbeddingModel):
    """ESM-2 implementation of the embedding-model interface.

    Wraps a HuggingFace ``EsmModel``/``EsmTokenizer`` pair and exposes batch
    and single-sequence embedding extraction.
    """

    def __init__(self, model_name: str, device: torch.device):
        super().__init__(model_name, device)

    def _ensure_loaded(self) -> Tuple[EsmModel, EsmTokenizer]:
        """Lazily load the model/tokenizer on first use and return them.

        Centralizes the load-and-cast boilerplate that was previously
        duplicated in every public method.
        """
        if self.model is None or self.tokenizer is None:
            self.load_model()
        return cast(EsmModel, self.model), cast(EsmTokenizer, self.tokenizer)

    def load_model(self) -> Tuple[EsmModel, EsmTokenizer]:
        """Load the ESM-2 model and tokenizer from the HuggingFace hub.

        Accepts both ``facebook/<name>`` and bare ``<name>`` identifiers.

        Returns:
            Tuple of (model, tokenizer), with the model moved to ``self.device``.
        """
        token = get_hf_token()

        full_model_name = (
            self.model_name
            if self.model_name.startswith("facebook/")
            else f"facebook/{self.model_name}"
        )

        # NOTE(review): `use_auth_token` is deprecated in recent transformers
        # releases in favour of `token=` — confirm against the pinned version.
        model = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
        tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)

        model = model.to(self.device)

        self.model = model
        self.tokenizer = tokenizer

        return model, tokenizer

    def preprocess_sequence(self, sequence: str) -> str:
        """ESM-2 consumes raw sequences; no preprocessing required."""
        return sequence

    def get_batch_embeddings(
        self,
        sequences: List[str],
        pool_embeddings: bool = True,
    ) -> List[NDArray[np.float64]]:
        """Embed a batch of sequences with ESM-2.

        Args:
            sequences: Raw amino-acid sequences.
            pool_embeddings: If True, mean-pool over sequence length so each
                sequence yields a single vector.

        Returns:
            One numpy array per input sequence (un-normalized).
        """
        model, tokenizer = self._ensure_loaded()

        inputs = tokenizer(
            sequences, padding=True, truncation=True, return_tensors="pt"
        ).to(self.device)

        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)

        hidden_states = outputs.last_hidden_state.cpu().numpy()

        if pool_embeddings:
            # NOTE(review): the mean runs over the full padded length, so
            # padding and special-token positions are included — confirm this
            # is intended (it preserves the original behavior).
            return [embedding.mean(axis=0) for embedding in hidden_states]
        return list(hidden_states)

    def get_single_embedding_last_hidden_state(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return per-residue vectors from the final layer.

        The leading/trailing special tokens ([CLS]/[SEP]) are stripped.
        Unlike the first-layer/all-layer variants, the result is NOT
        normalized, preserving the original contract.
        """
        model, tokenizer = self._ensure_loaded()

        inputs = tokenizer(sequence, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = model(**inputs)

        # Drop the batch dimension and the special tokens at both ends.
        return outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()

    def get_single_embedding_all_layers(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue vectors for every hidden-state entry.

        Returns:
            Array of shape (num_layers, seq_len, hidden_dim).
        """
        model, tokenizer = self._ensure_loaded()

        inputs = tokenizer(sequence, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)

        embeddings_list = []
        for layer_tensor in outputs.hidden_states:
            # Drop batch dim and special tokens, then normalize per layer.
            emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy()
            embeddings_list.append(normalize_embedding(emb))

        return np.array(embeddings_list)

    def get_single_embedding_first_layer(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue vectors from the first hidden-state entry."""
        model, tokenizer = self._ensure_loaded()

        inputs = tokenizer(sequence, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)

        # First hidden-state entry, without batch dim or special tokens.
        embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy()
        return normalize_embedding(embedding)

    def get_final_embeddings(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return a single pooled embedding for *sequence* with fallback.

        Prefers the batched (pooled) path; on failure falls back to the
        per-residue last-hidden-state method.

        Raises:
            ValueError: If both extraction strategies fail.
        """
        try:
            embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
            if embeddings and len(embeddings) > 0:
                return embeddings[0]
            raise ValueError("Batch embeddings method returned empty results")
        except Exception as e:
            logger.warning(f"Batch embeddings method failed for ESM-2: {e}. Trying single sequence method.")
            try:
                # Fallback: un-pooled per-residue extraction.
                return self.get_single_embedding_last_hidden_state(sequence)
            except Exception as fallback_error:
                logger.error(f"All embedding extraction methods failed for ESM-2: {fallback_error}")
                raise ValueError(f"ESM-2 embedding extraction failed: {fallback_error}")
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esm3.py b/src/pyeed/embeddings/models/esm3.py
new file mode 100644
index 00000000..2f962a67
--- /dev/null
+++ b/src/pyeed/embeddings/models/esm3.py
@@ -0,0 +1,191 @@
+"""
+ESM-3 model implementation for protein embeddings.
+"""
+
+from typing import List, Tuple, Optional, cast
+import torch
+import numpy as np
+from numpy.typing import NDArray
+from loguru import logger
+from esm.models.esm3 import ESM3
+from esm.sdk.api import ESMProtein, SamplingConfig
+
+from ..base import BaseEmbeddingModel, normalize_embedding
+
+
class ESM3EmbeddingModel(BaseEmbeddingModel):
    """ESM-3 implementation of the embedding-model interface."""

    def __init__(self, model_name: str, device: torch.device):
        super().__init__(model_name, device)

    def load_model(self) -> Tuple[ESM3, None]:
        """Load the ESM3 model; ESM3 has no separate tokenizer (returns None).

        NOTE(review): the checkpoint name is hard-coded to "esm3_sm_open_v1"
        and ``self.model_name`` is ignored — confirm this is intentional.
        """
        model = ESM3.from_pretrained("esm3_sm_open_v1")
        model = model.to(self.device)

        self.model = model

        return model, None

    def preprocess_sequence(self, sequence: str) -> ESMProtein:
        """Wrap the raw sequence in the ``ESMProtein`` container ESM3 expects."""
        return ESMProtein(sequence=sequence)

    def _per_residue_embedding(self, sequence: str) -> NDArray[np.float64]:
        """Run one forward pass and return raw (un-normalized) per-residue embeddings.

        Centralizes the encode/forward_and_sample boilerplate that was
        previously triplicated across the single-embedding methods.

        Raises:
            ValueError: If the model returns no embeddings.
        """
        if self.model is None:
            self.load_model()
        model = cast(ESM3, self.model)

        with torch.no_grad():
            protein = self.preprocess_sequence(sequence)
            sequence_encoding = model.encode(protein)
            result = model.forward_and_sample(
                sequence_encoding,
                SamplingConfig(return_per_residue_embeddings=True),
            )
            if result is None or result.per_residue_embedding is None:
                raise ValueError("Model did not return embeddings")
            return result.per_residue_embedding.to(torch.float32).cpu().numpy()

    def get_batch_embeddings(
        self,
        sequences: List[str],
        pool_embeddings: bool = True,
    ) -> List[NDArray[np.float64]]:
        """Embed each sequence independently (one forward pass per sequence).

        Args:
            sequences: Raw amino-acid sequences.
            pool_embeddings: If True, mean-pool per-residue vectors into one
                vector per sequence.

        Returns:
            One numpy array per input sequence (un-normalized).
        """
        embedding_list = []
        for sequence in sequences:
            embeddings = self._per_residue_embedding(sequence)
            if pool_embeddings:
                embeddings = embeddings.mean(axis=0)
            embedding_list.append(embeddings)
        return embedding_list

    def get_single_embedding_last_hidden_state(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue embeddings for a single sequence."""
        return normalize_embedding(self._per_residue_embedding(sequence))

    def get_single_embedding_all_layers(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return per-layer embeddings for a single sequence.

        The ESM3 sampling API used here only exposes the final per-residue
        embedding, so the result is that single (normalized) layer wrapped in
        a length-1 array for interface consistency with the other models.
        """
        embedding = normalize_embedding(self._per_residue_embedding(sequence))
        return np.array([embedding])

    def get_single_embedding_first_layer(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue embeddings.

        For ESM3 this is identical to the last-hidden-state result, since
        only the final per-residue embedding is exposed.
        """
        return normalize_embedding(self._per_residue_embedding(sequence))

    def get_final_embeddings(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return per-residue embeddings with a robust fallback path.

        Tries the standard (normalized) extraction first; on failure retries
        a forward pass with a minimal ``SamplingConfig``.

        Raises:
            ValueError: If no strategy yields embeddings.
        """
        try:
            return self.get_single_embedding_last_hidden_state(sequence)
        except Exception as e:
            logger.warning(f"Standard embedding method failed for ESM3: {e}. Trying alternative method.")
            try:
                if self.model is None:
                    self.load_model()

                model = cast(ESM3, self.model)

                with torch.no_grad():
                    protein = self.preprocess_sequence(sequence)
                    sequence_encoding = model.encode(protein)
                    # Minimal sampling config: ask for nothing extra and take
                    # whatever embedding the model happens to return.
                    result = model.forward_and_sample(
                        sequence_encoding,
                        SamplingConfig()
                    )

                    if hasattr(result, 'per_residue_embedding') and result.per_residue_embedding is not None:
                        embedding = result.per_residue_embedding.to(torch.float32).cpu().numpy()
                        return embedding
                    else:
                        logger.warning("No per-residue embeddings available, using basic fallback")
                        raise ValueError("Could not extract any embeddings from ESM3 model")
            except Exception as fallback_error:
                logger.error(f"All embedding extraction methods failed for ESM3: {fallback_error}")
                raise ValueError(f"ESM3 embedding extraction failed: {fallback_error}")
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esmc.py b/src/pyeed/embeddings/models/esmc.py
new file mode 100644
index 00000000..04690dc4
--- /dev/null
+++ b/src/pyeed/embeddings/models/esmc.py
@@ -0,0 +1,267 @@
+"""
+ESMC model implementation for protein embeddings.
+"""
+
+from typing import List, Tuple, Optional, cast
+import torch
+import numpy as np
+from numpy.typing import NDArray
+from loguru import logger
+from esm.models.esmc import ESMC
+from esm.sdk.api import ESMProtein, LogitsConfig
+
+from ..base import BaseEmbeddingModel, normalize_embedding
+
+
class ESMCEmbeddingModel(BaseEmbeddingModel):
    """ESMC implementation of the embedding-model interface."""

    def __init__(self, model_name: str, device: torch.device):
        super().__init__(model_name, device)

    def _ensure_loaded(self) -> ESMC:
        """Lazily load the model on first use and return it with a narrowed type."""
        if self.model is None:
            self.load_model()
        return cast(ESMC, self.model)

    def load_model(self) -> Tuple[ESMC, None]:
        """Load the ESMC model, working around a tqdm threading issue.

        Returns:
            (model, None): ESMC does its own encoding, so no tokenizer.

        Raises:
            Exception: The original load error if both attempts fail.
        """
        try:
            # tqdm's class-level lock can break when several workers load
            # models concurrently; disabling progress bars avoids it.
            import os
            os.environ['DISABLE_TQDM'] = 'True'

            model = ESMC.from_pretrained(self.model_name)
            model = model.to(self.device)

            self.model = model

            return model, None

        except Exception as e:
            if "tqdm" in str(e).lower() or "_lock" in str(e).lower():
                logger.warning(f"ESMC model loading failed due to tqdm threading issue: {e}. Retrying with threading workaround...")

                import time

                # Staggered delay per GPU so concurrent workers don't collide.
                # Guarded: torch.cuda.current_device() raises on CPU-only hosts.
                gpu_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
                time.sleep(0.1 + gpu_index * 0.05)

                try:
                    # Best-effort reset of tqdm's shared lock state; a failure
                    # here must not mask the actual model-loading outcome.
                    try:
                        import tqdm
                        if hasattr(tqdm.tqdm, '_lock'):
                            delattr(tqdm.tqdm, '_lock')
                    except Exception:
                        pass

                    model = ESMC.from_pretrained(self.model_name)
                    model = model.to(self.device)

                    self.model = model

                    return model, None

                except Exception as retry_error:
                    logger.error(f"ESMC model loading failed even after retry: {retry_error}")
                    raise retry_error
            else:
                logger.error(f"ESMC model loading failed: {e}")
                raise e

    def preprocess_sequence(self, sequence: str) -> ESMProtein:
        """Wrap the raw sequence in the ``ESMProtein`` container ESMC expects."""
        return ESMProtein(sequence=sequence)

    def _hidden_states(self, sequence: str) -> Any:
        """Forward one sequence and return the per-layer hidden states.

        Centralizes the encode/logits boilerplate that was previously
        triplicated across the single-embedding methods.

        Raises:
            ValueError: If the model does not return hidden states.
        """
        model = self._ensure_loaded()

        with torch.no_grad():
            protein = self.preprocess_sequence(sequence)
            protein_tensor = model.encode(protein)
            logits_output = model.logits(
                protein_tensor,
                LogitsConfig(
                    sequence=True,
                    return_embeddings=True,
                    return_hidden_states=True,
                ),
            )
        if logits_output.hidden_states is None:
            raise ValueError(
                "Model did not return hidden states. Check LogitsConfig settings."
            )
        return logits_output.hidden_states

    def get_batch_embeddings(
        self,
        sequences: List[str],
        pool_embeddings: bool = True,
    ) -> List[NDArray[np.float64]]:
        """Embed each sequence independently via the logits API.

        Args:
            sequences: Raw amino-acid sequences.
            pool_embeddings: If True, mean-pool per-residue vectors into one
                vector per sequence.

        Returns:
            One numpy array per input sequence (un-normalized).

        Raises:
            ValueError: If the model returns no embeddings.
        """
        model = self._ensure_loaded()

        embedding_list = []
        with torch.no_grad():
            for sequence in sequences:
                protein = self.preprocess_sequence(sequence)
                protein_tensor = model.encode(protein)
                logits_output = model.logits(
                    protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
                )
                if logits_output.embeddings is None:
                    raise ValueError(
                        "Model did not return embeddings. Check LogitsConfig settings."
                    )
                embeddings = logits_output.embeddings.cpu().numpy()
                if pool_embeddings:
                    embeddings = embeddings.mean(axis=1)
                embedding_list.append(embeddings[0])
        return embedding_list

    def get_single_embedding_last_hidden_state(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue embeddings from the last layer."""
        hidden_states = self._hidden_states(sequence)
        embedding = hidden_states[-1][0].to(torch.float32).cpu().numpy()
        return normalize_embedding(embedding)

    def get_single_embedding_all_layers(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue embeddings for every layer.

        Returns:
            Array of shape (num_layers, seq_len, hidden_dim).
        """
        embeddings_list = []
        for layer_tensor in self._hidden_states(sequence):
            # Drop the batch dimension; ESMC hidden states are used as-is
            # (no special-token slicing), matching the original behavior.
            emb = layer_tensor[0].to(torch.float32).cpu().numpy()
            embeddings_list.append(normalize_embedding(emb))
        return np.array(embeddings_list)

    def get_single_embedding_first_layer(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return normalized per-residue embeddings from the first layer."""
        hidden_states = self._hidden_states(sequence)
        embedding = hidden_states[0][0].to(torch.float32).cpu().numpy()
        return normalize_embedding(embedding)

    def get_final_embeddings(
        self,
        sequence: str,
    ) -> NDArray[np.float64]:
        """Return a single pooled embedding with OOM-aware fallback.

        Prefers the pooled batch path; on CUDA OOM clears the cache and
        retries a minimal forward pass that skips hidden states.

        Raises:
            ValueError: If all strategies fail.
        """
        try:
            embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
            if embeddings and len(embeddings) > 0:
                return embeddings[0]
            raise ValueError("Batch embeddings method returned empty results")
        except (torch.cuda.OutOfMemoryError, RuntimeError) as e:
            if "out of memory" in str(e).lower():
                logger.warning(f"Batch embeddings method failed due to OOM for ESMC: {e}. Clearing cache and trying minimal approach.")
                torch.cuda.empty_cache()
                try:
                    # Minimal approach: embeddings only, no hidden states.
                    model = self._ensure_loaded()

                    with torch.no_grad():
                        protein = self.preprocess_sequence(sequence)
                        protein_tensor = model.encode(protein)
                        logits_output = model.logits(
                            protein_tensor,
                            LogitsConfig(sequence=True, return_embeddings=True)
                        )
                        if logits_output.embeddings is None:
                            raise ValueError("Model did not return embeddings")

                        embeddings = logits_output.embeddings.cpu().numpy()
                        # Mean-pool over the sequence dimension -> one vector.
                        return embeddings.mean(axis=1)[0]

                except Exception as minimal_error:
                    logger.error(f"Minimal embedding extraction also failed for ESMC: {minimal_error}")
                    raise ValueError(f"ESMC embedding extraction failed with OOM: {minimal_error}")
            else:
                raise e
        except Exception as e:
            logger.error(f"All embedding extraction methods failed for ESMC: {e}")
            raise ValueError(f"ESMC embedding extraction failed: {e}")
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/prott5.py b/src/pyeed/embeddings/models/prott5.py
new file mode 100644
index 00000000..7e0a82ef
--- /dev/null
+++ b/src/pyeed/embeddings/models/prott5.py
@@ -0,0 +1,241 @@
+"""
+ProtT5 model implementation for protein embeddings.
+"""
+
+from typing import List, Tuple, Optional, cast
+import torch
+import numpy as np
+from numpy.typing import NDArray
+from transformers import T5Model, T5Tokenizer
+
+from ..base import BaseEmbeddingModel, normalize_embedding
+from ..utils import get_hf_token, preprocess_sequence_for_prott5
+
+
+class ProtT5EmbeddingModel(BaseEmbeddingModel):
+    """ProtT5 model implementation."""
+    
+    def __init__(self, model_name: str, device: torch.device):
+        super().__init__(model_name, device)
+    
+    def load_model(self) -> Tuple[T5Model, T5Tokenizer]:
+        """Load ProtT5 model and tokenizer."""
+        token = get_hf_token()
+        
+        full_model_name = (
+            self.model_name
+            if self.model_name.startswith("Rostlab/")
+            else f"Rostlab/{self.model_name}"
+        )
+        
+        model = T5Model.from_pretrained(full_model_name, use_auth_token=token)
+        tokenizer = T5Tokenizer.from_pretrained(
+            full_model_name, use_auth_token=token, do_lower_case=False
+        )
+        
+        # Move to device
+        model = model.to(self.device)
+        
+        self.model = model
+        self.tokenizer = tokenizer
+        
+        return model, tokenizer
+    
+    def preprocess_sequence(self, sequence: str) -> str:
+        """ProtT5 needs space-separated sequences with rare AAs mapped to X."""
+        return preprocess_sequence_for_prott5(sequence)
+    
+    def get_batch_embeddings(
+        self, 
+        sequences: List[str], 
+        pool_embeddings: bool = True
+    ) -> List[NDArray[np.float64]]:
+        """Get embeddings for a batch of sequences using ProtT5."""
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+        
+        # Type cast to ensure type checker knows they're not None
+        model = cast(T5Model, self.model)
+        tokenizer = cast(T5Tokenizer, self.tokenizer)
+        
+        # Preprocess sequences for ProtT5
+        processed_sequences = [self.preprocess_sequence(seq) for seq in sequences]
+        
+        inputs = tokenizer.batch_encode_plus(
+            processed_sequences, 
+            add_special_tokens=True, 
+            padding="longest",
+            return_tensors="pt"
+        )
+        
+        # Move inputs to device
+        input_ids = inputs['input_ids'].to(self.device)
+        attention_mask = inputs['attention_mask'].to(self.device)
+        
+        with torch.no_grad():
+            # For ProtT5, use encoder embeddings for feature extraction
+            # Create dummy decoder inputs (just the pad token)
+            batch_size = input_ids.shape[0]
+            decoder_input_ids = torch.full(
+                (batch_size, 1), 
+                tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+                dtype=torch.long,
+                device=self.device
+            )
+            
+            outputs = model(
+                input_ids=input_ids, 
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids
+            )
+            
+            # Get encoder last hidden state (encoder embeddings)
+            hidden_states = outputs.encoder_last_hidden_state.cpu().numpy()
+
+        if pool_embeddings:
+            # Mean pooling across sequence length, excluding padding tokens
+            embedding_list = []
+            for i, hidden_state in enumerate(hidden_states):
+                # Get actual sequence length (excluding padding)
+                attention_mask_np = attention_mask[i].cpu().numpy()
+                seq_len = attention_mask_np.sum()
+                # Pool only over actual sequence tokens
+                pooled_embedding = hidden_state[:seq_len].mean(axis=0)
+                embedding_list.append(pooled_embedding)
+            return embedding_list
+        return list(hidden_states)
+    
+    def get_single_embedding_last_hidden_state(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get last hidden state embedding for a single sequence."""
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+        
+        # Type cast to ensure type checker knows they're not None
+        model = cast(T5Model, self.model)
+        tokenizer = cast(T5Tokenizer, self.tokenizer)
+        
+        processed_sequence = self.preprocess_sequence(sequence)
+        inputs = tokenizer.encode_plus(
+            processed_sequence,
+            add_special_tokens=True,
+            return_tensors="pt"
+        )
+        
+        input_ids = inputs['input_ids'].to(self.device)
+        attention_mask = inputs['attention_mask'].to(self.device)
+        
+        # Create dummy decoder inputs
+        decoder_input_ids = torch.full(
+            (1, 1), 
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            dtype=torch.long,
+            device=self.device
+        )
+        
+        with torch.no_grad():
+            outputs = model(
+                input_ids=input_ids, 
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids
+            )
+        
+        # Get encoder last hidden state including special tokens
+        embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
+        return embedding
+    
+    def get_single_embedding_all_layers(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get embeddings from all layers for a single sequence."""
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+        
+        # Type cast to ensure type checker knows they're not None
+        model = cast(T5Model, self.model)
+        tokenizer = cast(T5Tokenizer, self.tokenizer)
+        
+        processed_sequence = self.preprocess_sequence(sequence)
+        inputs = tokenizer.encode_plus(
+            processed_sequence,
+            add_special_tokens=True,
+            return_tensors="pt"
+        )
+        
+        input_ids = inputs['input_ids'].to(self.device)
+        attention_mask = inputs['attention_mask'].to(self.device)
+        
+        # Create dummy decoder inputs
+        decoder_input_ids = torch.full(
+            (1, 1), 
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            dtype=torch.long,
+            device=self.device
+        )
+        
+        with torch.no_grad():
+            outputs = model(
+                input_ids=input_ids, 
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
+                output_hidden_states=True
+            )
+        
+        embeddings_list = []
+        # Get all encoder hidden states
+        encoder_hidden_states = outputs.encoder_hidden_states
+        for layer_tensor in encoder_hidden_states:
+            # Remove batch dimension but keep special tokens
+            emb = layer_tensor[0].detach().cpu().numpy()
+            emb = normalize_embedding(emb)
+            embeddings_list.append(emb)
+
+        return np.array(embeddings_list)
+    
+    def get_single_embedding_first_layer(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """Get first layer embedding for a single sequence."""
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+        
+        # Type cast to ensure type checker knows they're not None
+        model = cast(T5Model, self.model)
+        tokenizer = cast(T5Tokenizer, self.tokenizer)
+        
+        processed_sequence = self.preprocess_sequence(sequence)
+        inputs = tokenizer.encode_plus(
+            processed_sequence,
+            add_special_tokens=True,
+            return_tensors="pt"
+        )
+        
+        input_ids = inputs['input_ids'].to(self.device)
+        attention_mask = inputs['attention_mask'].to(self.device)
+        
+        # Create dummy decoder inputs
+        decoder_input_ids = torch.full(
+            (1, 1), 
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            dtype=torch.long,
+            device=self.device
+        )
+        
+        with torch.no_grad():
+            outputs = model(
+                input_ids=input_ids, 
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
+                output_hidden_states=True
+            )
+        
+        # Get first encoder hidden state including special tokens
+        embedding = outputs.encoder_hidden_states[0][0].detach().cpu().numpy()
+        
+        # Normalize the embedding
+        embedding = normalize_embedding(embedding)
+        return embedding 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
new file mode 100644
index 00000000..d025f1c0
--- /dev/null
+++ b/src/pyeed/embeddings/processor.py
@@ -0,0 +1,482 @@
+"""
+Main embedding processor for coordinating embedding operations.
+
+Provides high-level interfaces for batch processing, single sequence processing,
+and database operations with automatic device management and model loading.
+"""
+
+from typing import List, Union, Any, Literal, Optional
+import torch
+from torch.nn import DataParallel, Module
+from loguru import logger
+import numpy as np
+from numpy.typing import NDArray
+import time
+from concurrent.futures import ThreadPoolExecutor
+import os
+
+from .factory import ModelFactory
+from .base import BaseEmbeddingModel
+from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
+from .database import update_protein_embeddings_in_db
+from .utils import free_memory
+from pyeed.dbconnect import DatabaseConnector
+
+
+class EmbeddingProcessor:
+    """
+    Main processor for handling protein embedding operations.
+    
+    Automatically manages device selection, model loading, and provides
+    simplified interfaces for all embedding operations.
+    """
+    
    def __init__(self):
        """Set up empty model cache and detect available compute devices."""
        # Cache of loaded models keyed by "<model_name>_<device>" so each
        # (model, device) pair is instantiated at most once.
        self._models: dict[str, BaseEmbeddingModel] = {}
        # Devices usable for computation; populated by _initialize_devices().
        self._devices: List[torch.device] = []
        self._initialize_devices()
+    
+    def _initialize_devices(self) -> None:
+        """Initialize available devices for computation."""
+        if torch.cuda.is_available():
+            device_count = torch.cuda.device_count()
+            self._devices = [torch.device(f"cuda:{i}") for i in range(device_count)]
+            logger.info(f"Initialized {device_count} GPU device(s): {self._devices}")
+        else:
+            self._devices = [torch.device("cpu")]
+            logger.warning("No GPU available, using CPU.")
+    
+    def get_available_devices(self) -> List[torch.device]:
+        """Get list of available devices."""
+        return self._devices.copy()
+    
+    def get_or_create_model(
+        self, 
+        model_name: str, 
+        device: Optional[torch.device] = None
+    ) -> BaseEmbeddingModel:
+        """Get existing model or create new one on specified or best available device."""
+        if device is None:
+            device = self._devices[0]  # Use first available device
+        
+        key = f"{model_name}_{device}"
+        if key not in self._models:
+            self._models[key] = ModelFactory.create_model(model_name, device)
+            logger.info(f"Loaded model {model_name} on {device}")
+        return self._models[key]
+    
    def calculate_batch_embeddings(
        self,
        data: List[tuple[str, str]],
        model_name: str = "facebook/esm2_t33_650M_UR50D",
        batch_size: int = 16,
        num_gpus: Optional[int] = None,
        db: Optional[DatabaseConnector] = None,
        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
    ) -> Optional[List[NDArray[np.float64]]]:
        """
        Calculate embeddings for a batch of sequences with automatic device management.
        
        Args:
            data: List of (accession_id, sequence) tuples
            model_name: Name of the model to use
            batch_size: Batch size for processing
            num_gpus: Number of GPUs to use (None = use all available)
            db: Database connector for storing results (optional)
            embedding_type: Type of embedding to calculate:
                - "last_hidden_state": Use last hidden state (most common)
                - "all_layers": Average across all transformer layers  
                - "first_layer": Use first layer embedding
                - "final_embeddings": Robust option that works across all models (recommended for compatibility)
            
        Returns:
            List of embeddings if db is None, otherwise None (results stored in DB)

        NOTE(review): when num_gpus > 1 the returned list is the per-device
        results concatenated (data[0::n], then data[1::n], ...), so it does
        NOT follow the order of ``data`` — confirm callers don't zip the
        result with the input.
        """
        # Disable tqdm to prevent threading issues with multiple GPUs
        # NOTE(review): this mutates process-global state and is never
        # restored afterwards — confirm that is acceptable.
        os.environ['DISABLE_TQDM'] = 'True'
        
        if not data:
            logger.info("No sequences to process.")
            return []
        
        # Determine number of GPUs to use; requested count is capped at the
        # number of CUDA devices actually detected.
        available_gpus = len([d for d in self._devices if d.type == 'cuda'])
        if num_gpus is None:
            num_gpus = available_gpus
        else:
            num_gpus = min(num_gpus, available_gpus)
        
        if num_gpus == 0:
            # No CUDA at all: run the single-device path on CPU.
            devices_to_use = [torch.device("cpu")]
            num_gpus = 1
        else:
            devices_to_use = [torch.device(f"cuda:{i}") for i in range(num_gpus)]
        
        logger.info(f"Processing {len(data)} sequences using {num_gpus} device(s)")
        
        # Load models for each device
        models = []
        for device in devices_to_use:
            try:
                model = self.get_or_create_model(model_name, device)
                models.append(model)
            except Exception as e:
                # Heuristic: tqdm's internal lock is not thread-safe across
                # concurrent model loads; fall back to one device in that case.
                if "tqdm" in str(e).lower() or "_lock" in str(e).lower():
                    logger.warning(f"Model loading failed on {device} due to threading issue. Reducing to single GPU mode.")
                    # Fall back to single GPU mode to avoid threading issues
                    devices_to_use = [devices_to_use[0]]
                    num_gpus = 1
                    models = [self.get_or_create_model(model_name, devices_to_use[0])]
                    break
                else:
                    raise e
        
        # Split data across devices: device i gets every num_gpus-th item
        # starting at offset i (round-robin striping).
        gpu_batches = [
            data[i::num_gpus] for i in range(num_gpus)
        ]
        
        start_time = time.time()
        all_embeddings = []
        
        if num_gpus == 1:
            # Single device processing
            embeddings = self._process_batch_single_device(
                gpu_batches[0], models[0], batch_size, db, embedding_type
            )
            all_embeddings.extend(embeddings)
        else:
            # Multi-device parallel processing
            with ThreadPoolExecutor(max_workers=num_gpus) as executor:
                futures = []
                for i, gpu_data in enumerate(gpu_batches):
                    if not gpu_data:
                        continue
                    
                    futures.append(
                        executor.submit(
                            self._process_batch_single_device,
                            gpu_data,
                            models[i],
                            batch_size,
                            db,
                            embedding_type
                        )
                    )
                
                # Collect in submission order; a failure in any worker
                # propagates out of future.result() here.
                for future in futures:
                    embeddings = future.result()
                    all_embeddings.extend(embeddings)
        
        end_time = time.time()
        logger.info(f"Batch processing completed in {end_time - start_time:.2f} seconds")
        
        return all_embeddings if db is None else None
+    
+    def _process_batch_single_device(
+        self,
+        data: List[tuple[str, str]],
+        model: BaseEmbeddingModel,
+        batch_size: int,
+        db: Optional[DatabaseConnector] = None,
+        embedding_type: str = "last_hidden_state"
+    ) -> List[NDArray[np.float64]]:
+        """Process batch on a single device."""
+        all_embeddings = []
+        
+        for batch_start in range(0, len(data), batch_size):
+            batch_end = min(batch_start + batch_size, len(data))
+            batch = data[batch_start:batch_end]
+            
+            accessions, sequences = zip(*batch)
+            current_batch_size = len(sequences)
+            
+            while current_batch_size > 0:
+                try:
+                    # Calculate embeddings based on type
+                    if embedding_type == "last_hidden_state":
+                        # no batching for last hidden state
+                        embeddings_batch = [
+                            model.get_single_embedding_last_hidden_state(seq)
+                            for seq in sequences[:current_batch_size]
+                        ]
+                    elif embedding_type == "all_layers":
+                        embeddings_batch = [
+                            model.get_single_embedding_all_layers(seq)
+                            for seq in sequences[:current_batch_size]
+                        ]
+                    elif embedding_type == "first_layer":
+                        embeddings_batch = [
+                            model.get_single_embedding_first_layer(seq)
+                            for seq in sequences[:current_batch_size]
+                        ]
+                    elif embedding_type == "final_embeddings":
+                        embeddings_batch = [
+                            model.get_final_embeddings(seq)
+                            for seq in sequences[:current_batch_size]
+                        ]
+                    else:
+                        raise ValueError(f"Unknown embedding_type: {embedding_type}")
+                    
+                    # Store in database if provided
+                    if db is not None:
+                        update_protein_embeddings_in_db(
+                            db, list(accessions[:current_batch_size]), embeddings_batch
+                        )
+                    
+                    all_embeddings.extend(embeddings_batch)
+                    break  # Successful execution
+                
+                except torch.cuda.OutOfMemoryError:
+                    torch.cuda.empty_cache()
+                    current_batch_size = max(1, current_batch_size // 2)
+                    logger.warning(f"Reduced batch size to {current_batch_size} due to OOM error.")
+        
+        return all_embeddings
+    
+    def calculate_single_embedding(
+        self,
+        sequence: str,
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state",
+        device: Optional[torch.device] = None
+    ) -> NDArray[np.float64]:
+        """
+        Calculate embedding for a single sequence.
+        
+        Args:
+            sequence: Protein sequence
+            model_name: Name of the model to use
+            embedding_type: Type of embedding to calculate
+            device: Specific device to use (optional)
+            
+        Returns:
+            Embedding as numpy array
+        """
+        model = self.get_or_create_model(model_name, device)
+        
+        if embedding_type == "last_hidden_state":
+            return model.get_single_embedding_last_hidden_state(sequence)
+        elif embedding_type == "all_layers":
+            return model.get_single_embedding_all_layers(sequence)
+        elif embedding_type == "first_layer":
+            return model.get_single_embedding_first_layer(sequence)
+        elif embedding_type == "final_embeddings":
+            return model.get_final_embeddings(sequence)
+        else:
+            raise ValueError(f"Unknown embedding_type: {embedding_type}")
+    
+    def calculate_database_embeddings(
+        self,
+        db: DatabaseConnector,
+        batch_size: int = 16,
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+        num_gpus: Optional[int] = None,
+        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
+    ) -> None:
+        """
+        Calculate embeddings for all sequences in database that don't have embeddings.
+        
+        Args:
+            db: Database connector
+            batch_size: Batch size for processing
+            model_name: Name of the model to use
+            num_gpus: Number of GPUs to use (None = use all available)
+            embedding_type: Type of embedding to calculate
+        """
+        # Retrieve sequences without embeddings
+        query = """
+        MATCH (p:Protein)
+        WHERE p.embedding IS NULL AND p.sequence IS NOT NULL
+        RETURN p.accession_id AS accession, p.sequence AS sequence
+        """
+        results = db.execute_read(query)
+        data = [(result["accession"], result["sequence"]) for result in results]
+        
+        if not data:
+            logger.info("No sequences to process.")
+            return
+        
+        logger.info(f"Found {len(data)} sequences without embeddings")
+        
+        # Process using batch embedding method
+        self.calculate_batch_embeddings(
+            data=data,
+            model_name=model_name,
+            batch_size=batch_size,
+            num_gpus=num_gpus,
+            db=db,
+            embedding_type=embedding_type
+        )
+    
+    # Legacy compatibility methods (for backward compatibility with existing processor.py)
+    def process_batches_on_gpu(
+        self,
+        data: List[tuple[str, str]],
+        batch_size: int,
+        model: Union[Any, DataParallel[Module]],
+        tokenizer: Union[Any, None],
+        db: DatabaseConnector,
+        device: torch.device,
+    ) -> None:
+        """Legacy method for backward compatibility."""
+        logger.warning("Using legacy process_batches_on_gpu method. Consider using calculate_batch_embeddings instead.")
+        
+        # Convert to new interface
+        accessions, sequences = zip(*data)
+        embedding_data = list(zip(accessions, sequences))
+        
+        # Use new method
+        self.calculate_batch_embeddings(
+            data=embedding_data,
+            batch_size=batch_size,
+            db=db
+        )
+    
+    def get_batch_embeddings_unified(
+        self,
+        batch_sequences: List[str],
+        model: Union[Any, DataParallel[Module]],
+        tokenizer: Union[Any, None],
+        device: torch.device = torch.device("cuda:0"),
+        pool_embeddings: bool = True,
+    ) -> List[NDArray[np.float64]]:
+        """Legacy method for backward compatibility."""
+        logger.warning("Using legacy get_batch_embeddings_unified method.")
+        
+        # Determine model type from the actual model instance
+        base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
+        model_type = type(base_model).__name__
+        
+        # Map model class names to our model types
+        if "ESMC" in model_type:
+            embedding_model = ESMCEmbeddingModel("", device)
+            embedding_model.model = base_model
+            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
+        elif "ESM3" in model_type:
+            embedding_model = ESM3EmbeddingModel("", device)
+            embedding_model.model = base_model
+            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
+        elif "T5Model" in model_type:
+            embedding_model = ProtT5EmbeddingModel("", device)
+            embedding_model.model = base_model
+            embedding_model.tokenizer = tokenizer
+            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
+        else:  # ESM-2 and other ESM models
+            embedding_model = ESM2EmbeddingModel("", device)
+            embedding_model.model = base_model
+            embedding_model.tokenizer = tokenizer
+            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
+    
+    def calculate_single_sequence_embedding_last_hidden_state(
+        self,
+        sequence: str,
+        device: torch.device = torch.device("cuda:0"),
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        return self.calculate_single_embedding(sequence, model_name, "last_hidden_state", device)
+    
+    def calculate_single_sequence_embedding_all_layers(
+        self,
+        sequence: str,
+        device: torch.device,
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        return self.calculate_single_embedding(sequence, model_name, "all_layers", device)
+    
+    def calculate_single_sequence_embedding_first_layer(
+        self,
+        sequence: str,
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+        device: torch.device = torch.device("cuda:0"),
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        return self.calculate_single_embedding(sequence, model_name, "first_layer", device)
+    
+    def get_single_embedding_last_hidden_state(
+        self, 
+        sequence: str, 
+        model: Any, 
+        tokenizer: Any, 
+        device: torch.device
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        logger.warning("Using legacy get_single_embedding_last_hidden_state method.")
+        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "last_hidden_state")
+    
+    def get_single_embedding_all_layers(
+        self, 
+        sequence: str, 
+        model: Any, 
+        tokenizer: Any, 
+        device: torch.device
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        logger.warning("Using legacy get_single_embedding_all_layers method.")
+        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "all_layers")
+    
+    def get_single_embedding_first_layer(
+        self, 
+        sequence: str, 
+        model: Any, 
+        tokenizer: Any, 
+        device: torch.device
+    ) -> NDArray[np.float64]:
+        """Legacy method for backward compatibility."""
+        logger.warning("Using legacy get_single_embedding_first_layer method.")
+        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "first_layer")
+    
+    def _get_single_embedding_legacy(
+        self, 
+        sequence: str, 
+        model: Any, 
+        tokenizer: Any, 
+        device: torch.device,
+        embedding_type: str
+    ) -> NDArray[np.float64]:
+        """Helper method for legacy single embedding methods."""
+        # Determine model type and create appropriate embedding model
+        base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
+        model_type = type(base_model).__name__
+        
+        if "ESMC" in model_type:
+            embedding_model = ESMCEmbeddingModel("", device)
+            embedding_model.model = base_model
+        elif "ESM3" in model_type:
+            embedding_model = ESM3EmbeddingModel("", device)
+            embedding_model.model = base_model
+        elif "T5Model" in model_type:
+            embedding_model = ProtT5EmbeddingModel("", device)
+            embedding_model.model = base_model
+            embedding_model.tokenizer = tokenizer
+        else:  # ESM-2 and other ESM models
+            embedding_model = ESM2EmbeddingModel("", device)
+            embedding_model.model = base_model
+            embedding_model.tokenizer = tokenizer
+        
+        if embedding_type == "last_hidden_state":
+            return embedding_model.get_single_embedding_last_hidden_state(sequence)
+        elif embedding_type == "all_layers":
+            return embedding_model.get_single_embedding_all_layers(sequence)
+        elif embedding_type == "first_layer":
+            return embedding_model.get_single_embedding_first_layer(sequence)
+        else:
+            raise ValueError(f"Unknown embedding_type: {embedding_type}")
+    
+    def cleanup(self) -> None:
+        """Clean up all models and free memory."""
+        for model in self._models.values():
+            model.cleanup()
+        self._models.clear()
+        free_memory()
+
+
# Global processor instance
# Created eagerly at module import time so that all callers share one model
# cache and one device list for the lifetime of the process.
_processor = EmbeddingProcessor()


def get_processor() -> EmbeddingProcessor:
    """Get the global embedding processor instance."""
    return _processor
\ No newline at end of file
diff --git a/src/pyeed/embeddings/utils.py b/src/pyeed/embeddings/utils.py
new file mode 100644
index 00000000..6559a66f
--- /dev/null
+++ b/src/pyeed/embeddings/utils.py
@@ -0,0 +1,77 @@
+"""
+Utility functions for embedding operations.
+
+Contains helper functions for token management, memory management, 
+and sequence preprocessing.
+"""
+
+import gc
+import os
+import re
+from huggingface_hub import HfFolder, login
+import torch
+
+
def get_hf_token() -> str:
    """Return a Hugging Face access token, prompting for login if needed.

    In test runs (``PYTEST_DISABLE_HF_LOGIN`` set) a dummy token is returned
    without touching the Hugging Face credential store.

    Raises:
        RuntimeError: If no token could be obtained even after login.
    """
    if os.getenv("PYTEST_DISABLE_HF_LOGIN"):  # Disable Hugging Face login in tests
        return "dummy_token_for_tests"

    store = HfFolder()
    token = store.get_token()
    if not token:
        # login() returns None; re-read the token it saved afterwards.
        login()
        token = store.get_token()

    if not isinstance(token, str):
        raise RuntimeError("Failed to get Hugging Face token")
    return token
+
+
def preprocess_sequence_for_prott5(sequence: str) -> str:
    """Prepare a raw amino-acid sequence for ProtT5 tokenization.

    Upper-cases the sequence, replaces the rare/ambiguous residues U, Z, O
    and B with X, and inserts a space between every residue — the input
    format ProtT5 tokenizers expect.

    Args:
        sequence: Raw protein sequence.

    Returns:
        Space-separated, upper-cased sequence with rare residues mapped to X.
    """
    cleaned = re.sub(r"[UZOB]", "X", sequence.upper())
    # str.join iterates characters directly; no intermediate list needed.
    return " ".join(cleaned)
+
+
def free_memory() -> None:
    """Run garbage collection and clear the accelerator cache.

    At most one backend cache is cleared, with MPS taking precedence over
    CUDA (matching the original elif behaviour).
    """
    gc.collect()
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
        return
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
+
+
def determine_model_type(model_name: str) -> str:
    """Classify a model name into one of the supported model families.

    Args:
        model_name: Model identifier (e.g. a Hugging Face repo name).

    Returns:
        One of "esmc", "esm3", "prott5", or the default "esm2".
    """
    name = model_name.lower()
    # Order matters: the more specific markers must win before the
    # catch-all ESM-2 default is applied.
    markers = (
        ("esmc", "esmc"),
        ("esm3", "esm3"),
        ("prot_t5", "prott5"),
        ("prott5", "prott5"),
    )
    for marker, family in markers:
        if marker in name:
            return family
    return "esm2"  # Default to ESM-2 for other facebook/esm models
\ No newline at end of file
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index af09e59b..206cd644 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -14,10 +14,7 @@
 from pyeed.adapter.uniprot_mapper import UniprotToPyeed
 from pyeed.dbchat import DBChat
 from pyeed.dbconnect import DatabaseConnector
-from pyeed.embedding import (
-    load_model_and_tokenizer,
-    process_batches_on_gpu,
-)
+from pyeed.embeddings import get_processor, free_memory
 
 
 class Pyeed:
@@ -209,92 +206,32 @@ def calculate_sequence_embeddings(
         batch_size: int = 16,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
         num_gpus: int = 1,  # Number of GPUs to use
+        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "final_embeddings"
     ) -> None:
         """
         Calculates embeddings for all sequences in the database that do not have embeddings,
-        distributing the workload across available GPUs.
+        using the new EmbeddingProcessor with automatic device management.
 
         Args:
             batch_size (int): Number of sequences to process in each batch.
             model_name (str): Model used for calculating embeddings.
             num_gpus (int, optional): Number of GPUs to use. If None, use all available GPUs.
+            embedding_type (str): Type of embedding to calculate ("last_hidden_state", "all_layers", "first_layer", "final_embeddings").
         """
-
-        # Get the available GPUs
-        available_gpus = torch.cuda.device_count()
-        if num_gpus is None or num_gpus > available_gpus:
-            num_gpus = available_gpus
-
-        if num_gpus == 0:
-            logger.warning("No GPU available! Running on CPU.")
-
-        # Load separate models for each GPU
-        devices = (
-            [torch.device(f"cuda:{i}") for i in range(num_gpus)]
-            if num_gpus > 0
-            else [torch.device("cpu")]
-        )
-
-        models_and_tokenizers = [
-            load_model_and_tokenizer(model_name, device) for device in devices
-        ]
-
-        # Retrieve sequences without embeddings
-        query = """
-        MATCH (p:Protein)
-        WHERE p.embedding IS NULL AND p.sequence IS NOT NULL
-        RETURN p.accession_id AS accession, p.sequence AS sequence
-        """
-        results = self.db.execute_read(query)
-        data = [(result["accession"], result["sequence"]) for result in results]
-
-        if not data:
-            logger.info("No sequences to process.")
-            return
-
-        accessions, sequences = zip(*data)
-        total_sequences = len(sequences)
-        logger.debug(f"Total sequences to process: {total_sequences}")
-
-        # Split the data into num_gpus chunks
-        gpu_batches = [
-            list(zip(accessions[i::num_gpus], sequences[i::num_gpus]))
-            for i in range(num_gpus)
-        ]
-
-        start_time = time.time()
-
-        # Process batches in parallel across GPUs
-        with ThreadPoolExecutor(max_workers=num_gpus) as executor:
-            futures = []
-            for i, gpu_data in enumerate(gpu_batches):
-                if not gpu_data:
-                    continue  # Skip empty GPU batches
-
-                model, tokenizer, device = models_and_tokenizers[i]
-                futures.append(
-                    executor.submit(
-                        process_batches_on_gpu,
-                        gpu_data,
-                        batch_size,
-                        model,
-                        tokenizer,
-                        self.db,
-                        device,
-                    )
-                )
-
-            for future in futures:
-                future.result()  # Wait for all threads to complete
-
-        end_time = time.time()
-        logger.info(
-            f"Total embedding calculation time: {end_time - start_time:.2f} seconds"
+        # Get the embedding processor
+        processor = get_processor()
+        
+        # Use the simplified interface
+        processor.calculate_database_embeddings(
+            db=self.db,
+            batch_size=batch_size,
+            model_name=model_name,
+            num_gpus=num_gpus,
+            embedding_type=embedding_type
         )
 
-        # Cleanup
-        for model, _, _ in models_and_tokenizers:
-            del model
+        # free memory
+        free_memory()
 
     def get_proteins(self, accession_ids: list[str]) -> list[dict[str, Any]]:
         """
@@ -534,3 +471,38 @@ def create_coding_sequences_regions(self) -> None:
         """
         result = self.db.execute_read(count_query)
         logger.info(f"Created {result[0]['region_count']} coding sequence regions")
+
+    def calculate_single_sequence_embedding(
+        self,
+        sequence: str,
+        model_name: str = "facebook/esm2_t33_650M_UR50D",
+        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
+    ) -> Any:
+        """
+        Calculate embedding for a single protein sequence.
+        
+        Args:
+            sequence: Protein sequence string
+            model_name: Model to use for embedding calculation
+            embedding_type: Type of embedding to calculate
+            
+        Returns:
+            Numpy array containing the embedding
+        """
+        processor = get_processor()
+        return processor.calculate_single_embedding(
+            sequence=sequence,
+            model_name=model_name,
+            embedding_type=embedding_type
+        )
+    
+    def get_available_devices(self) -> list[str]:
+        """
+        Get list of available devices for embedding computation.
+        
+        Returns:
+            List of available device names
+        """
+        processor = get_processor()
+        devices = processor.get_available_devices()
+        return [str(device) for device in devices]

From d3b7639897ef72e58d511242a07286ac2932b90f Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 09:21:37 +0000
Subject: [PATCH 03/11] fixed special token errors

---
 src/pyeed/embeddings/models/esmc.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/pyeed/embeddings/models/esmc.py b/src/pyeed/embeddings/models/esmc.py
index 04690dc4..245a2172 100644
--- a/src/pyeed/embeddings/models/esmc.py
+++ b/src/pyeed/embeddings/models/esmc.py
@@ -97,6 +97,8 @@ def get_batch_embeddings(
                         "Model did not return embeddings. Check LogitsConfig settings."
                     )
                 embeddings = logits_output.embeddings.cpu().numpy()
+                # drop the special tokens
+                embeddings = embeddings[:, 1:-1, :]
                 if pool_embeddings:
                     embeddings = embeddings.mean(axis=1)
                 embedding_list.append(embeddings[0])
@@ -130,8 +132,9 @@ def get_single_embedding_last_hidden_state(
                     "Model did not return hidden states. Check LogitsConfig settings."
                 )
 
+            # remove special tokens
             embedding = (
-                logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
+                logits_output.hidden_states[-1][0][1:-1].to(torch.float32).cpu().numpy()
             )
 
         # Normalize the embedding
@@ -252,6 +255,8 @@ def get_final_embeddings(
                         
                         # Get embeddings and pool them properly
                         embeddings = logits_output.embeddings.cpu().numpy()
+                        logger.info(f"Embeddings shape: {embeddings.shape}")
+                        
                         # Pool across sequence dimension to get single vector
                         pooled_embedding = embeddings.mean(axis=1)[0]
                         

From cdd88c986f8c6edd75dbc22f8a34701c0128a3e6 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 09:56:53 +0000
Subject: [PATCH 04/11] major linter refactor

---
 src/pyeed/embedding.py                |  41 +++---
 src/pyeed/embeddings/__init__.py      | 199 +++++++++++++++++++-------
 src/pyeed/embeddings/base.py          |  14 +-
 src/pyeed/embeddings/models/esm2.py   |  18 +--
 src/pyeed/embeddings/models/esm3.py   |  64 ++++-----
 src/pyeed/embeddings/models/esmc.py   |   4 +-
 src/pyeed/embeddings/models/prott5.py |  20 ++-
 src/pyeed/embeddings/processor.py     |  46 ++----
 8 files changed, 246 insertions(+), 160 deletions(-)

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index d5b933b0..522f198a 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -1,7 +1,7 @@
 import gc
 import os
 import re
-from typing import Any, Tuple, Union
+from typing import Any, Tuple, Union, List
 
 import numpy as np
 import torch
@@ -188,7 +188,7 @@ def get_batch_embeddings(
 
     if isinstance(base_model, ESMC):
         # For ESMC models
-        embedding_list = []
+        embedding_list: List[NDArray[np.float64]] = []
         with torch.no_grad():
             for sequence in batch_sequences:
                 protein = ESMProtein(sequence=sequence)
@@ -208,7 +208,7 @@ def get_batch_embeddings(
         return embedding_list
     elif isinstance(base_model, ESM3):
         # For ESM3 models
-        embedding_list = []
+        embedding_list_esm3: List[NDArray[np.float64]] = []
         with torch.no_grad():
             for sequence in batch_sequences:
                 protein = ESMProtein(sequence=sequence)
@@ -224,8 +224,8 @@ def get_batch_embeddings(
                 )
                 if pool_embeddings:
                     embeddings = embeddings.mean(axis=0)
-                embedding_list.append(embeddings)
-        return embedding_list
+                embedding_list_esm3.append(embeddings)
+        return embedding_list_esm3
     elif isinstance(base_model, T5Model):
         # For ProtT5 models
         assert tokenizer_or_alphabet is not None, "Tokenizer required for ProtT5 models"
@@ -265,15 +265,15 @@ def get_batch_embeddings(
 
         if pool_embeddings:
             # Mean pooling across sequence length, excluding padding tokens
-            embedding_list = []
+            prott5_embedding_list: List[NDArray[np.float64]] = []
             for i, hidden_state in enumerate(hidden_states):
                 # Get actual sequence length (excluding padding)
                 attention_mask_np = attention_mask[i].cpu().numpy()
                 seq_len = attention_mask_np.sum()
                 # Pool only over actual sequence tokens
                 pooled_embedding = hidden_state[:seq_len].mean(axis=0)
-                embedding_list.append(pooled_embedding)
-            return embedding_list
+                prott5_embedding_list.append(pooled_embedding)
+            return prott5_embedding_list
         return list(hidden_states)
     else:
         # ESM-2 logic
@@ -404,7 +404,12 @@ def get_single_embedding_last_hidden_state(
             outputs = model(**inputs)
             embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
 
-    return embedding  # type: ignore
+    # Ensure embedding is a numpy array with proper dtype and normalize it
+    embedding = np.asarray(embedding, dtype=np.float64)
+    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
+    norm[norm == 0] = 1.0  # Handle zero norm case
+    normalized_embedding = embedding / norm
+    return np.asarray(normalized_embedding, dtype=np.float64)
 
 
 def get_single_embedding_all_layers(
@@ -428,7 +433,7 @@ def get_single_embedding_all_layers(
         NDArray[np.float64]: A numpy array containing the normalized token embeddings
         concatenated across all layers.
     """
-    embeddings_list = []
+    embeddings_list: List[NDArray[np.float64]] = []
     with torch.no_grad():
         if isinstance(model, ESMC):
             # For ESM-3: Use ESMProtein and request hidden states via LogitsConfig
@@ -520,7 +525,7 @@ def get_single_embedding_first_layer(
     """
     Generates normalized embeddings for each token in the sequence using the first layer.
     """
-    embeddings_list = []
+    embedding: NDArray[np.float64]
 
     with torch.no_grad():
         if isinstance(model, ESMC):
@@ -551,13 +556,13 @@ def get_single_embedding_first_layer(
 
             protein = ESMProtein(sequence=sequence)
             protein_tensor = model.encode(protein)
-            embedding = model.forward_and_sample(
+            result = model.forward_and_sample(
                 protein_tensor,
                 SamplingConfig(return_per_residue_embeddings=True),
             )
-            if embedding is None or embedding.per_residue_embedding is None:
+            if result is None or result.per_residue_embedding is None:
                 raise ValueError("Model did not return embeddings")
-            embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy()
+            embedding = result.per_residue_embedding.to(torch.float32).cpu().numpy()
 
         elif isinstance(model, T5Model):
             # ProtT5 logic - get first layer embedding
@@ -594,10 +599,12 @@ def get_single_embedding_first_layer(
             # Get the first layer's hidden states for all residues (excluding special tokens)
             embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy()
 
-    # Ensure embedding is a numpy array and normalize it
+    # Ensure embedding is a numpy array with proper dtype and normalize it
     embedding = np.asarray(embedding, dtype=np.float64)
-    embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
-    return embedding
+    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
+    norm[norm == 0] = 1.0  # Handle zero norm case
+    normalized_embedding = embedding / norm
+    return np.asarray(normalized_embedding, dtype=np.float64)
 
 def free_memory() -> None:
     """
diff --git a/src/pyeed/embeddings/__init__.py b/src/pyeed/embeddings/__init__.py
index 9d13238c..81cc8a83 100644
--- a/src/pyeed/embeddings/__init__.py
+++ b/src/pyeed/embeddings/__init__.py
@@ -5,6 +5,15 @@
 with the original embedding.py interface.
 """
 
+from typing import Any, Tuple, Union, List, Optional, cast
+import torch
+from torch.nn import DataParallel, Module
+from numpy.typing import NDArray
+import numpy as np
+from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
+from esm.models.esmc import ESMC
+from esm.models.esm3 import ESM3
+
 # New organized structure
 from .base import BaseEmbeddingModel, ModelType, normalize_embedding
 from .factory import ModelFactory
@@ -13,59 +22,145 @@
 from .database import update_protein_embeddings_in_db
 from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
 
-# Backward compatibility imports from old embedding.py
-try:
-    from ..embedding import (
-        load_model_and_tokenizer,
-        process_batches_on_gpu,
-        get_batch_embeddings,
-        calculate_single_sequence_embedding_last_hidden_state,
-        calculate_single_sequence_embedding_all_layers,
-        calculate_single_sequence_embedding_first_layer,
-        get_single_embedding_last_hidden_state,
-        get_single_embedding_all_layers,
-        get_single_embedding_first_layer
+from pyeed.dbconnect import DatabaseConnector
+
+# Type aliases for better readability
+TokenizerType = Union[EsmTokenizer, T5Tokenizer, None]
+DeviceType = torch.device
+
+# Re-export functions from processor
+__all__ = [
+    'load_model_and_tokenizer',
+    'process_batches_on_gpu',
+    'get_batch_embeddings',
+    'calculate_single_sequence_embedding_last_hidden_state',
+    'calculate_single_sequence_embedding_all_layers',
+    'calculate_single_sequence_embedding_first_layer',
+    'get_single_embedding_last_hidden_state',
+    'get_single_embedding_all_layers',
+    'get_single_embedding_first_layer',
+]
+
+# Function implementations
+def load_model_and_tokenizer(
+    model_name: str,
+    device: Optional[DeviceType] = None,
+) -> Tuple[ModelType, TokenizerType, DeviceType]:
+    """Load model and tokenizer."""
+    if device is None:
+        device = torch.device("cuda:0")
+    return cast(Tuple[ModelType, TokenizerType, DeviceType], ModelFactory.load_model_and_tokenizer(model_name, device))
+
+
+def process_batches_on_gpu(
+    data: List[Tuple[str, str]],
+    batch_size: int,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
+    db: DatabaseConnector,
+    device: torch.device,
+) -> None:
+    """Process batches on GPU."""
+    processor = get_processor()
+    processor.process_batches_on_gpu(data, batch_size, model, tokenizer, db, device)
+
+
+def get_batch_embeddings(
+    batch_sequences: List[str],
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer_or_alphabet: Union[EsmTokenizer, T5Tokenizer, None],
+    device: torch.device,
+    pool_embeddings: bool = True,
+) -> List[NDArray[np.float64]]:
+    """Get batch embeddings."""
+    processor = get_processor()
+    return processor.get_batch_embeddings_unified(
+        batch_sequences, model, tokenizer_or_alphabet, device, pool_embeddings
     )
-except ImportError:
-    # If old embedding.py is not available, use processor methods for compatibility
-    _processor = get_processor()
-    
-    def load_model_and_tokenizer(model_name: str, device=None):
-        """Backward compatibility function."""
-        # This is handled internally by the processor now
-        return None, None, device
-    
-    def process_batches_on_gpu(data, batch_size, model, tokenizer, db, device):
-        """Backward compatibility function."""
-        return _processor.process_batches_on_gpu(data, batch_size, model, tokenizer, db, device)
-    
-    def get_batch_embeddings(batch_sequences, model, tokenizer, device, pool_embeddings=True):
-        """Backward compatibility function."""
-        return _processor.get_batch_embeddings_unified(batch_sequences, model, tokenizer, device, pool_embeddings)
-    
-    def calculate_single_sequence_embedding_last_hidden_state(sequence, device=None, model_name="facebook/esm2_t33_650M_UR50D"):
-        """Backward compatibility function."""
-        return _processor.calculate_single_embedding(sequence, model_name, "last_hidden_state", device)
-    
-    def calculate_single_sequence_embedding_all_layers(sequence, device, model_name="facebook/esm2_t33_650M_UR50D"):
-        """Backward compatibility function."""
-        return _processor.calculate_single_embedding(sequence, model_name, "all_layers", device)
-    
-    def calculate_single_sequence_embedding_first_layer(sequence, model_name="facebook/esm2_t33_650M_UR50D", device=None):
-        """Backward compatibility function."""
-        return _processor.calculate_single_embedding(sequence, model_name, "first_layer", device)
-    
-    def get_single_embedding_last_hidden_state(sequence, model, tokenizer, device):
-        """Backward compatibility function."""
-        return _processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
-    
-    def get_single_embedding_all_layers(sequence, model, tokenizer, device):
-        """Backward compatibility function."""
-        return _processor.get_single_embedding_all_layers(sequence, model, tokenizer, device)
-    
-    def get_single_embedding_first_layer(sequence, model, tokenizer, device):
-        """Backward compatibility function."""
-        return _processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+
+
+def calculate_single_sequence_embedding_last_hidden_state(
+    sequence: str,
+    device: Optional[torch.device] = None,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+) -> NDArray[np.float64]:
+    """Calculate single sequence embedding using last hidden state."""
+    if device is None:
+        device = torch.device("cuda:0")
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_last_hidden_state(
+        sequence, device, model_name
+    )
+
+
+def calculate_single_sequence_embedding_all_layers(
+    sequence: str,
+    device: torch.device,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+) -> NDArray[np.float64]:
+    """Calculate single sequence embedding using all layers."""
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_all_layers(
+        sequence, device, model_name
+    )
+
+
+def calculate_single_sequence_embedding_first_layer(
+    sequence: str,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+    device: Optional[torch.device] = None,
+) -> NDArray[np.float64]:
+    """Calculate single sequence embedding using first layer."""
+    if device is None:
+        device = torch.device("cuda:0")
+    processor = get_processor()
+    return processor.calculate_single_sequence_embedding_first_layer(
+        sequence, model_name, device
+    )
+
+
+def get_single_embedding_last_hidden_state(
+    sequence: str,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
+    device: torch.device,
+) -> NDArray[np.float64]:
+    """Get single embedding using last hidden state."""
+    processor = get_processor()
+    return processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
+
+
+def get_single_embedding_all_layers(
+    sequence: str,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
+    device: torch.device,
+) -> NDArray[np.float64]:
+    """Get single embedding using all layers."""
+    processor = get_processor()
+    return processor.get_single_embedding_all_layers(sequence, model, tokenizer, device)
+
+
+def get_single_embedding_first_layer(
+    sequence: str,
+    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
+    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
+    device: torch.device,
+) -> NDArray[np.float64]:
+    """Get single embedding using first layer."""
+    processor = get_processor()
+    return processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+
+# Public API
+load_model_and_tokenizer = load_model_and_tokenizer
+process_batches_on_gpu = process_batches_on_gpu
+get_batch_embeddings = get_batch_embeddings
+calculate_single_sequence_embedding_last_hidden_state = calculate_single_sequence_embedding_last_hidden_state
+calculate_single_sequence_embedding_all_layers = calculate_single_sequence_embedding_all_layers
+calculate_single_sequence_embedding_first_layer = calculate_single_sequence_embedding_first_layer
+get_single_embedding_last_hidden_state = get_single_embedding_last_hidden_state
+get_single_embedding_all_layers = get_single_embedding_all_layers
+get_single_embedding_first_layer = get_single_embedding_first_layer
 
 __all__ = [
     # Base classes and types
diff --git a/src/pyeed/embeddings/base.py b/src/pyeed/embeddings/base.py
index 745fd2cf..cefa5415 100644
--- a/src/pyeed/embeddings/base.py
+++ b/src/pyeed/embeddings/base.py
@@ -94,7 +94,8 @@ def get_final_embeddings(
         It falls back gracefully if certain layer-specific methods are not available.
         Default implementation uses last hidden state, but can be overridden.
         """
-        return self.get_single_embedding_last_hidden_state(sequence)
+        result = self.get_single_embedding_last_hidden_state(sequence)
+        return np.asarray(result, dtype=np.float64)
     
     def move_to_device(self) -> None:
         """Move model to the specified device."""
@@ -105,7 +106,10 @@ def cleanup(self) -> None:
         """Clean up model resources."""
         if self._model is not None:
             self._model = None
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        # Explicit return None
+        return None
 
 
 class ModelType:
@@ -118,4 +122,8 @@ class ModelType:
 
 def normalize_embedding(embedding: NDArray[np.float64]) -> NDArray[np.float64]:
     """Normalize embeddings using L2 normalization."""
-    return embedding / np.linalg.norm(embedding, axis=1, keepdims=True) 
\ No newline at end of file
+    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
+    # Handle zero norm case to avoid division by zero
+    norm[norm == 0] = 1.0
+    normalized = embedding / norm
+    return np.asarray(normalized, dtype=np.float64) 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index 1edeea97..a02e5861 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -91,7 +91,7 @@ def get_single_embedding_last_hidden_state(
         
         # Remove batch dimension and special tokens ([CLS] and [SEP])
         embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
-        return embedding
+        return np.asarray(embedding, dtype=np.float64)
     
     def get_single_embedding_all_layers(
         self, 
@@ -150,23 +150,13 @@ def get_final_embeddings(
         sequence: str
     ) -> NDArray[np.float64]:
         """
-        Get final embeddings for ESM-2 with robust fallback.
-        
-        Provides a more robust embedding extraction that prioritizes
-        batch processing for better performance.
+        Get final embeddings for ESM2 with robust fallback.
         """
         try:
-            # For ESM-2, batch processing is more efficient
             embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
             if embeddings and len(embeddings) > 0:
-                return embeddings[0]
+                return np.asarray(embeddings[0], dtype=np.float64)
             else:
                 raise ValueError("Batch embeddings method returned empty results")
         except Exception as e:
-            logger.warning(f"Batch embeddings method failed for ESM-2: {e}. Trying single sequence method.")
-            try:
-                # Fallback to single sequence method
-                return self.get_single_embedding_last_hidden_state(sequence)
-            except Exception as fallback_error:
-                logger.error(f"All embedding extraction methods failed for ESM-2: {fallback_error}")
-                raise ValueError(f"ESM-2 embedding extraction failed: {fallback_error}") 
\ No newline at end of file
+            raise ValueError(f"ESM2 embedding extraction failed: {e}") 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esm3.py b/src/pyeed/embeddings/models/esm3.py
index 2f962a67..1783f0fe 100644
--- a/src/pyeed/embeddings/models/esm3.py
+++ b/src/pyeed/embeddings/models/esm3.py
@@ -8,7 +8,7 @@
 from numpy.typing import NDArray
 from loguru import logger
 from esm.models.esm3 import ESM3
-from esm.sdk.api import ESMProtein, SamplingConfig
+from esm.sdk.api import ESMProtein, SamplingConfig, LogitsConfig
 
 from ..base import BaseEmbeddingModel, normalize_embedding
 
@@ -153,39 +153,35 @@ def get_final_embeddings(
     ) -> NDArray[np.float64]:
         """
         Get final embeddings for ESM3 with robust fallback.
-        
-        ESM3 has different API structure, so this provides a more robust
-        embedding extraction that works reliably across different ESM3 versions.
         """
         try:
-            # Try to get the standard per-residue embedding
-            return self.get_single_embedding_last_hidden_state(sequence)
+            embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
+            if embeddings and len(embeddings) > 0:
+                return np.asarray(embeddings[0], dtype=np.float64)
+            else:
+                raise ValueError("Batch embeddings method returned empty results")
+        except (torch.cuda.OutOfMemoryError, RuntimeError) as e:
+            if "out of memory" in str(e).lower():
+                torch.cuda.empty_cache()
+                try:
+                    if self.model is None:
+                        self.load_model()
+                    model = cast(ESM3, self.model)
+                    with torch.no_grad():
+                        protein = self.preprocess_sequence(sequence)
+                        protein_tensor = model.encode(protein)
+                        logits_output = model.logits(
+                            protein_tensor, 
+                            LogitsConfig(sequence=True, return_embeddings=True)
+                        )
+                        if logits_output.embeddings is None:
+                            raise ValueError("Model did not return embeddings")
+                        embeddings = logits_output.embeddings.cpu().numpy()
+                        pooled_embedding = embeddings.mean(axis=1)[0]
+                        return np.asarray(pooled_embedding, dtype=np.float64)
+                except Exception as minimal_error:
+                    raise ValueError(f"ESM3 embedding extraction failed with OOM: {minimal_error}")
+            else:
+                raise e
         except Exception as e:
-            # If that fails, try alternative method
-            logger.warning(f"Standard embedding method failed for ESM3: {e}. Trying alternative method.")
-            try:
-                if self.model is None:
-                    self.load_model()
-                
-                model = cast(ESM3, self.model)
-                
-                with torch.no_grad():
-                    protein = self.preprocess_sequence(sequence)
-                    sequence_encoding = model.encode(protein)
-                    # Try with minimal sampling config
-                    result = model.forward_and_sample(
-                        sequence_encoding,
-                        SamplingConfig()
-                    )
-                    
-                    # Extract any available embedding
-                    if hasattr(result, 'per_residue_embedding') and result.per_residue_embedding is not None:
-                        embedding = result.per_residue_embedding.to(torch.float32).cpu().numpy()
-                        return embedding
-                    else:
-                        # Last resort: use a simple mean-pooled sequence representation
-                        logger.warning("No per-residue embeddings available, using basic fallback")
-                        raise ValueError("Could not extract any embeddings from ESM3 model")
-            except Exception as fallback_error:
-                logger.error(f"All embedding extraction methods failed for ESM3: {fallback_error}")
-                raise ValueError(f"ESM3 embedding extraction failed: {fallback_error}") 
\ No newline at end of file
+            raise ValueError(f"ESM3 embedding extraction failed: {e}") 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/models/esmc.py b/src/pyeed/embeddings/models/esmc.py
index 245a2172..d79c58f4 100644
--- a/src/pyeed/embeddings/models/esmc.py
+++ b/src/pyeed/embeddings/models/esmc.py
@@ -228,7 +228,7 @@ def get_final_embeddings(
             # For ESMC, batch embeddings with pooling is more reliable and memory efficient
             embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
             if embeddings and len(embeddings) > 0:
-                return embeddings[0]
+                return np.asarray(embeddings[0], dtype=np.float64)
             else:
                 raise ValueError("Batch embeddings method returned empty results")
         except (torch.cuda.OutOfMemoryError, RuntimeError) as e:
@@ -260,7 +260,7 @@ def get_final_embeddings(
                         # Pool across sequence dimension to get single vector
                         pooled_embedding = embeddings.mean(axis=1)[0]
                         
-                        return pooled_embedding
+                        return np.asarray(pooled_embedding, dtype=np.float64)
                         
                 except Exception as minimal_error:
                     logger.error(f"Minimal embedding extraction also failed for ESMC: {minimal_error}")
diff --git a/src/pyeed/embeddings/models/prott5.py b/src/pyeed/embeddings/models/prott5.py
index 7e0a82ef..924f6795 100644
--- a/src/pyeed/embeddings/models/prott5.py
+++ b/src/pyeed/embeddings/models/prott5.py
@@ -144,7 +144,7 @@ def get_single_embedding_last_hidden_state(
         
         # Get encoder last hidden state including special tokens
         embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
-        return embedding
+        return np.asarray(embedding, dtype=np.float64)
     
     def get_single_embedding_all_layers(
         self, 
@@ -238,4 +238,20 @@ def get_single_embedding_first_layer(
         
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
-        return embedding 
\ No newline at end of file
+        return embedding
+    
+    def get_final_embeddings(
+        self, 
+        sequence: str
+    ) -> NDArray[np.float64]:
+        """
+        Get final embeddings for ProtT5 with robust fallback.
+        """
+        try:
+            embeddings = self.get_batch_embeddings([sequence], pool_embeddings=True)
+            if embeddings and len(embeddings) > 0:
+                return np.asarray(embeddings[0], dtype=np.float64)
+            else:
+                raise ValueError("Batch embeddings method returned empty results")
+        except Exception as e:
+            raise ValueError(f"ProtT5 embedding extraction failed: {e}") 
\ No newline at end of file
diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index d025f1c0..2ff9ff0e 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -5,7 +5,7 @@
 and database operations with automatic device management and model loading.
 """
 
-from typing import List, Union, Any, Literal, Optional
+from typing import List, Union, Any, Literal, Optional, Dict, Type
 import torch
 from torch.nn import DataParallel, Module
 from loguru import logger
@@ -31,8 +31,8 @@ class EmbeddingProcessor:
     simplified interfaces for all embedding operations.
     """
     
-    def __init__(self):
-        self._models: dict[str, BaseEmbeddingModel] = {}
+    def __init__(self) -> None:
+        self._models: Dict[str, BaseEmbeddingModel] = {}
         self._devices: List[torch.device] = []
         self._initialize_devices()
     
@@ -348,25 +348,10 @@ def get_batch_embeddings_unified(
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
         model_type = type(base_model).__name__
         
-        # Map model class names to our model types
-        if "ESMC" in model_type:
-            embedding_model = ESMCEmbeddingModel("", device)
-            embedding_model.model = base_model
-            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
-        elif "ESM3" in model_type:
-            embedding_model = ESM3EmbeddingModel("", device)
-            embedding_model.model = base_model
-            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
-        elif "T5Model" in model_type:
-            embedding_model = ProtT5EmbeddingModel("", device)
-            embedding_model.model = base_model
-            embedding_model.tokenizer = tokenizer
-            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
-        else:  # ESM-2 and other ESM models
-            embedding_model = ESM2EmbeddingModel("", device)
-            embedding_model.model = base_model
-            embedding_model.tokenizer = tokenizer
-            return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
+        embedding_model = ESM2EmbeddingModel("", device)
+        embedding_model.model = base_model
+        embedding_model.tokenizer = tokenizer
+        return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
     
     def calculate_single_sequence_embedding_last_hidden_state(
         self,
@@ -441,20 +426,9 @@ def _get_single_embedding_legacy(
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
         model_type = type(base_model).__name__
         
-        if "ESMC" in model_type:
-            embedding_model = ESMCEmbeddingModel("", device)
-            embedding_model.model = base_model
-        elif "ESM3" in model_type:
-            embedding_model = ESM3EmbeddingModel("", device)
-            embedding_model.model = base_model
-        elif "T5Model" in model_type:
-            embedding_model = ProtT5EmbeddingModel("", device)
-            embedding_model.model = base_model
-            embedding_model.tokenizer = tokenizer
-        else:  # ESM-2 and other ESM models
-            embedding_model = ESM2EmbeddingModel("", device)
-            embedding_model.model = base_model
-            embedding_model.tokenizer = tokenizer
+        embedding_model = ESM2EmbeddingModel("", device)
+        embedding_model.model = base_model
+        embedding_model.tokenizer = tokenizer
         
         if embedding_type == "last_hidden_state":
             return embedding_model.get_single_embedding_last_hidden_state(sequence)

From e0e033c2470fc0e23a8466dfa73ca8ebfbcba408 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 10:05:20 +0000
Subject: [PATCH 05/11] update with ruff

---
 src/pyeed/embedding.py                  |  4 ++--
 src/pyeed/embedding_refactored.py       | 19 +++++++--------
 src/pyeed/embeddings/__init__.py        | 31 +++++++++++++++++--------
 src/pyeed/embeddings/base.py            |  5 ++--
 src/pyeed/embeddings/database.py        |  2 ++
 src/pyeed/embeddings/factory.py         | 10 ++++++--
 src/pyeed/embeddings/models/__init__.py |  2 +-
 src/pyeed/embeddings/models/esm2.py     |  6 ++---
 src/pyeed/embeddings/models/esm3.py     | 10 ++++----
 src/pyeed/embeddings/models/esmc.py     | 12 +++++-----
 src/pyeed/embeddings/models/prott5.py   |  5 ++--
 src/pyeed/embeddings/processor.py       | 22 +++++++++---------
 src/pyeed/embeddings/utils.py           |  3 ++-
 src/pyeed/main.py                       |  5 +---
 14 files changed, 77 insertions(+), 59 deletions(-)

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index 522f198a..6642aa26 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -1,13 +1,13 @@
 import gc
 import os
 import re
-from typing import Any, Tuple, Union, List
+from typing import Any, List, Tuple, Union
 
 import numpy as np
 import torch
 from esm.models.esm3 import ESM3
 from esm.models.esmc import ESMC
-from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig
+from esm.sdk.api import ESMProtein, LogitsConfig, SamplingConfig
 from huggingface_hub import HfFolder, login
 from loguru import logger
 from numpy.typing import NDArray
diff --git a/src/pyeed/embedding_refactored.py b/src/pyeed/embedding_refactored.py
index 8ce5deff..d1748c37 100644
--- a/src/pyeed/embedding_refactored.py
+++ b/src/pyeed/embedding_refactored.py
@@ -5,28 +5,27 @@
 using the new organized structure with model classes, factory, and processor.
 """
 
-import gc
-import os
-import re
 from typing import Any, Tuple, Union
 
 import numpy as np
 import torch
 from esm.models.esm3 import ESM3
 from esm.models.esmc import ESMC
-from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig
-from huggingface_hub import HfFolder, login
-from loguru import logger
 from numpy.typing import NDArray
 from torch.nn import DataParallel, Module
 from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
 
 from pyeed.dbconnect import DatabaseConnector
-from pyeed.embeddings.processor import get_processor
+from pyeed.embeddings.database import (
+    update_protein_embeddings_in_db as _update_protein_embeddings_in_db,
+)
 from pyeed.embeddings.factory import ModelFactory
-from pyeed.embeddings.database import update_protein_embeddings_in_db as _update_protein_embeddings_in_db
-from pyeed.embeddings.utils import get_hf_token as _get_hf_token, preprocess_sequence_for_prott5 as _preprocess_sequence_for_prott5, free_memory as _free_memory
-
+from pyeed.embeddings.processor import get_processor
+from pyeed.embeddings.utils import free_memory as _free_memory
+from pyeed.embeddings.utils import get_hf_token as _get_hf_token
+from pyeed.embeddings.utils import (
+    preprocess_sequence_for_prott5 as _preprocess_sequence_for_prott5,
+)
 
 # ============================================================================
 # Original function signatures maintained for backward compatibility
diff --git a/src/pyeed/embeddings/__init__.py b/src/pyeed/embeddings/__init__.py
index 81cc8a83..b1b49497 100644
--- a/src/pyeed/embeddings/__init__.py
+++ b/src/pyeed/embeddings/__init__.py
@@ -5,24 +5,35 @@
 with the original embedding.py interface.
 """
 
-from typing import Any, Tuple, Union, List, Optional, cast
+from typing import List, Optional, Tuple, Union, cast
+
+import numpy as np
 import torch
-from torch.nn import DataParallel, Module
+from esm.models.esm3 import ESM3
+from esm.models.esmc import ESMC
 from numpy.typing import NDArray
-import numpy as np
+from torch.nn import DataParallel, Module
 from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
-from esm.models.esmc import ESMC
-from esm.models.esm3 import ESM3
+
+from pyeed.dbconnect import DatabaseConnector
 
 # New organized structure
 from .base import BaseEmbeddingModel, ModelType, normalize_embedding
+from .database import update_protein_embeddings_in_db
 from .factory import ModelFactory
+from .models import (
+    ESM2EmbeddingModel,
+    ESM3EmbeddingModel,
+    ESMCEmbeddingModel,
+    ProtT5EmbeddingModel,
+)
 from .processor import EmbeddingProcessor, get_processor
-from .utils import get_hf_token, preprocess_sequence_for_prott5, free_memory, determine_model_type
-from .database import update_protein_embeddings_in_db
-from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
-
-from pyeed.dbconnect import DatabaseConnector
+from .utils import (
+    determine_model_type,
+    free_memory,
+    get_hf_token,
+    preprocess_sequence_for_prott5,
+)
 
 # Type aliases for better readability
 TokenizerType = Union[EsmTokenizer, T5Tokenizer, None]
diff --git a/src/pyeed/embeddings/base.py b/src/pyeed/embeddings/base.py
index cefa5415..2fc8637c 100644
--- a/src/pyeed/embeddings/base.py
+++ b/src/pyeed/embeddings/base.py
@@ -5,9 +5,10 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Any, List, Union, Tuple, Optional
-import torch
+from typing import Any, List, Optional, Tuple, Union
+
 import numpy as np
+import torch
 from numpy.typing import NDArray
 
 
diff --git a/src/pyeed/embeddings/database.py b/src/pyeed/embeddings/database.py
index f1536878..18a3aeed 100644
--- a/src/pyeed/embeddings/database.py
+++ b/src/pyeed/embeddings/database.py
@@ -5,8 +5,10 @@
 """
 
 from typing import List
+
 import numpy as np
 from numpy.typing import NDArray
+
 from pyeed.dbconnect import DatabaseConnector
 
 
diff --git a/src/pyeed/embeddings/factory.py b/src/pyeed/embeddings/factory.py
index 66b7f7c5..37650c98 100644
--- a/src/pyeed/embeddings/factory.py
+++ b/src/pyeed/embeddings/factory.py
@@ -5,12 +5,18 @@
 based on model names and automatically handles device assignment.
 """
 
-from typing import Union, Tuple, Any
+from typing import Any, Tuple, Union
+
 import torch
 from torch.nn import DataParallel, Module
 
 from .base import BaseEmbeddingModel
-from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
+from .models import (
+    ESM2EmbeddingModel,
+    ESM3EmbeddingModel,
+    ESMCEmbeddingModel,
+    ProtT5EmbeddingModel,
+)
 from .utils import determine_model_type
 
 
diff --git a/src/pyeed/embeddings/models/__init__.py b/src/pyeed/embeddings/models/__init__.py
index f2f8908f..1d2a7134 100644
--- a/src/pyeed/embeddings/models/__init__.py
+++ b/src/pyeed/embeddings/models/__init__.py
@@ -5,8 +5,8 @@
 """
 
 from .esm2 import ESM2EmbeddingModel
-from .esmc import ESMCEmbeddingModel
 from .esm3 import ESM3EmbeddingModel
+from .esmc import ESMCEmbeddingModel
 from .prott5 import ProtT5EmbeddingModel
 
 __all__ = [
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index a02e5861..b3d0068d 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -2,12 +2,12 @@
 ESM-2 model implementation for protein embeddings.
 """
 
-from typing import List, Tuple, Optional, Any, cast
-import torch
+from typing import List, Tuple, cast
+
 import numpy as np
+import torch
 from numpy.typing import NDArray
 from transformers import EsmModel, EsmTokenizer
-from loguru import logger
 
 from ..base import BaseEmbeddingModel, normalize_embedding
 from ..utils import get_hf_token
diff --git a/src/pyeed/embeddings/models/esm3.py b/src/pyeed/embeddings/models/esm3.py
index 1783f0fe..e6aca8b3 100644
--- a/src/pyeed/embeddings/models/esm3.py
+++ b/src/pyeed/embeddings/models/esm3.py
@@ -2,13 +2,13 @@
 ESM-3 model implementation for protein embeddings.
 """
 
-from typing import List, Tuple, Optional, cast
-import torch
+from typing import List, Tuple, cast
+
 import numpy as np
-from numpy.typing import NDArray
-from loguru import logger
+import torch
 from esm.models.esm3 import ESM3
-from esm.sdk.api import ESMProtein, SamplingConfig, LogitsConfig
+from esm.sdk.api import ESMProtein, LogitsConfig, SamplingConfig
+from numpy.typing import NDArray
 
 from ..base import BaseEmbeddingModel, normalize_embedding
 
diff --git a/src/pyeed/embeddings/models/esmc.py b/src/pyeed/embeddings/models/esmc.py
index d79c58f4..4256bd63 100644
--- a/src/pyeed/embeddings/models/esmc.py
+++ b/src/pyeed/embeddings/models/esmc.py
@@ -2,13 +2,14 @@
 ESMC model implementation for protein embeddings.
 """
 
-from typing import List, Tuple, Optional, cast
-import torch
+from typing import List, Tuple, cast
+
 import numpy as np
-from numpy.typing import NDArray
-from loguru import logger
+import torch
 from esm.models.esmc import ESMC
 from esm.sdk.api import ESMProtein, LogitsConfig
+from loguru import logger
+from numpy.typing import NDArray
 
 from ..base import BaseEmbeddingModel, normalize_embedding
 
@@ -38,7 +39,6 @@ def load_model(self) -> Tuple[ESMC, None]:
                 logger.warning(f"ESMC model loading failed due to tqdm threading issue: {e}. Retrying with threading workaround...")
                 
                 # Try alternative approach with threading lock
-                import threading
                 import time
                 
                 # Add a small delay and retry
@@ -50,7 +50,7 @@ def load_model(self) -> Tuple[ESMC, None]:
                         import tqdm
                         if hasattr(tqdm.tqdm, '_lock'):
                             delattr(tqdm.tqdm, '_lock')
-                    except:
+                    except (AttributeError, ImportError):
                         pass
                     
                     model = ESMC.from_pretrained(self.model_name)
diff --git a/src/pyeed/embeddings/models/prott5.py b/src/pyeed/embeddings/models/prott5.py
index 924f6795..a9b3e6c3 100644
--- a/src/pyeed/embeddings/models/prott5.py
+++ b/src/pyeed/embeddings/models/prott5.py
@@ -2,9 +2,10 @@
 ProtT5 model implementation for protein embeddings.
 """
 
-from typing import List, Tuple, Optional, cast
-import torch
+from typing import List, Tuple, cast
+
 import numpy as np
+import torch
 from numpy.typing import NDArray
 from transformers import T5Model, T5Tokenizer
 
diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index 2ff9ff0e..1433b323 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -5,22 +5,24 @@
 and database operations with automatic device management and model loading.
 """
 
-from typing import List, Union, Any, Literal, Optional, Dict, Type
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, List, Literal, Optional, Union
+
+import numpy as np
 import torch
-from torch.nn import DataParallel, Module
 from loguru import logger
-import numpy as np
 from numpy.typing import NDArray
-import time
-from concurrent.futures import ThreadPoolExecutor
-import os
+from torch.nn import DataParallel, Module
+
+from pyeed.dbconnect import DatabaseConnector
 
-from .factory import ModelFactory
 from .base import BaseEmbeddingModel
-from .models import ESM2EmbeddingModel, ESMCEmbeddingModel, ESM3EmbeddingModel, ProtT5EmbeddingModel
 from .database import update_protein_embeddings_in_db
+from .factory import ModelFactory
+from .models import ESM2EmbeddingModel
 from .utils import free_memory
-from pyeed.dbconnect import DatabaseConnector
 
 
 class EmbeddingProcessor:
@@ -346,7 +348,6 @@ def get_batch_embeddings_unified(
         
         # Determine model type from the actual model instance
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
-        model_type = type(base_model).__name__
         
         embedding_model = ESM2EmbeddingModel("", device)
         embedding_model.model = base_model
@@ -424,7 +425,6 @@ def _get_single_embedding_legacy(
         """Helper method for legacy single embedding methods."""
         # Determine model type and create appropriate embedding model
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
-        model_type = type(base_model).__name__
         
         embedding_model = ESM2EmbeddingModel("", device)
         embedding_model.model = base_model
diff --git a/src/pyeed/embeddings/utils.py b/src/pyeed/embeddings/utils.py
index 6559a66f..987e3d11 100644
--- a/src/pyeed/embeddings/utils.py
+++ b/src/pyeed/embeddings/utils.py
@@ -8,8 +8,9 @@
 import gc
 import os
 import re
-from huggingface_hub import HfFolder, login
+
 import torch
+from huggingface_hub import HfFolder, login
 
 
 def get_hf_token() -> str:
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index 206cd644..22cdc61c 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -1,10 +1,7 @@
 import asyncio
-import time
-from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Literal
 
 import nest_asyncio
-import torch
 from loguru import logger
 
 from pyeed.adapter.ncbi_dna_mapper import NCBIDNAToPyeed
@@ -14,7 +11,7 @@
 from pyeed.adapter.uniprot_mapper import UniprotToPyeed
 from pyeed.dbchat import DBChat
 from pyeed.dbconnect import DatabaseConnector
-from pyeed.embeddings import get_processor, free_memory
+from pyeed.embeddings import free_memory, get_processor
 
 
 class Pyeed:

From 921cd6c8d7756750697ed1258f2e47785e7a55da Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 10:10:16 +0000
Subject: [PATCH 06/11] ruff format check

---
 src/pyeed/embedding.py                  | 163 +++++++++---------
 src/pyeed/embedding_refactored.py       |  29 +++-
 src/pyeed/embeddings/__init__.py        | 100 ++++++-----
 src/pyeed/embeddings/base.py            |  55 +++---
 src/pyeed/embeddings/database.py        |   2 +-
 src/pyeed/embeddings/factory.py         |  23 ++-
 src/pyeed/embeddings/models/__init__.py |  10 +-
 src/pyeed/embeddings/models/esm2.py     |  82 ++++-----
 src/pyeed/embeddings/models/esm3.py     |  72 ++++----
 src/pyeed/embeddings/models/esmc.py     | 130 +++++++-------
 src/pyeed/embeddings/models/prott5.py   | 176 +++++++++----------
 src/pyeed/embeddings/processor.py       | 216 ++++++++++++------------
 src/pyeed/embeddings/utils.py           |  14 +-
 src/pyeed/main.py                       |  24 +--
 14 files changed, 543 insertions(+), 553 deletions(-)

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index 6642aa26..fe928935 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -98,7 +98,11 @@ def process_batches_on_gpu(
 def load_model_and_tokenizer(
     model_name: str,
     device: torch.device = torch.device("cuda:0"),
-) -> Tuple[Union[EsmModel, ESMC, ESM3, T5Model], Union[EsmTokenizer, T5Tokenizer, None], torch.device]:
+) -> Tuple[
+    Union[EsmModel, ESMC, ESM3, T5Model],
+    Union[EsmTokenizer, T5Tokenizer, None],
+    torch.device,
+]:
     """
     Loads the model and assigns it to a specific GPU.
 
@@ -121,12 +125,12 @@ def load_model_and_tokenizer(
     elif "prot_t5" in model_name.lower() or "prott5" in model_name.lower():
         # ProtT5 models
         full_model_name = (
-            model_name
-            if model_name.startswith("Rostlab/")
-            else f"Rostlab/{model_name}"
+            model_name if model_name.startswith("Rostlab/") else f"Rostlab/{model_name}"
         )
         model = T5Model.from_pretrained(full_model_name, use_auth_token=token)
-        tokenizer = T5Tokenizer.from_pretrained(full_model_name, use_auth_token=token, do_lower_case=False)
+        tokenizer = T5Tokenizer.from_pretrained(
+            full_model_name, use_auth_token=token, do_lower_case=False
+        )
         model = model.to(device)
     else:
         full_model_name = (
@@ -144,10 +148,10 @@ def load_model_and_tokenizer(
 def preprocess_sequence_for_prott5(sequence: str) -> str:
     """
     Preprocesses a protein sequence for ProtT5 models.
-    
+
     Args:
         sequence: Raw protein sequence
-        
+
     Returns:
         Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
     """
@@ -229,37 +233,43 @@ def get_batch_embeddings(
     elif isinstance(base_model, T5Model):
         # For ProtT5 models
         assert tokenizer_or_alphabet is not None, "Tokenizer required for ProtT5 models"
-        assert isinstance(tokenizer_or_alphabet, T5Tokenizer), "T5Tokenizer required for ProtT5 models"
-        
+        assert isinstance(
+            tokenizer_or_alphabet, T5Tokenizer
+        ), "T5Tokenizer required for ProtT5 models"
+
         # Preprocess sequences for ProtT5
-        processed_sequences = [preprocess_sequence_for_prott5(seq) for seq in batch_sequences]
-        
+        processed_sequences = [
+            preprocess_sequence_for_prott5(seq) for seq in batch_sequences
+        ]
+
         inputs = tokenizer_or_alphabet.batch_encode_plus(
-            processed_sequences, 
-            add_special_tokens=True, 
+            processed_sequences,
+            add_special_tokens=True,
             padding="longest",
-            return_tensors="pt"
+            return_tensors="pt",
         )
-        
+
         # Move inputs to device
-        input_ids = inputs['input_ids'].to(device)
-        attention_mask = inputs['attention_mask'].to(device)
-        
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+
         with torch.no_grad():
             # For ProtT5, use encoder embeddings for feature extraction
             # Create dummy decoder inputs (just the pad token)
             batch_size = input_ids.shape[0]
             decoder_input_ids = torch.full(
-                (batch_size, 1), 
-                tokenizer_or_alphabet.pad_token_id or 0, 
+                (batch_size, 1),
+                tokenizer_or_alphabet.pad_token_id or 0,
                 dtype=torch.long,
-                device=device
+                device=device,
             )
-            
-            outputs = base_model(input_ids=input_ids, 
-                          attention_mask=attention_mask,
-                          decoder_input_ids=decoder_input_ids)
-            
+
+            outputs = base_model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
+            )
+
             # Get encoder last hidden state (encoder embeddings)
             hidden_states = outputs.encoder_last_hidden_state.cpu().numpy()
 
@@ -278,7 +288,9 @@ def get_batch_embeddings(
     else:
         # ESM-2 logic
         assert tokenizer_or_alphabet is not None, "Tokenizer required for ESM-2 models"
-        assert isinstance(tokenizer_or_alphabet, EsmTokenizer), "EsmTokenizer required for ESM-2 models"
+        assert isinstance(
+            tokenizer_or_alphabet, EsmTokenizer
+        ), "EsmTokenizer required for ESM-2 models"
         inputs = tokenizer_or_alphabet(
             batch_sequences, padding=True, truncation=True, return_tensors="pt"
         ).to(device)
@@ -376,26 +388,23 @@ def get_single_embedding_last_hidden_state(
             # ProtT5 logic
             processed_sequence = preprocess_sequence_for_prott5(sequence)
             inputs = tokenizer.encode_plus(
-                processed_sequence,
-                add_special_tokens=True,
-                return_tensors="pt"
+                processed_sequence, add_special_tokens=True, return_tensors="pt"
             )
-            
-            input_ids = inputs['input_ids'].to(device)
-            attention_mask = inputs['attention_mask'].to(device)
-            
+
+            input_ids = inputs["input_ids"].to(device)
+            attention_mask = inputs["attention_mask"].to(device)
+
             # Create dummy decoder inputs
             decoder_input_ids = torch.full(
-                (1, 1), 
-                tokenizer.pad_token_id or 0, 
-                dtype=torch.long,
-                device=device
+                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
+            )
+
+            outputs = model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
             )
-            
-            outputs = model(input_ids=input_ids, 
-                          attention_mask=attention_mask,
-                          decoder_input_ids=decoder_input_ids)
-            
+
             # Get encoder last hidden state including special tokens
             embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
         else:
@@ -465,27 +474,24 @@ def get_single_embedding_all_layers(
             # For ProtT5: Get encoder hidden states
             processed_sequence = preprocess_sequence_for_prott5(sequence)
             inputs = tokenizer.encode_plus(
-                processed_sequence,
-                add_special_tokens=True,
-                return_tensors="pt"
+                processed_sequence, add_special_tokens=True, return_tensors="pt"
             )
-            
-            input_ids = inputs['input_ids'].to(device)
-            attention_mask = inputs['attention_mask'].to(device)
-            
+
+            input_ids = inputs["input_ids"].to(device)
+            attention_mask = inputs["attention_mask"].to(device)
+
             # Create dummy decoder inputs
             decoder_input_ids = torch.full(
-                (1, 1), 
-                tokenizer.pad_token_id or 0, 
-                dtype=torch.long,
-                device=device
+                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
+            )
+
+            outputs = model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
+                output_hidden_states=True,
             )
-            
-            outputs = model(input_ids=input_ids, 
-                          attention_mask=attention_mask,
-                          decoder_input_ids=decoder_input_ids,
-                          output_hidden_states=True)
-            
+
             # Get all encoder hidden states
             encoder_hidden_states = outputs.encoder_hidden_states
             for layer_tensor in encoder_hidden_states:
@@ -509,8 +515,11 @@ def get_single_embedding_all_layers(
 
     return np.array(embeddings_list)
 
+
 def calculate_single_sequence_embedding_first_layer(
-    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D", device: torch.device = torch.device("cuda:0"),
+    sequence: str,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+    device: torch.device = torch.device("cuda:0"),
 ) -> NDArray[np.float64]:
     """
     Calculates an embedding for a single sequence using the first layer.
@@ -568,27 +577,24 @@ def get_single_embedding_first_layer(
             # ProtT5 logic - get first layer embedding
             processed_sequence = preprocess_sequence_for_prott5(sequence)
             inputs = tokenizer.encode_plus(
-                processed_sequence,
-                add_special_tokens=True,
-                return_tensors="pt"
+                processed_sequence, add_special_tokens=True, return_tensors="pt"
             )
-            
-            input_ids = inputs['input_ids'].to(device)
-            attention_mask = inputs['attention_mask'].to(device)
-            
+
+            input_ids = inputs["input_ids"].to(device)
+            attention_mask = inputs["attention_mask"].to(device)
+
             # Create dummy decoder inputs
             decoder_input_ids = torch.full(
-                (1, 1), 
-                tokenizer.pad_token_id or 0, 
-                dtype=torch.long,
-                device=device
+                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
             )
-            
-            outputs = model(input_ids=input_ids, 
-                          attention_mask=attention_mask,
-                          decoder_input_ids=decoder_input_ids,
-                          output_hidden_states=True)
-            
+
+            outputs = model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                decoder_input_ids=decoder_input_ids,
+                output_hidden_states=True,
+            )
+
             # Get first encoder hidden state including special tokens
             embedding = outputs.encoder_hidden_states[0][0].detach().cpu().numpy()
 
@@ -606,6 +612,7 @@ def get_single_embedding_first_layer(
     normalized_embedding = embedding / norm
     return np.asarray(normalized_embedding, dtype=np.float64)
 
+
 def free_memory() -> None:
     """
     Frees up memory by invoking garbage collection and clearing GPU caches.
diff --git a/src/pyeed/embedding_refactored.py b/src/pyeed/embedding_refactored.py
index d1748c37..6e583bb5 100644
--- a/src/pyeed/embedding_refactored.py
+++ b/src/pyeed/embedding_refactored.py
@@ -31,6 +31,7 @@
 # Original function signatures maintained for backward compatibility
 # ============================================================================
 
+
 def get_hf_token() -> str:
     """Get or request Hugging Face token."""
     return _get_hf_token()
@@ -62,7 +63,11 @@ def process_batches_on_gpu(
 def load_model_and_tokenizer(
     model_name: str,
     device: torch.device = torch.device("cuda:0"),
-) -> Tuple[Union[EsmModel, ESMC, ESM3, T5Model], Union[EsmTokenizer, T5Tokenizer, None], torch.device]:
+) -> Tuple[
+    Union[EsmModel, ESMC, ESM3, T5Model],
+    Union[EsmTokenizer, T5Tokenizer, None],
+    torch.device,
+]:
     """
     Loads the model and assigns it to a specific GPU.
 
@@ -79,10 +84,10 @@ def load_model_and_tokenizer(
 def preprocess_sequence_for_prott5(sequence: str) -> str:
     """
     Preprocesses a protein sequence for ProtT5 models.
-    
+
     Args:
         sequence: Raw protein sequence
-        
+
     Returns:
         Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
     """
@@ -179,7 +184,9 @@ def get_single_embedding_last_hidden_state(
         np.ndarray: Normalized embeddings for each token in the sequence
     """
     processor = get_processor()
-    return processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
+    return processor.get_single_embedding_last_hidden_state(
+        sequence, model, tokenizer, device
+    )
 
 
 def get_single_embedding_all_layers(
@@ -208,13 +215,17 @@ def get_single_embedding_all_layers(
 
 
 def calculate_single_sequence_embedding_first_layer(
-    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D", device: torch.device = torch.device("cuda:0"),
+    sequence: str,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
+    device: torch.device = torch.device("cuda:0"),
 ) -> NDArray[np.float64]:
     """
     Calculates an embedding for a single sequence using the first layer.
     """
     processor = get_processor()
-    return processor.calculate_single_sequence_embedding_first_layer(sequence, model_name, device)
+    return processor.calculate_single_sequence_embedding_first_layer(
+        sequence, model_name, device
+    )
 
 
 def get_single_embedding_first_layer(
@@ -224,7 +235,9 @@ def get_single_embedding_first_layer(
     Generates normalized embeddings for each token in the sequence using the first layer.
     """
     processor = get_processor()
-    return processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+    return processor.get_single_embedding_first_layer(
+        sequence, model, tokenizer, device
+    )
 
 
 def free_memory() -> None:
@@ -247,4 +260,4 @@ def update_protein_embeddings_in_db(
         accessions (list[str]): The accessions of the proteins to update.
         embeddings_batch (list[NDArray[np.float64]]): The embeddings to update.
     """
-    _update_protein_embeddings_in_db(db, accessions, embeddings_batch) 
\ No newline at end of file
+    _update_protein_embeddings_in_db(db, accessions, embeddings_batch)
diff --git a/src/pyeed/embeddings/__init__.py b/src/pyeed/embeddings/__init__.py
index b1b49497..729ec422 100644
--- a/src/pyeed/embeddings/__init__.py
+++ b/src/pyeed/embeddings/__init__.py
@@ -41,17 +41,18 @@
 
 # Re-export functions from processor
 __all__ = [
-    'load_model_and_tokenizer',
-    'process_batches_on_gpu',
-    'get_batch_embeddings',
-    'calculate_single_sequence_embedding_last_hidden_state',
-    'calculate_single_sequence_embedding_all_layers',
-    'calculate_single_sequence_embedding_first_layer',
-    'get_single_embedding_last_hidden_state',
-    'get_single_embedding_all_layers',
-    'get_single_embedding_first_layer',
+    "load_model_and_tokenizer",
+    "process_batches_on_gpu",
+    "get_batch_embeddings",
+    "calculate_single_sequence_embedding_last_hidden_state",
+    "calculate_single_sequence_embedding_all_layers",
+    "calculate_single_sequence_embedding_first_layer",
+    "get_single_embedding_last_hidden_state",
+    "get_single_embedding_all_layers",
+    "get_single_embedding_first_layer",
 ]
 
+
 # Function implementations
 def load_model_and_tokenizer(
     model_name: str,
@@ -60,7 +61,10 @@ def load_model_and_tokenizer(
     """Load model and tokenizer."""
     if device is None:
         device = torch.device("cuda:0")
-    return cast(Tuple[ModelType, TokenizerType, DeviceType], ModelFactory.load_model_and_tokenizer(model_name, device))
+    return cast(
+        Tuple[ModelType, TokenizerType, DeviceType],
+        ModelFactory.load_model_and_tokenizer(model_name, device),
+    )
 
 
 def process_batches_on_gpu(
@@ -138,7 +142,9 @@ def get_single_embedding_last_hidden_state(
 ) -> NDArray[np.float64]:
     """Get single embedding using last hidden state."""
     processor = get_processor()
-    return processor.get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
+    return processor.get_single_embedding_last_hidden_state(
+        sequence, model, tokenizer, device
+    )
 
 
 def get_single_embedding_all_layers(
@@ -160,53 +166,57 @@ def get_single_embedding_first_layer(
 ) -> NDArray[np.float64]:
     """Get single embedding using first layer."""
     processor = get_processor()
-    return processor.get_single_embedding_first_layer(sequence, model, tokenizer, device)
+    return processor.get_single_embedding_first_layer(
+        sequence, model, tokenizer, device
+    )
+
 
 # Public API
 load_model_and_tokenizer = load_model_and_tokenizer
 process_batches_on_gpu = process_batches_on_gpu
 get_batch_embeddings = get_batch_embeddings
-calculate_single_sequence_embedding_last_hidden_state = calculate_single_sequence_embedding_last_hidden_state
-calculate_single_sequence_embedding_all_layers = calculate_single_sequence_embedding_all_layers
-calculate_single_sequence_embedding_first_layer = calculate_single_sequence_embedding_first_layer
+calculate_single_sequence_embedding_last_hidden_state = (
+    calculate_single_sequence_embedding_last_hidden_state
+)
+calculate_single_sequence_embedding_all_layers = (
+    calculate_single_sequence_embedding_all_layers
+)
+calculate_single_sequence_embedding_first_layer = (
+    calculate_single_sequence_embedding_first_layer
+)
 get_single_embedding_last_hidden_state = get_single_embedding_last_hidden_state
 get_single_embedding_all_layers = get_single_embedding_all_layers
 get_single_embedding_first_layer = get_single_embedding_first_layer
 
 __all__ = [
     # Base classes and types
-    'BaseEmbeddingModel',
-    'ModelType',
-    'normalize_embedding',
-    
+    "BaseEmbeddingModel",
+    "ModelType",
+    "normalize_embedding",
     # Factory and processor
-    'ModelFactory',
-    'EmbeddingProcessor',
-    'get_processor',
-    
+    "ModelFactory",
+    "EmbeddingProcessor",
+    "get_processor",
     # Utilities
-    'get_hf_token',
-    'preprocess_sequence_for_prott5',
-    'free_memory',
-    'determine_model_type',
-    
+    "get_hf_token",
+    "preprocess_sequence_for_prott5",
+    "free_memory",
+    "determine_model_type",
     # Database operations
-    'update_protein_embeddings_in_db',
-    
+    "update_protein_embeddings_in_db",
     # Model implementations
-    'ESM2EmbeddingModel',
-    'ESMCEmbeddingModel',
-    'ESM3EmbeddingModel',
-    'ProtT5EmbeddingModel',
-    
+    "ESM2EmbeddingModel",
+    "ESMCEmbeddingModel",
+    "ESM3EmbeddingModel",
+    "ProtT5EmbeddingModel",
     # Backward compatibility functions
-    'load_model_and_tokenizer',
-    'process_batches_on_gpu',
-    'get_batch_embeddings',
-    'calculate_single_sequence_embedding_last_hidden_state',
-    'calculate_single_sequence_embedding_all_layers',
-    'calculate_single_sequence_embedding_first_layer',
-    'get_single_embedding_last_hidden_state',
-    'get_single_embedding_all_layers',
-    'get_single_embedding_first_layer',
-] 
\ No newline at end of file
+    "load_model_and_tokenizer",
+    "process_batches_on_gpu",
+    "get_batch_embeddings",
+    "calculate_single_sequence_embedding_last_hidden_state",
+    "calculate_single_sequence_embedding_all_layers",
+    "calculate_single_sequence_embedding_first_layer",
+    "get_single_embedding_last_hidden_state",
+    "get_single_embedding_all_layers",
+    "get_single_embedding_first_layer",
+]
diff --git a/src/pyeed/embeddings/base.py b/src/pyeed/embeddings/base.py
index 2fc8637c..c436937d 100644
--- a/src/pyeed/embeddings/base.py
+++ b/src/pyeed/embeddings/base.py
@@ -14,95 +14,83 @@
 
 class BaseEmbeddingModel(ABC):
     """Abstract base class for protein embedding models."""
-    
+
     def __init__(self, model_name: str, device: torch.device):
         self.model_name = model_name
         self.device = device
         self._model: Optional[Any] = None
         self._tokenizer: Optional[Any] = None
-        
+
     @property
     def model(self) -> Optional[Any]:
         """Get the model instance."""
         return self._model
-    
+
     @model.setter
     def model(self, value: Any) -> None:
         """Set the model instance."""
         self._model = value
-    
+
     @property
     def tokenizer(self) -> Optional[Any]:
         """Get the tokenizer instance."""
         return self._tokenizer
-    
+
     @tokenizer.setter
     def tokenizer(self, value: Any) -> None:
         """Set the tokenizer instance."""
         self._tokenizer = value
-    
+
     @abstractmethod
     def load_model(self) -> Tuple[Any, Optional[Any]]:
         """Load and return the model and tokenizer."""
         pass
-    
+
     @abstractmethod
     def preprocess_sequence(self, sequence: str) -> Union[str, Any]:
         """Preprocess a sequence for the specific model type."""
         pass
-    
+
     @abstractmethod
     def get_batch_embeddings(
-        self, 
-        sequences: List[str], 
-        pool_embeddings: bool = True
+        self, sequences: List[str], pool_embeddings: bool = True
     ) -> List[NDArray[np.float64]]:
         """Get embeddings for a batch of sequences."""
         pass
-    
+
     @abstractmethod
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str
+        self, sequence: str
     ) -> NDArray[np.float64]:
         """Get embedding from the last hidden state for a single sequence."""
         pass
-    
+
     @abstractmethod
-    def get_single_embedding_all_layers(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+    def get_single_embedding_all_layers(self, sequence: str) -> NDArray[np.float64]:
         """Get embeddings from all layers for a single sequence."""
         pass
-    
+
     @abstractmethod
-    def get_single_embedding_first_layer(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+    def get_single_embedding_first_layer(self, sequence: str) -> NDArray[np.float64]:
         """Get embedding from the first layer for a single sequence."""
         pass
-    
-    def get_final_embeddings(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_final_embeddings(self, sequence: str) -> NDArray[np.float64]:
         """
         Get final embeddings for a single sequence.
-        
+
         This method provides a robust embedding option that works across all models.
         It falls back gracefully if certain layer-specific methods are not available.
         Default implementation uses last hidden state, but can be overridden.
         """
         result = self.get_single_embedding_last_hidden_state(sequence)
         return np.asarray(result, dtype=np.float64)
-    
+
     def move_to_device(self) -> None:
         """Move model to the specified device."""
         if self.model is not None:
             self.model = self.model.to(self.device)
-    
+
     def cleanup(self) -> None:
         """Clean up model resources."""
         if self._model is not None:
@@ -115,6 +103,7 @@ def cleanup(self) -> None:
 
 class ModelType:
     """Constants for different model types."""
+
     ESM2 = "esm2"
     ESMC = "esmc"
     ESM3 = "esm3"
@@ -127,4 +116,4 @@ def normalize_embedding(embedding: NDArray[np.float64]) -> NDArray[np.float64]:
     # Handle zero norm case to avoid division by zero
     norm[norm == 0] = 1.0
     normalized = embedding / norm
-    return np.asarray(normalized, dtype=np.float64) 
\ No newline at end of file
+    return np.asarray(normalized, dtype=np.float64)
diff --git a/src/pyeed/embeddings/database.py b/src/pyeed/embeddings/database.py
index 18a3aeed..371dc01c 100644
--- a/src/pyeed/embeddings/database.py
+++ b/src/pyeed/embeddings/database.py
@@ -40,4 +40,4 @@ def update_protein_embeddings_in_db(
     """
 
     # Execute the update query with parameters
-    db.execute_write(query, {"updates": updates}) 
\ No newline at end of file
+    db.execute_write(query, {"updates": updates})
diff --git a/src/pyeed/embeddings/factory.py b/src/pyeed/embeddings/factory.py
index 37650c98..5f23b2c6 100644
--- a/src/pyeed/embeddings/factory.py
+++ b/src/pyeed/embeddings/factory.py
@@ -22,24 +22,23 @@
 
 class ModelFactory:
     """Factory for creating embedding model instances."""
-    
+
     @staticmethod
     def create_model(
-        model_name: str, 
-        device: torch.device = torch.device("cuda:0")
+        model_name: str, device: torch.device = torch.device("cuda:0")
     ) -> BaseEmbeddingModel:
         """
         Create an embedding model instance based on the model name.
-        
+
         Args:
             model_name: Name of the model to create
             device: Device to run the model on
-            
+
         Returns:
             BaseEmbeddingModel instance
         """
         model_type = determine_model_type(model_name)
-        
+
         if model_type == "esmc":
             return ESMCEmbeddingModel(model_name, device)
         elif model_type == "esm3":
@@ -48,7 +47,7 @@ def create_model(
             return ProtT5EmbeddingModel(model_name, device)
         else:  # Default to ESM-2
             return ESM2EmbeddingModel(model_name, device)
-    
+
     @staticmethod
     def load_model_and_tokenizer(
         model_name: str,
@@ -56,18 +55,18 @@ def load_model_and_tokenizer(
     ) -> Tuple[Union[Any, DataParallel[Module]], Union[Any, None], torch.device]:
         """
         Load model and tokenizer using the factory pattern.
-        
+
         This method maintains compatibility with the original function signature
         while using the new OOP structure internally.
-        
+
         Args:
             model_name: The model name
             device: The specific GPU device
-            
+
         Returns:
             Tuple: (model, tokenizer, device)
         """
         embedding_model = ModelFactory.create_model(model_name, device)
         model, tokenizer = embedding_model.load_model()
-        
-        return model, tokenizer, device 
\ No newline at end of file
+
+        return model, tokenizer, device
diff --git a/src/pyeed/embeddings/models/__init__.py b/src/pyeed/embeddings/models/__init__.py
index 1d2a7134..fa7b5006 100644
--- a/src/pyeed/embeddings/models/__init__.py
+++ b/src/pyeed/embeddings/models/__init__.py
@@ -10,8 +10,8 @@
 from .prott5 import ProtT5EmbeddingModel
 
 __all__ = [
-    'ESM2EmbeddingModel',
-    'ESMCEmbeddingModel', 
-    'ESM3EmbeddingModel',
-    'ProtT5EmbeddingModel',
-] 
\ No newline at end of file
+    "ESM2EmbeddingModel",
+    "ESMCEmbeddingModel",
+    "ESM3EmbeddingModel",
+    "ProtT5EmbeddingModel",
+]
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index b3d0068d..2da08b66 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -15,52 +15,50 @@
 
 class ESM2EmbeddingModel(BaseEmbeddingModel):
     """ESM-2 model implementation."""
-    
+
     def __init__(self, model_name: str, device: torch.device):
         super().__init__(model_name, device)
-    
+
     def load_model(self) -> Tuple[EsmModel, EsmTokenizer]:
         """Load ESM-2 model and tokenizer."""
         token = get_hf_token()
-        
+
         full_model_name = (
             self.model_name
             if self.model_name.startswith("facebook/")
             else f"facebook/{self.model_name}"
         )
-        
+
         model = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
         tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)
-        
+
         # Move to device
         model = model.to(self.device)
-        
+
         self.model = model
         self.tokenizer = tokenizer
-        
+
         return model, tokenizer
-    
+
     def preprocess_sequence(self, sequence: str) -> str:
         """ESM-2 doesn't need special preprocessing."""
         return sequence
-    
+
     def get_batch_embeddings(
-        self, 
-        sequences: List[str], 
-        pool_embeddings: bool = True
+        self, sequences: List[str], pool_embeddings: bool = True
     ) -> List[NDArray[np.float64]]:
         """Get embeddings for a batch of sequences using ESM-2."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(EsmModel, self.model)
         tokenizer = cast(EsmTokenizer, self.tokenizer)
-        
+
         inputs = tokenizer(
             sequences, padding=True, truncation=True, return_tensors="pt"
         ).to(self.device)
-        
+
         with torch.no_grad():
             outputs = model(**inputs, output_hidden_states=True)
 
@@ -71,48 +69,44 @@ def get_batch_embeddings(
             # Mean pooling across sequence length
             return [embedding.mean(axis=0) for embedding in hidden_states]
         return list(hidden_states)
-    
+
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str
+        self, sequence: str
     ) -> NDArray[np.float64]:
         """Get last hidden state embedding for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(EsmModel, self.model)
         tokenizer = cast(EsmTokenizer, self.tokenizer)
-        
+
         inputs = tokenizer(sequence, return_tensors="pt").to(self.device)
-        
+
         with torch.no_grad():
             outputs = model(**inputs)
-        
+
         # Remove batch dimension and special tokens ([CLS] and [SEP])
         embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
         return np.asarray(embedding, dtype=np.float64)
-    
-    def get_single_embedding_all_layers(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_all_layers(self, sequence: str) -> NDArray[np.float64]:
         """Get embeddings from all layers for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(EsmModel, self.model)
         tokenizer = cast(EsmTokenizer, self.tokenizer)
-        
+
         inputs = tokenizer(sequence, return_tensors="pt").to(self.device)
-        
+
         with torch.no_grad():
             outputs = model(**inputs, output_hidden_states=True)
-        
+
         embeddings_list = []
         hidden_states = outputs.hidden_states  # Tuple: (layer0, layer1, ..., layerN)
-        
+
         for layer_tensor in hidden_states:
             # Remove batch dimension and special tokens ([CLS] and [SEP])
             emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy()
@@ -120,35 +114,29 @@ def get_single_embedding_all_layers(
             embeddings_list.append(emb)
 
         return np.array(embeddings_list)
-    
-    def get_single_embedding_first_layer(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_first_layer(self, sequence: str) -> NDArray[np.float64]:
         """Get first layer embedding for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(EsmModel, self.model)
         tokenizer = cast(EsmTokenizer, self.tokenizer)
-        
+
         inputs = tokenizer(sequence, return_tensors="pt").to(self.device)
-        
+
         with torch.no_grad():
             outputs = model(**inputs, output_hidden_states=True)
-        
+
         # Get the first layer's hidden states for all residues (excluding special tokens)
         embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy()
-        
+
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
         return embedding
 
-    def get_final_embeddings(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+    def get_final_embeddings(self, sequence: str) -> NDArray[np.float64]:
         """
         Get final embeddings for ESM2 with robust fallback.
         """
@@ -159,4 +147,4 @@ def get_final_embeddings(
             else:
                 raise ValueError("Batch embeddings method returned empty results")
         except Exception as e:
-            raise ValueError(f"ESM2 embedding extraction failed: {e}") 
\ No newline at end of file
+            raise ValueError(f"ESM2 embedding extraction failed: {e}")
diff --git a/src/pyeed/embeddings/models/esm3.py b/src/pyeed/embeddings/models/esm3.py
index e6aca8b3..062df27b 100644
--- a/src/pyeed/embeddings/models/esm3.py
+++ b/src/pyeed/embeddings/models/esm3.py
@@ -15,35 +15,33 @@
 
 class ESM3EmbeddingModel(BaseEmbeddingModel):
     """ESM-3 model implementation."""
-    
+
     def __init__(self, model_name: str, device: torch.device):
         super().__init__(model_name, device)
-    
+
     def load_model(self) -> Tuple[ESM3, None]:
         """Load ESM3 model."""
         model = ESM3.from_pretrained("esm3_sm_open_v1")
         model = model.to(self.device)
-        
+
         self.model = model
-        
+
         return model, None
-    
+
     def preprocess_sequence(self, sequence: str) -> ESMProtein:
         """ESM3 uses ESMProtein objects."""
         return ESMProtein(sequence=sequence)
-    
+
     def get_batch_embeddings(
-        self, 
-        sequences: List[str], 
-        pool_embeddings: bool = True
+        self, sequences: List[str], pool_embeddings: bool = True
     ) -> List[NDArray[np.float64]]:
         """Get embeddings for a batch of sequences using ESM3."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESM3, self.model)
-        
+
         embedding_list = []
         with torch.no_grad():
             for sequence in sequences:
@@ -62,18 +60,17 @@ def get_batch_embeddings(
                     embeddings = embeddings.mean(axis=0)
                 embedding_list.append(embeddings)
         return embedding_list
-    
+
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str
+        self, sequence: str
     ) -> NDArray[np.float64]:
         """Get last hidden state embedding for a single sequence."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESM3, self.model)
-        
+
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
             sequence_encoding = model.encode(protein)
@@ -88,20 +85,17 @@ def get_single_embedding_last_hidden_state(
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
         return embedding
-    
-    def get_single_embedding_all_layers(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_all_layers(self, sequence: str) -> NDArray[np.float64]:
         """Get embeddings from all layers for a single sequence."""
         # ESM3 doesn't support all layers extraction in the same way
         # This is a simplified implementation - might need enhancement based on ESM3 capabilities
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESM3, self.model)
-        
+
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
             sequence_encoding = model.encode(protein)
@@ -111,7 +105,7 @@ def get_single_embedding_all_layers(
             )
             if result is None or result.per_residue_embedding is None:
                 raise ValueError("Model did not return embeddings")
-            
+
             # For ESM3, we return the per-residue embedding as a single layer
             # This might need adjustment based on actual ESM3 API capabilities
             embedding = result.per_residue_embedding.to(torch.float32).cpu().numpy()
@@ -119,19 +113,16 @@ def get_single_embedding_all_layers(
 
         # Return as a single layer array for consistency with other models
         return np.array([embedding])
-    
-    def get_single_embedding_first_layer(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_first_layer(self, sequence: str) -> NDArray[np.float64]:
         """Get first layer embedding for a single sequence."""
         # For ESM3, this is the same as the per-residue embedding
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESM3, self.model)
-        
+
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
             sequence_encoding = model.encode(protein)
@@ -145,12 +136,9 @@ def get_single_embedding_first_layer(
 
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
-        return embedding 
-    
-    def get_final_embeddings(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+        return embedding
+
+    def get_final_embeddings(self, sequence: str) -> NDArray[np.float64]:
         """
         Get final embeddings for ESM3 with robust fallback.
         """
@@ -171,8 +159,8 @@ def get_final_embeddings(
                         protein = self.preprocess_sequence(sequence)
                         protein_tensor = model.encode(protein)
                         logits_output = model.logits(
-                            protein_tensor, 
-                            LogitsConfig(sequence=True, return_embeddings=True)
+                            protein_tensor,
+                            LogitsConfig(sequence=True, return_embeddings=True),
                         )
                         if logits_output.embeddings is None:
                             raise ValueError("Model did not return embeddings")
@@ -180,8 +168,10 @@ def get_final_embeddings(
                         pooled_embedding = embeddings.mean(axis=1)[0]
                         return np.asarray(pooled_embedding, dtype=np.float64)
                 except Exception as minimal_error:
-                    raise ValueError(f"ESM3 embedding extraction failed with OOM: {minimal_error}")
+                    raise ValueError(
+                        f"ESM3 embedding extraction failed with OOM: {minimal_error}"
+                    )
             else:
                 raise e
         except Exception as e:
-            raise ValueError(f"ESM3 embedding extraction failed: {e}") 
\ No newline at end of file
+            raise ValueError(f"ESM3 embedding extraction failed: {e}")
diff --git a/src/pyeed/embeddings/models/esmc.py b/src/pyeed/embeddings/models/esmc.py
index 4256bd63..1eddad4e 100644
--- a/src/pyeed/embeddings/models/esmc.py
+++ b/src/pyeed/embeddings/models/esmc.py
@@ -16,73 +16,79 @@
 
 class ESMCEmbeddingModel(BaseEmbeddingModel):
     """ESMC model implementation."""
-    
+
     def __init__(self, model_name: str, device: torch.device):
         super().__init__(model_name, device)
-    
+
     def load_model(self) -> Tuple[ESMC, None]:
         """Load ESMC model with improved error handling."""
         try:
             # Try to disable tqdm to avoid threading issues
             import os
-            os.environ['DISABLE_TQDM'] = 'True'
-            
+
+            os.environ["DISABLE_TQDM"] = "True"
+
             model = ESMC.from_pretrained(self.model_name)
             model = model.to(self.device)
-            
+
             self.model = model
-            
+
             return model, None
-            
+
         except Exception as e:
             if "tqdm" in str(e).lower() or "_lock" in str(e).lower():
-                logger.warning(f"ESMC model loading failed due to tqdm threading issue: {e}. Retrying with threading workaround...")
-                
+                logger.warning(
+                    f"ESMC model loading failed due to tqdm threading issue: {e}. Retrying with threading workaround..."
+                )
+
                 # Try alternative approach with threading lock
                 import time
-                
+
                 # Add a small delay and retry
-                time.sleep(0.1 + torch.cuda.current_device() * 0.05)  # Staggered delay per GPU
-                
+                time.sleep(
+                    0.1 + torch.cuda.current_device() * 0.05
+                )  # Staggered delay per GPU
+
                 try:
                     # Try importing tqdm and resetting its state
                     try:
                         import tqdm
-                        if hasattr(tqdm.tqdm, '_lock'):
-                            delattr(tqdm.tqdm, '_lock')
+
+                        if hasattr(tqdm.tqdm, "_lock"):
+                            delattr(tqdm.tqdm, "_lock")
                     except (AttributeError, ImportError):
                         pass
-                    
+
                     model = ESMC.from_pretrained(self.model_name)
                     model = model.to(self.device)
-                    
+
                     self.model = model
-                    
+
                     return model, None
-                    
+
                 except Exception as retry_error:
-                    logger.error(f"ESMC model loading failed even after retry: {retry_error}")
+                    logger.error(
+                        f"ESMC model loading failed even after retry: {retry_error}"
+                    )
                     raise retry_error
             else:
                 logger.error(f"ESMC model loading failed: {e}")
                 raise e
-    
+
     def preprocess_sequence(self, sequence: str) -> ESMProtein:
         """ESMC uses ESMProtein objects."""
         return ESMProtein(sequence=sequence)
-    
+
     def get_batch_embeddings(
-        self, 
-        sequences: List[str], 
-        pool_embeddings: bool = True
+        self, sequences: List[str], pool_embeddings: bool = True
     ) -> List[NDArray[np.float64]]:
         """Get embeddings for a batch of sequences using ESMC."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESMC, self.model)
-        
+
         embedding_list = []
         with torch.no_grad():
             for sequence in sequences:
@@ -103,18 +109,17 @@ def get_batch_embeddings(
                     embeddings = embeddings.mean(axis=1)
                 embedding_list.append(embeddings[0])
         return embedding_list
-    
+
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str
+        self, sequence: str
     ) -> NDArray[np.float64]:
         """Get last hidden state embedding for a single sequence."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESMC, self.model)
-        
+
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
             protein_tensor = model.encode(protein)
@@ -140,18 +145,15 @@ def get_single_embedding_last_hidden_state(
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
         return embedding
-    
-    def get_single_embedding_all_layers(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_all_layers(self, sequence: str) -> NDArray[np.float64]:
         """Get embeddings from all layers for a single sequence."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESMC, self.model)
-        
+
         embeddings_list = []
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
@@ -179,18 +181,15 @@ def get_single_embedding_all_layers(
                 embeddings_list.append(emb)
 
         return np.array(embeddings_list)
-    
-    def get_single_embedding_first_layer(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_first_layer(self, sequence: str) -> NDArray[np.float64]:
         """Get first layer embedding for a single sequence."""
         if self.model is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows it's not None
         model = cast(ESMC, self.model)
-        
+
         with torch.no_grad():
             protein = self.preprocess_sequence(sequence)
             protein_tensor = model.encode(protein)
@@ -212,15 +211,12 @@ def get_single_embedding_first_layer(
 
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
-        return embedding 
-    
-    def get_final_embeddings(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+        return embedding
+
+    def get_final_embeddings(self, sequence: str) -> NDArray[np.float64]:
         """
         Get final embeddings for ESMC with robust fallback.
-        
+
         Provides a more robust embedding extraction that prioritizes
         batch embeddings (properly pooled) over last hidden state.
         """
@@ -233,40 +229,46 @@ def get_final_embeddings(
                 raise ValueError("Batch embeddings method returned empty results")
         except (torch.cuda.OutOfMemoryError, RuntimeError) as e:
             if "out of memory" in str(e).lower():
-                logger.warning(f"Batch embeddings method failed due to OOM for ESMC: {e}. Clearing cache and trying minimal approach.")
+                logger.warning(
+                    f"Batch embeddings method failed due to OOM for ESMC: {e}. Clearing cache and trying minimal approach."
+                )
                 # Clear cache and try a more memory-efficient approach
                 torch.cuda.empty_cache()
                 try:
                     # Minimal approach - just get embeddings without requesting hidden states
                     if self.model is None:
                         self.load_model()
-                    
+
                     model = cast(ESMC, self.model)
-                    
+
                     with torch.no_grad():
                         protein = self.preprocess_sequence(sequence)
                         protein_tensor = model.encode(protein)
                         logits_output = model.logits(
-                            protein_tensor, 
-                            LogitsConfig(sequence=True, return_embeddings=True)
+                            protein_tensor,
+                            LogitsConfig(sequence=True, return_embeddings=True),
                         )
                         if logits_output.embeddings is None:
                             raise ValueError("Model did not return embeddings")
-                        
+
                         # Get embeddings and pool them properly
                         embeddings = logits_output.embeddings.cpu().numpy()
                         logger.info(f"Embeddings shape: {embeddings.shape}")
-                        
+
                         # Pool across sequence dimension to get single vector
                         pooled_embedding = embeddings.mean(axis=1)[0]
-                        
+
                         return np.asarray(pooled_embedding, dtype=np.float64)
-                        
+
                 except Exception as minimal_error:
-                    logger.error(f"Minimal embedding extraction also failed for ESMC: {minimal_error}")
-                    raise ValueError(f"ESMC embedding extraction failed with OOM: {minimal_error}")
+                    logger.error(
+                        f"Minimal embedding extraction also failed for ESMC: {minimal_error}"
+                    )
+                    raise ValueError(
+                        f"ESMC embedding extraction failed with OOM: {minimal_error}"
+                    )
             else:
                 raise e
         except Exception as e:
             logger.error(f"All embedding extraction methods failed for ESMC: {e}")
-            raise ValueError(f"ESMC embedding extraction failed: {e}") 
\ No newline at end of file
+            raise ValueError(f"ESMC embedding extraction failed: {e}")
diff --git a/src/pyeed/embeddings/models/prott5.py b/src/pyeed/embeddings/models/prott5.py
index a9b3e6c3..5e4c996e 100644
--- a/src/pyeed/embeddings/models/prott5.py
+++ b/src/pyeed/embeddings/models/prott5.py
@@ -15,81 +15,79 @@
 
 class ProtT5EmbeddingModel(BaseEmbeddingModel):
     """ProtT5 model implementation."""
-    
+
     def __init__(self, model_name: str, device: torch.device):
         super().__init__(model_name, device)
-    
+
     def load_model(self) -> Tuple[T5Model, T5Tokenizer]:
         """Load ProtT5 model and tokenizer."""
         token = get_hf_token()
-        
+
         full_model_name = (
             self.model_name
             if self.model_name.startswith("Rostlab/")
             else f"Rostlab/{self.model_name}"
         )
-        
+
         model = T5Model.from_pretrained(full_model_name, use_auth_token=token)
         tokenizer = T5Tokenizer.from_pretrained(
             full_model_name, use_auth_token=token, do_lower_case=False
         )
-        
+
         # Move to device
         model = model.to(self.device)
-        
+
         self.model = model
         self.tokenizer = tokenizer
-        
+
         return model, tokenizer
-    
+
     def preprocess_sequence(self, sequence: str) -> str:
         """ProtT5 needs space-separated sequences with rare AAs mapped to X."""
         return preprocess_sequence_for_prott5(sequence)
-    
+
     def get_batch_embeddings(
-        self, 
-        sequences: List[str], 
-        pool_embeddings: bool = True
+        self, sequences: List[str], pool_embeddings: bool = True
     ) -> List[NDArray[np.float64]]:
         """Get embeddings for a batch of sequences using ProtT5."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(T5Model, self.model)
         tokenizer = cast(T5Tokenizer, self.tokenizer)
-        
+
         # Preprocess sequences for ProtT5
         processed_sequences = [self.preprocess_sequence(seq) for seq in sequences]
-        
+
         inputs = tokenizer.batch_encode_plus(
-            processed_sequences, 
-            add_special_tokens=True, 
+            processed_sequences,
+            add_special_tokens=True,
             padding="longest",
-            return_tensors="pt"
+            return_tensors="pt",
         )
-        
+
         # Move inputs to device
-        input_ids = inputs['input_ids'].to(self.device)
-        attention_mask = inputs['attention_mask'].to(self.device)
-        
+        input_ids = inputs["input_ids"].to(self.device)
+        attention_mask = inputs["attention_mask"].to(self.device)
+
         with torch.no_grad():
             # For ProtT5, use encoder embeddings for feature extraction
             # Create dummy decoder inputs (just the pad token)
             batch_size = input_ids.shape[0]
             decoder_input_ids = torch.full(
-                (batch_size, 1), 
-                tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+                (batch_size, 1),
+                tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0,
                 dtype=torch.long,
-                device=self.device
+                device=self.device,
             )
-            
+
             outputs = model(
-                input_ids=input_ids, 
+                input_ids=input_ids,
                 attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids
+                decoder_input_ids=decoder_input_ids,
             )
-            
+
             # Get encoder last hidden state (encoder embeddings)
             hidden_states = outputs.encoder_last_hidden_state.cpu().numpy()
 
@@ -105,86 +103,78 @@ def get_batch_embeddings(
                 embedding_list.append(pooled_embedding)
             return embedding_list
         return list(hidden_states)
-    
+
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str
+        self, sequence: str
     ) -> NDArray[np.float64]:
         """Get last hidden state embedding for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(T5Model, self.model)
         tokenizer = cast(T5Tokenizer, self.tokenizer)
-        
+
         processed_sequence = self.preprocess_sequence(sequence)
         inputs = tokenizer.encode_plus(
-            processed_sequence,
-            add_special_tokens=True,
-            return_tensors="pt"
+            processed_sequence, add_special_tokens=True, return_tensors="pt"
         )
-        
-        input_ids = inputs['input_ids'].to(self.device)
-        attention_mask = inputs['attention_mask'].to(self.device)
-        
+
+        input_ids = inputs["input_ids"].to(self.device)
+        attention_mask = inputs["attention_mask"].to(self.device)
+
         # Create dummy decoder inputs
         decoder_input_ids = torch.full(
-            (1, 1), 
-            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            (1, 1),
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0,
             dtype=torch.long,
-            device=self.device
+            device=self.device,
         )
-        
+
         with torch.no_grad():
             outputs = model(
-                input_ids=input_ids, 
+                input_ids=input_ids,
                 attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids
+                decoder_input_ids=decoder_input_ids,
             )
-        
+
         # Get encoder last hidden state including special tokens
         embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
         return np.asarray(embedding, dtype=np.float64)
-    
-    def get_single_embedding_all_layers(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_all_layers(self, sequence: str) -> NDArray[np.float64]:
         """Get embeddings from all layers for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(T5Model, self.model)
         tokenizer = cast(T5Tokenizer, self.tokenizer)
-        
+
         processed_sequence = self.preprocess_sequence(sequence)
         inputs = tokenizer.encode_plus(
-            processed_sequence,
-            add_special_tokens=True,
-            return_tensors="pt"
+            processed_sequence, add_special_tokens=True, return_tensors="pt"
         )
-        
-        input_ids = inputs['input_ids'].to(self.device)
-        attention_mask = inputs['attention_mask'].to(self.device)
-        
+
+        input_ids = inputs["input_ids"].to(self.device)
+        attention_mask = inputs["attention_mask"].to(self.device)
+
         # Create dummy decoder inputs
         decoder_input_ids = torch.full(
-            (1, 1), 
-            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            (1, 1),
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0,
             dtype=torch.long,
-            device=self.device
+            device=self.device,
         )
-        
+
         with torch.no_grad():
             outputs = model(
-                input_ids=input_ids, 
+                input_ids=input_ids,
                 attention_mask=attention_mask,
                 decoder_input_ids=decoder_input_ids,
-                output_hidden_states=True
+                output_hidden_states=True,
             )
-        
+
         embeddings_list = []
         # Get all encoder hidden states
         encoder_hidden_states = outputs.encoder_hidden_states
@@ -195,56 +185,48 @@ def get_single_embedding_all_layers(
             embeddings_list.append(emb)
 
         return np.array(embeddings_list)
-    
-    def get_single_embedding_first_layer(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_single_embedding_first_layer(self, sequence: str) -> NDArray[np.float64]:
         """Get first layer embedding for a single sequence."""
         if self.model is None or self.tokenizer is None:
             self.load_model()
-        
+
         # Type cast to ensure type checker knows they're not None
         model = cast(T5Model, self.model)
         tokenizer = cast(T5Tokenizer, self.tokenizer)
-        
+
         processed_sequence = self.preprocess_sequence(sequence)
         inputs = tokenizer.encode_plus(
-            processed_sequence,
-            add_special_tokens=True,
-            return_tensors="pt"
+            processed_sequence, add_special_tokens=True, return_tensors="pt"
         )
-        
-        input_ids = inputs['input_ids'].to(self.device)
-        attention_mask = inputs['attention_mask'].to(self.device)
-        
+
+        input_ids = inputs["input_ids"].to(self.device)
+        attention_mask = inputs["attention_mask"].to(self.device)
+
         # Create dummy decoder inputs
         decoder_input_ids = torch.full(
-            (1, 1), 
-            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0, 
+            (1, 1),
+            tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0,
             dtype=torch.long,
-            device=self.device
+            device=self.device,
         )
-        
+
         with torch.no_grad():
             outputs = model(
-                input_ids=input_ids, 
+                input_ids=input_ids,
                 attention_mask=attention_mask,
                 decoder_input_ids=decoder_input_ids,
-                output_hidden_states=True
+                output_hidden_states=True,
             )
-        
+
         # Get first encoder hidden state including special tokens
         embedding = outputs.encoder_hidden_states[0][0].detach().cpu().numpy()
-        
+
         # Normalize the embedding
         embedding = normalize_embedding(embedding)
         return embedding
-    
-    def get_final_embeddings(
-        self, 
-        sequence: str
-    ) -> NDArray[np.float64]:
+
+    def get_final_embeddings(self, sequence: str) -> NDArray[np.float64]:
         """
         Get final embeddings for ProtT5 with robust fallback.
         """
@@ -255,4 +237,4 @@ def get_final_embeddings(
             else:
                 raise ValueError("Batch embeddings method returned empty results")
         except Exception as e:
-            raise ValueError(f"ProtT5 embedding extraction failed: {e}") 
\ No newline at end of file
+            raise ValueError(f"ProtT5 embedding extraction failed: {e}")
diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index 1433b323..a816d874 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -28,16 +28,16 @@
 class EmbeddingProcessor:
     """
     Main processor for handling protein embedding operations.
-    
+
     Automatically manages device selection, model loading, and provides
     simplified interfaces for all embedding operations.
     """
-    
+
     def __init__(self) -> None:
         self._models: Dict[str, BaseEmbeddingModel] = {}
         self._devices: List[torch.device] = []
         self._initialize_devices()
-    
+
     def _initialize_devices(self) -> None:
         """Initialize available devices for computation."""
         if torch.cuda.is_available():
@@ -47,26 +47,24 @@ def _initialize_devices(self) -> None:
         else:
             self._devices = [torch.device("cpu")]
             logger.warning("No GPU available, using CPU.")
-    
+
     def get_available_devices(self) -> List[torch.device]:
         """Get list of available devices."""
         return self._devices.copy()
-    
+
     def get_or_create_model(
-        self, 
-        model_name: str, 
-        device: Optional[torch.device] = None
+        self, model_name: str, device: Optional[torch.device] = None
     ) -> BaseEmbeddingModel:
         """Get existing model or create new one on specified or best available device."""
         if device is None:
             device = self._devices[0]  # Use first available device
-        
+
         key = f"{model_name}_{device}"
         if key not in self._models:
             self._models[key] = ModelFactory.create_model(model_name, device)
             logger.info(f"Loaded model {model_name} on {device}")
         return self._models[key]
-    
+
     def calculate_batch_embeddings(
         self,
         data: List[tuple[str, str]],
@@ -74,11 +72,13 @@ def calculate_batch_embeddings(
         batch_size: int = 16,
         num_gpus: Optional[int] = None,
         db: Optional[DatabaseConnector] = None,
-        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
+        embedding_type: Literal[
+            "last_hidden_state", "all_layers", "first_layer", "final_embeddings"
+        ] = "last_hidden_state",
     ) -> Optional[List[NDArray[np.float64]]]:
         """
         Calculate embeddings for a batch of sequences with automatic device management.
-        
+
         Args:
             data: List of (accession_id, sequence) tuples
             model_name: Name of the model to use
@@ -87,35 +87,35 @@ def calculate_batch_embeddings(
             db: Database connector for storing results (optional)
             embedding_type: Type of embedding to calculate:
                 - "last_hidden_state": Use last hidden state (most common)
-                - "all_layers": Average across all transformer layers  
+                - "all_layers": Average across all transformer layers
                 - "first_layer": Use first layer embedding
                 - "final_embeddings": Robust option that works across all models (recommended for compatibility)
-            
+
         Returns:
             List of embeddings if db is None, otherwise None (results stored in DB)
         """
         # Disable tqdm to prevent threading issues with multiple GPUs
-        os.environ['DISABLE_TQDM'] = 'True'
-        
+        os.environ["DISABLE_TQDM"] = "True"
+
         if not data:
             logger.info("No sequences to process.")
             return []
-        
+
         # Determine number of GPUs to use
-        available_gpus = len([d for d in self._devices if d.type == 'cuda'])
+        available_gpus = len([d for d in self._devices if d.type == "cuda"])
         if num_gpus is None:
             num_gpus = available_gpus
         else:
             num_gpus = min(num_gpus, available_gpus)
-        
+
         if num_gpus == 0:
             devices_to_use = [torch.device("cpu")]
             num_gpus = 1
         else:
             devices_to_use = [torch.device(f"cuda:{i}") for i in range(num_gpus)]
-        
+
         logger.info(f"Processing {len(data)} sequences using {num_gpus} device(s)")
-        
+
         # Load models for each device
         models = []
         for device in devices_to_use:
@@ -124,7 +124,9 @@ def calculate_batch_embeddings(
                 models.append(model)
             except Exception as e:
                 if "tqdm" in str(e).lower() or "_lock" in str(e).lower():
-                    logger.warning(f"Model loading failed on {device} due to threading issue. Reducing to single GPU mode.")
+                    logger.warning(
+                        f"Model loading failed on {device} due to threading issue. Reducing to single GPU mode."
+                    )
                     # Fall back to single GPU mode to avoid threading issues
                     devices_to_use = [devices_to_use[0]]
                     num_gpus = 1
@@ -132,15 +134,13 @@ def calculate_batch_embeddings(
                     break
                 else:
                     raise e
-        
+
         # Split data across devices
-        gpu_batches = [
-            data[i::num_gpus] for i in range(num_gpus)
-        ]
-        
+        gpu_batches = [data[i::num_gpus] for i in range(num_gpus)]
+
         start_time = time.time()
         all_embeddings = []
-        
+
         if num_gpus == 1:
             # Single device processing
             embeddings = self._process_batch_single_device(
@@ -154,7 +154,7 @@ def calculate_batch_embeddings(
                 for i, gpu_data in enumerate(gpu_batches):
                     if not gpu_data:
                         continue
-                    
+
                     futures.append(
                         executor.submit(
                             self._process_batch_single_device,
@@ -162,37 +162,39 @@ def calculate_batch_embeddings(
                             models[i],
                             batch_size,
                             db,
-                            embedding_type
+                            embedding_type,
                         )
                     )
-                
+
                 for future in futures:
                     embeddings = future.result()
                     all_embeddings.extend(embeddings)
-        
+
         end_time = time.time()
-        logger.info(f"Batch processing completed in {end_time - start_time:.2f} seconds")
-        
+        logger.info(
+            f"Batch processing completed in {end_time - start_time:.2f} seconds"
+        )
+
         return all_embeddings if db is None else None
-    
+
     def _process_batch_single_device(
         self,
         data: List[tuple[str, str]],
         model: BaseEmbeddingModel,
         batch_size: int,
         db: Optional[DatabaseConnector] = None,
-        embedding_type: str = "last_hidden_state"
+        embedding_type: str = "last_hidden_state",
     ) -> List[NDArray[np.float64]]:
         """Process batch on a single device."""
         all_embeddings = []
-        
+
         for batch_start in range(0, len(data), batch_size):
             batch_end = min(batch_start + batch_size, len(data))
             batch = data[batch_start:batch_end]
-            
+
             accessions, sequences = zip(*batch)
             current_batch_size = len(sequences)
-            
+
             while current_batch_size > 0:
                 try:
                     # Calculate embeddings based on type
@@ -219,44 +221,48 @@ def _process_batch_single_device(
                         ]
                     else:
                         raise ValueError(f"Unknown embedding_type: {embedding_type}")
-                    
+
                     # Store in database if provided
                     if db is not None:
                         update_protein_embeddings_in_db(
                             db, list(accessions[:current_batch_size]), embeddings_batch
                         )
-                    
+
                     all_embeddings.extend(embeddings_batch)
                     break  # Successful execution
-                
+
                 except torch.cuda.OutOfMemoryError:
                     torch.cuda.empty_cache()
                     current_batch_size = max(1, current_batch_size // 2)
-                    logger.warning(f"Reduced batch size to {current_batch_size} due to OOM error.")
-        
+                    logger.warning(
+                        f"Reduced batch size to {current_batch_size} due to OOM error."
+                    )
+
         return all_embeddings
-    
+
     def calculate_single_embedding(
         self,
         sequence: str,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
-        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state",
-        device: Optional[torch.device] = None
+        embedding_type: Literal[
+            "last_hidden_state", "all_layers", "first_layer", "final_embeddings"
+        ] = "last_hidden_state",
+        device: Optional[torch.device] = None,
     ) -> NDArray[np.float64]:
         """
         Calculate embedding for a single sequence.
-        
+
         Args:
             sequence: Protein sequence
             model_name: Name of the model to use
             embedding_type: Type of embedding to calculate
             device: Specific device to use (optional)
-            
+
         Returns:
             Embedding as numpy array
         """
         model = self.get_or_create_model(model_name, device)
-        
+
         if embedding_type == "last_hidden_state":
             return model.get_single_embedding_last_hidden_state(sequence)
         elif embedding_type == "all_layers":
@@ -267,18 +273,20 @@ def calculate_single_embedding(
             return model.get_final_embeddings(sequence)
         else:
             raise ValueError(f"Unknown embedding_type: {embedding_type}")
-    
+
     def calculate_database_embeddings(
         self,
         db: DatabaseConnector,
         batch_size: int = 16,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
         num_gpus: Optional[int] = None,
-        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
+        embedding_type: Literal[
+            "last_hidden_state", "all_layers", "first_layer", "final_embeddings"
+        ] = "last_hidden_state",
     ) -> None:
         """
         Calculate embeddings for all sequences in database that don't have embeddings.
-        
+
         Args:
             db: Database connector
             batch_size: Batch size for processing
@@ -294,13 +302,13 @@ def calculate_database_embeddings(
         """
         results = db.execute_read(query)
         data = [(result["accession"], result["sequence"]) for result in results]
-        
+
         if not data:
             logger.info("No sequences to process.")
             return
-        
+
         logger.info(f"Found {len(data)} sequences without embeddings")
-        
+
         # Process using batch embedding method
         self.calculate_batch_embeddings(
             data=data,
@@ -308,9 +316,9 @@ def calculate_database_embeddings(
             batch_size=batch_size,
             num_gpus=num_gpus,
             db=db,
-            embedding_type=embedding_type
+            embedding_type=embedding_type,
         )
-    
+
     # Legacy compatibility methods (for backward compatibility with existing processor.py)
     def process_batches_on_gpu(
         self,
@@ -322,19 +330,19 @@ def process_batches_on_gpu(
         device: torch.device,
     ) -> None:
         """Legacy method for backward compatibility."""
-        logger.warning("Using legacy process_batches_on_gpu method. Consider using calculate_batch_embeddings instead.")
-        
+        logger.warning(
+            "Using legacy process_batches_on_gpu method. Consider using calculate_batch_embeddings instead."
+        )
+
         # Convert to new interface
         accessions, sequences = zip(*data)
         embedding_data = list(zip(accessions, sequences))
-        
+
         # Use new method
         self.calculate_batch_embeddings(
-            data=embedding_data,
-            batch_size=batch_size,
-            db=db
+            data=embedding_data, batch_size=batch_size, db=db
         )
-    
+
     def get_batch_embeddings_unified(
         self,
         batch_sequences: List[str],
@@ -345,15 +353,15 @@ def get_batch_embeddings_unified(
     ) -> List[NDArray[np.float64]]:
         """Legacy method for backward compatibility."""
         logger.warning("Using legacy get_batch_embeddings_unified method.")
-        
+
         # Determine model type from the actual model instance
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
-        
+
         embedding_model = ESM2EmbeddingModel("", device)
         embedding_model.model = base_model
         embedding_model.tokenizer = tokenizer
         return embedding_model.get_batch_embeddings(batch_sequences, pool_embeddings)
-    
+
     def calculate_single_sequence_embedding_last_hidden_state(
         self,
         sequence: str,
@@ -361,8 +369,10 @@ def calculate_single_sequence_embedding_last_hidden_state(
         model_name: str = "facebook/esm2_t33_650M_UR50D",
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
-        return self.calculate_single_embedding(sequence, model_name, "last_hidden_state", device)
-    
+        return self.calculate_single_embedding(
+            sequence, model_name, "last_hidden_state", device
+        )
+
     def calculate_single_sequence_embedding_all_layers(
         self,
         sequence: str,
@@ -370,8 +380,10 @@ def calculate_single_sequence_embedding_all_layers(
         model_name: str = "facebook/esm2_t33_650M_UR50D",
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
-        return self.calculate_single_embedding(sequence, model_name, "all_layers", device)
-    
+        return self.calculate_single_embedding(
+            sequence, model_name, "all_layers", device
+        )
+
     def calculate_single_sequence_embedding_first_layer(
         self,
         sequence: str,
@@ -379,57 +391,53 @@ def calculate_single_sequence_embedding_first_layer(
         device: torch.device = torch.device("cuda:0"),
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
-        return self.calculate_single_embedding(sequence, model_name, "first_layer", device)
-    
+        return self.calculate_single_embedding(
+            sequence, model_name, "first_layer", device
+        )
+
     def get_single_embedding_last_hidden_state(
-        self, 
-        sequence: str, 
-        model: Any, 
-        tokenizer: Any, 
-        device: torch.device
+        self, sequence: str, model: Any, tokenizer: Any, device: torch.device
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
         logger.warning("Using legacy get_single_embedding_last_hidden_state method.")
-        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "last_hidden_state")
-    
+        return self._get_single_embedding_legacy(
+            sequence, model, tokenizer, device, "last_hidden_state"
+        )
+
     def get_single_embedding_all_layers(
-        self, 
-        sequence: str, 
-        model: Any, 
-        tokenizer: Any, 
-        device: torch.device
+        self, sequence: str, model: Any, tokenizer: Any, device: torch.device
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
         logger.warning("Using legacy get_single_embedding_all_layers method.")
-        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "all_layers")
-    
+        return self._get_single_embedding_legacy(
+            sequence, model, tokenizer, device, "all_layers"
+        )
+
     def get_single_embedding_first_layer(
-        self, 
-        sequence: str, 
-        model: Any, 
-        tokenizer: Any, 
-        device: torch.device
+        self, sequence: str, model: Any, tokenizer: Any, device: torch.device
     ) -> NDArray[np.float64]:
         """Legacy method for backward compatibility."""
         logger.warning("Using legacy get_single_embedding_first_layer method.")
-        return self._get_single_embedding_legacy(sequence, model, tokenizer, device, "first_layer")
-    
+        return self._get_single_embedding_legacy(
+            sequence, model, tokenizer, device, "first_layer"
+        )
+
     def _get_single_embedding_legacy(
-        self, 
-        sequence: str, 
-        model: Any, 
-        tokenizer: Any, 
+        self,
+        sequence: str,
+        model: Any,
+        tokenizer: Any,
         device: torch.device,
-        embedding_type: str
+        embedding_type: str,
     ) -> NDArray[np.float64]:
         """Helper method for legacy single embedding methods."""
         # Determine model type and create appropriate embedding model
         base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
-        
+
         embedding_model = ESM2EmbeddingModel("", device)
         embedding_model.model = base_model
         embedding_model.tokenizer = tokenizer
-        
+
         if embedding_type == "last_hidden_state":
             return embedding_model.get_single_embedding_last_hidden_state(sequence)
         elif embedding_type == "all_layers":
@@ -438,7 +446,7 @@ def _get_single_embedding_legacy(
             return embedding_model.get_single_embedding_first_layer(sequence)
         else:
             raise ValueError(f"Unknown embedding_type: {embedding_type}")
-    
+
     def cleanup(self) -> None:
         """Clean up all models and free memory."""
         for model in self._models.values():
@@ -453,4 +461,4 @@ def cleanup(self) -> None:
 
 def get_processor() -> EmbeddingProcessor:
     """Get the global embedding processor instance."""
-    return _processor 
\ No newline at end of file
+    return _processor
diff --git a/src/pyeed/embeddings/utils.py b/src/pyeed/embeddings/utils.py
index 987e3d11..bda92286 100644
--- a/src/pyeed/embeddings/utils.py
+++ b/src/pyeed/embeddings/utils.py
@@ -1,7 +1,7 @@
 """
 Utility functions for embedding operations.
 
-Contains helper functions for token management, memory management, 
+Contains helper functions for token management, memory management,
 and sequence preprocessing.
 """
 
@@ -33,10 +33,10 @@ def get_hf_token() -> str:
 def preprocess_sequence_for_prott5(sequence: str) -> str:
     """
     Preprocesses a protein sequence for ProtT5 models.
-    
+
     Args:
         sequence: Raw protein sequence
-        
+
     Returns:
         Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
     """
@@ -59,15 +59,15 @@ def free_memory() -> None:
 def determine_model_type(model_name: str) -> str:
     """
     Determine the model type based on model name.
-    
+
     Args:
         model_name: Name of the model
-        
+
     Returns:
         Model type string
     """
     model_name_lower = model_name.lower()
-    
+
     if "esmc" in model_name_lower:
         return "esmc"
     elif "esm3" in model_name_lower:
@@ -75,4 +75,4 @@ def determine_model_type(model_name: str) -> str:
     elif "prot_t5" in model_name_lower or "prott5" in model_name_lower:
         return "prott5"
     else:
-        return "esm2"  # Default to ESM-2 for other facebook/esm models 
\ No newline at end of file
+        return "esm2"  # Default to ESM-2 for other facebook/esm models
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index 22cdc61c..d5ab048d 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -203,7 +203,9 @@ def calculate_sequence_embeddings(
         batch_size: int = 16,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
         num_gpus: int = 1,  # Number of GPUs to use
-        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "final_embeddings"
+        embedding_type: Literal[
+            "last_hidden_state", "all_layers", "first_layer", "final_embeddings"
+        ] = "final_embeddings",
     ) -> None:
         """
         Calculates embeddings for all sequences in the database that do not have embeddings,
@@ -217,14 +219,14 @@ def calculate_sequence_embeddings(
         """
         # Get the embedding processor
         processor = get_processor()
-        
+
         # Use the simplified interface
         processor.calculate_database_embeddings(
             db=self.db,
             batch_size=batch_size,
             model_name=model_name,
             num_gpus=num_gpus,
-            embedding_type=embedding_type
+            embedding_type=embedding_type,
         )
 
         # free memory
@@ -473,30 +475,30 @@ def calculate_single_sequence_embedding(
         self,
         sequence: str,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
-        embedding_type: Literal["last_hidden_state", "all_layers", "first_layer", "final_embeddings"] = "last_hidden_state"
+        embedding_type: Literal[
+            "last_hidden_state", "all_layers", "first_layer", "final_embeddings"
+        ] = "last_hidden_state",
     ) -> Any:
         """
         Calculate embedding for a single protein sequence.
-        
+
         Args:
             sequence: Protein sequence string
             model_name: Model to use for embedding calculation
             embedding_type: Type of embedding to calculate
-            
+
         Returns:
             Numpy array containing the embedding
         """
         processor = get_processor()
         return processor.calculate_single_embedding(
-            sequence=sequence,
-            model_name=model_name,
-            embedding_type=embedding_type
+            sequence=sequence, model_name=model_name, embedding_type=embedding_type
         )
-    
+
     def get_available_devices(self) -> list[str]:
         """
         Get list of available devices for embedding computation.
-        
+
         Returns:
             List of available device names
         """

From 657ac1568385c408f5f05257ba43cedb51d672ed Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 11:21:09 +0000
Subject: [PATCH 07/11] update the utils

---
 src/pyeed/embeddings/processor.py | 30 ++++++++++++++++++++++++++++++
 src/pyeed/embeddings/utils.py     | 18 +++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index a816d874..5d9e5326 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -447,12 +447,42 @@ def _get_single_embedding_legacy(
         else:
             raise ValueError(f"Unknown embedding_type: {embedding_type}")
 
+    def remove_model(self, model_name: str, device: Optional[torch.device] = None) -> None:
+        """
+        Remove a specific model from the processor's cache and clean up its resources.
+        
+        Args:
+            model_name: Name of the model to remove
+            device: Specific device the model is on (optional)
+        """
+        if device is None:
+            # Remove model from all devices
+            keys_to_remove = [k for k in self._models.keys() if model_name in k]
+        else:
+            key = f"{model_name}_{device}"
+            keys_to_remove = [key] if key in self._models else []
+
+        for key in keys_to_remove:
+            if key in self._models:
+                # Clean up the model's resources
+                self._models[key].cleanup()
+                del self._models[key]
+                logger.info(f"Removed model {key} from processor cache")
+
+        # Force memory cleanup
+        free_memory()
+
     def cleanup(self) -> None:
         """Clean up all models and free memory."""
         for model in self._models.values():
             model.cleanup()
         self._models.clear()
         free_memory()
+        # Additional cleanup to ensure GPU memory is freed
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.reset_peak_memory_stats()
+            torch.cuda.synchronize()
 
 
 # Global processor instance
diff --git a/src/pyeed/embeddings/utils.py b/src/pyeed/embeddings/utils.py
index bda92286..9d6ac2fc 100644
--- a/src/pyeed/embeddings/utils.py
+++ b/src/pyeed/embeddings/utils.py
@@ -48,12 +48,28 @@ def preprocess_sequence_for_prott5(sequence: str) -> str:
 def free_memory() -> None:
     """
     Frees up memory by invoking garbage collection and clearing GPU caches.
+    This function performs a more thorough cleanup by:
+    1. Running garbage collection multiple times
+    2. Clearing CUDA/MPS caches
+    3. Resetting peak memory stats
+    4. Synchronizing CUDA operations
     """
-    gc.collect()
+    # Run garbage collection multiple times to ensure thorough cleanup
+    for _ in range(3):
+        gc.collect()
+    
     if torch.backends.mps.is_available():
         torch.mps.empty_cache()
     elif torch.cuda.is_available():
+        # Clear CUDA cache
         torch.cuda.empty_cache()
+        # Reset peak memory stats
+        torch.cuda.reset_peak_memory_stats()
+        # Synchronize CUDA operations
+        torch.cuda.synchronize()
+    
+    # Force garbage collection one final time
+    gc.collect()
 
 
 def determine_model_type(model_name: str) -> str:

From 460e8e3a71ffd41d9a71eceb061a5d0e6aaaa583 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Fri, 30 May 2025 11:43:29 +0000
Subject: [PATCH 08/11] Apply ruff formatting to processor.py and utils.py

---
 src/pyeed/embeddings/processor.py | 6 ++++--
 src/pyeed/embeddings/utils.py     | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index 5d9e5326..ab376fea 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -447,10 +447,12 @@ def _get_single_embedding_legacy(
         else:
             raise ValueError(f"Unknown embedding_type: {embedding_type}")
 
-    def remove_model(self, model_name: str, device: Optional[torch.device] = None) -> None:
+    def remove_model(
+        self, model_name: str, device: Optional[torch.device] = None
+    ) -> None:
         """
         Remove a specific model from the processor's cache and clean up its resources.
-        
+
         Args:
             model_name: Name of the model to remove
             device: Specific device the model is on (optional)
diff --git a/src/pyeed/embeddings/utils.py b/src/pyeed/embeddings/utils.py
index 9d6ac2fc..da5e69cd 100644
--- a/src/pyeed/embeddings/utils.py
+++ b/src/pyeed/embeddings/utils.py
@@ -57,7 +57,7 @@ def free_memory() -> None:
     # Run garbage collection multiple times to ensure thorough cleanup
     for _ in range(3):
         gc.collect()
-    
+
     if torch.backends.mps.is_available():
         torch.mps.empty_cache()
     elif torch.cuda.is_available():
@@ -67,7 +67,7 @@ def free_memory() -> None:
         torch.cuda.reset_peak_memory_stats()
         # Synchronize CUDA operations
         torch.cuda.synchronize()
-    
+
     # Force garbage collection one final time
     gc.collect()
 

From d636c049c6e190d162f8ebccc90e19e1cddc31f1 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Tue, 3 Jun 2025 10:22:20 +0000
Subject: [PATCH 09/11] Update ESM embeddings: remove legacy embedding.py and fix batch errors

---
 src/pyeed/embedding.py              | 654 ----------------------------
 src/pyeed/embeddings/models/esm2.py |  26 +-
 2 files changed, 15 insertions(+), 665 deletions(-)
 delete mode 100644 src/pyeed/embedding.py

diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
deleted file mode 100644
index fe928935..00000000
--- a/src/pyeed/embedding.py
+++ /dev/null
@@ -1,654 +0,0 @@
-import gc
-import os
-import re
-from typing import Any, List, Tuple, Union
-
-import numpy as np
-import torch
-from esm.models.esm3 import ESM3
-from esm.models.esmc import ESMC
-from esm.sdk.api import ESMProtein, LogitsConfig, SamplingConfig
-from huggingface_hub import HfFolder, login
-from loguru import logger
-from numpy.typing import NDArray
-from torch.nn import DataParallel, Module
-from transformers import EsmModel, EsmTokenizer, T5Model, T5Tokenizer
-
-from pyeed.dbconnect import DatabaseConnector
-
-
-def get_hf_token() -> str:
-    """Get or request Hugging Face token."""
-    if os.getenv("PYTEST_DISABLE_HF_LOGIN"):  # Disable Hugging Face login in tests
-        return "dummy_token_for_tests"
-
-    hf_folder = HfFolder()
-    token = hf_folder.get_token()
-    if not token:
-        login()  # Login returns None, get token after login
-        token = hf_folder.get_token()
-
-    if isinstance(token, str):
-        return token
-    else:
-        raise RuntimeError("Failed to get Hugging Face token")
-
-
-def process_batches_on_gpu(
-    data: list[tuple[str, str]],
-    batch_size: int,
-    model: Union[EsmModel, ESMC, ESM3, T5Model, DataParallel[Module]],
-    tokenizer: Union[EsmTokenizer, T5Tokenizer, None],
-    db: DatabaseConnector,
-    device: torch.device,
-) -> None:
-    """
-    Splits data into batches and processes them on a single GPU.
-
-    Args:
-        data (list): List of (accession_id, sequence) tuples.
-        batch_size (int): Size of each batch.
-        model: The model instance for this GPU.
-        tokenizer: The tokenizer for the model.
-        device (str): The assigned GPU device.
-        db: Database connection.
-    """
-    logger.debug(f"Processing {len(data)} sequences on {device}.")
-
-    model = model.to(device)
-
-    # Split data into smaller batches
-    for batch_start in range(0, len(data), batch_size):
-        batch_end = min(batch_start + batch_size, len(data))
-        batch = data[batch_start:batch_end]
-
-        accessions, sequences = zip(*batch)
-
-        current_batch_size = len(sequences)
-
-        while current_batch_size > 0:
-            try:
-                # Compute embeddings
-                embeddings_batch = get_batch_embeddings(
-                    list(sequences[:current_batch_size]), model, tokenizer, device
-                )
-
-                # Update the database
-                update_protein_embeddings_in_db(
-                    db, list(accessions[:current_batch_size]), embeddings_batch
-                )
-
-                # Move to the next batch
-                break  # Successful execution, move to the next batch
-
-            except torch.cuda.OutOfMemoryError:
-                torch.cuda.empty_cache()
-                current_batch_size = max(
-                    1, current_batch_size // 2
-                )  # Reduce batch size
-                logger.warning(
-                    f"Reduced batch size to {current_batch_size} due to OOM error."
-                )
-
-    # Free memory
-    del model
-    torch.cuda.empty_cache()
-
-
-def load_model_and_tokenizer(
-    model_name: str,
-    device: torch.device = torch.device("cuda:0"),
-) -> Tuple[
-    Union[EsmModel, ESMC, ESM3, T5Model],
-    Union[EsmTokenizer, T5Tokenizer, None],
-    torch.device,
-]:
-    """
-    Loads the model and assigns it to a specific GPU.
-
-    Args:
-        model_name (str): The model name.
-        device (str): The specific GPU device.
-
-    Returns:
-        Tuple: (model, tokenizer, device)
-    """
-    token = get_hf_token()
-    tokenizer = None
-
-    if "esmc" in model_name.lower():
-        model = ESMC.from_pretrained(model_name)
-        model = model.to(device)
-    elif "esm3-sm-open-v1" in model_name.lower():
-        model = ESM3.from_pretrained("esm3_sm_open_v1")
-        model = model.to(device)
-    elif "prot_t5" in model_name.lower() or "prott5" in model_name.lower():
-        # ProtT5 models
-        full_model_name = (
-            model_name if model_name.startswith("Rostlab/") else f"Rostlab/{model_name}"
-        )
-        model = T5Model.from_pretrained(full_model_name, use_auth_token=token)
-        tokenizer = T5Tokenizer.from_pretrained(
-            full_model_name, use_auth_token=token, do_lower_case=False
-        )
-        model = model.to(device)
-    else:
-        full_model_name = (
-            model_name
-            if model_name.startswith("facebook/")
-            else f"facebook/{model_name}"
-        )
-        model = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
-        tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)
-        model = model.to(device)
-
-    return model, tokenizer, device
-
-
-def preprocess_sequence_for_prott5(sequence: str) -> str:
-    """
-    Preprocesses a protein sequence for ProtT5 models.
-
-    Args:
-        sequence: Raw protein sequence
-
-    Returns:
-        Preprocessed sequence with spaces between amino acids and rare AAs mapped to X
-    """
-    # Map rare amino acids to X and add spaces between amino acids
-    sequence = re.sub(r"[UZOB]", "X", sequence.upper())
-    return " ".join(list(sequence))
-
-
-def get_batch_embeddings(
-    batch_sequences: list[str],
-    model: Union[
-        EsmModel,
-        ESMC,
-        DataParallel[Module],
-        ESM3,
-        T5Model,
-    ],
-    tokenizer_or_alphabet: Union[EsmTokenizer, T5Tokenizer, None],
-    device: torch.device,
-    pool_embeddings: bool = True,
-) -> list[NDArray[np.float64]]:
-    """
-    Generates mean-pooled embeddings for a batch of sequences.
-    Supports ESM++, ESM-2, ESM-3 and ProtT5 models.
-
-    Args:
-        batch_sequences (list[str]): List of sequence strings.
-        model: Loaded model (could be wrapped in DataParallel).
-        tokenizer_or_alphabet: Tokenizer if needed.
-        device: Inference device (CPU/GPU).
-        pool_embeddings (bool): Whether to average embeddings across the sequence length.
-
-    Returns:
-        List of embeddings as NumPy arrays.
-    """
-    # First, determine the base model type
-    base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
-
-    if isinstance(base_model, ESMC):
-        # For ESMC models
-        embedding_list: List[NDArray[np.float64]] = []
-        with torch.no_grad():
-            for sequence in batch_sequences:
-                protein = ESMProtein(sequence=sequence)
-                # Use the model directly - DataParallel handles internal distribution
-                protein_tensor = base_model.encode(protein)
-                logits_output = base_model.logits(
-                    protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
-                )
-                if logits_output.embeddings is None:
-                    raise ValueError(
-                        "Model did not return embeddings. Check LogitsConfig settings."
-                    )
-                embeddings = logits_output.embeddings.cpu().numpy()
-                if pool_embeddings:
-                    embeddings = embeddings.mean(axis=1)
-                embedding_list.append(embeddings[0])
-        return embedding_list
-    elif isinstance(base_model, ESM3):
-        # For ESM3 models
-        embedding_list_esm3: List[NDArray[np.float64]] = []
-        with torch.no_grad():
-            for sequence in batch_sequences:
-                protein = ESMProtein(sequence=sequence)
-                sequence_encoding = base_model.encode(protein)
-                result = base_model.forward_and_sample(
-                    sequence_encoding,
-                    SamplingConfig(return_per_residue_embeddings=True),
-                )
-                if result is None or result.per_residue_embedding is None:
-                    raise ValueError("Model did not return embeddings")
-                embeddings = (
-                    result.per_residue_embedding.to(torch.float32).cpu().numpy()
-                )
-                if pool_embeddings:
-                    embeddings = embeddings.mean(axis=0)
-                embedding_list_esm3.append(embeddings)
-        return embedding_list_esm3
-    elif isinstance(base_model, T5Model):
-        # For ProtT5 models
-        assert tokenizer_or_alphabet is not None, "Tokenizer required for ProtT5 models"
-        assert isinstance(
-            tokenizer_or_alphabet, T5Tokenizer
-        ), "T5Tokenizer required for ProtT5 models"
-
-        # Preprocess sequences for ProtT5
-        processed_sequences = [
-            preprocess_sequence_for_prott5(seq) for seq in batch_sequences
-        ]
-
-        inputs = tokenizer_or_alphabet.batch_encode_plus(
-            processed_sequences,
-            add_special_tokens=True,
-            padding="longest",
-            return_tensors="pt",
-        )
-
-        # Move inputs to device
-        input_ids = inputs["input_ids"].to(device)
-        attention_mask = inputs["attention_mask"].to(device)
-
-        with torch.no_grad():
-            # For ProtT5, use encoder embeddings for feature extraction
-            # Create dummy decoder inputs (just the pad token)
-            batch_size = input_ids.shape[0]
-            decoder_input_ids = torch.full(
-                (batch_size, 1),
-                tokenizer_or_alphabet.pad_token_id or 0,
-                dtype=torch.long,
-                device=device,
-            )
-
-            outputs = base_model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids,
-            )
-
-            # Get encoder last hidden state (encoder embeddings)
-            hidden_states = outputs.encoder_last_hidden_state.cpu().numpy()
-
-        if pool_embeddings:
-            # Mean pooling across sequence length, excluding padding tokens
-            prott5_embedding_list: List[NDArray[np.float64]] = []
-            for i, hidden_state in enumerate(hidden_states):
-                # Get actual sequence length (excluding padding)
-                attention_mask_np = attention_mask[i].cpu().numpy()
-                seq_len = attention_mask_np.sum()
-                # Pool only over actual sequence tokens
-                pooled_embedding = hidden_state[:seq_len].mean(axis=0)
-                prott5_embedding_list.append(pooled_embedding)
-            return prott5_embedding_list
-        return list(hidden_states)
-    else:
-        # ESM-2 logic
-        assert tokenizer_or_alphabet is not None, "Tokenizer required for ESM-2 models"
-        assert isinstance(
-            tokenizer_or_alphabet, EsmTokenizer
-        ), "EsmTokenizer required for ESM-2 models"
-        inputs = tokenizer_or_alphabet(
-            batch_sequences, padding=True, truncation=True, return_tensors="pt"
-        ).to(device)
-        with torch.no_grad():
-            outputs = base_model(**inputs, output_hidden_states=True)
-
-        # Get last hidden state for each sequence
-        hidden_states = outputs.last_hidden_state.cpu().numpy()
-
-        if pool_embeddings:
-            # Mean pooling across sequence length
-            return [embedding.mean(axis=0) for embedding in hidden_states]
-        return list(hidden_states)
-
-
-def calculate_single_sequence_embedding_last_hidden_state(
-    sequence: str,
-    device: torch.device = torch.device("cuda:0"),
-    model_name: str = "facebook/esm2_t33_650M_UR50D",
-) -> NDArray[np.float64]:
-    """
-    Calculates an embedding for a single sequence.
-
-    Args:
-        sequence: Input protein sequence
-        model_name: Name of the ESM model to use
-
-    Returns:
-        NDArray[np.float64]: Normalized embedding vector for the sequence
-    """
-    model, tokenizer, device = load_model_and_tokenizer(model_name, device)
-    return get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
-
-
-def calculate_single_sequence_embedding_all_layers(
-    sequence: str,
-    device: torch.device,
-    model_name: str = "facebook/esm2_t33_650M_UR50D",
-) -> NDArray[np.float64]:
-    """
-    Calculates embeddings for a single sequence across all layers.
-
-    Args:
-        sequence: Input protein sequence
-        model_name: Name of the ESM model to use
-
-    Returns:
-        NDArray[np.float64]: A numpy array containing layer embeddings for the sequence.
-    """
-    model, tokenizer, device = load_model_and_tokenizer(model_name, device)
-    return get_single_embedding_all_layers(sequence, model, tokenizer, device)
-
-
-def get_single_embedding_last_hidden_state(
-    sequence: str, model: Any, tokenizer: Any, device: torch.device
-) -> NDArray[np.float64]:
-    """Generate embeddings for a single sequence using the last hidden state.
-
-    Args:
-        sequence (str): The protein sequence to embed
-        model (Any): The transformer model to use
-        tokenizer (Any): The tokenizer for the model
-        device (torch.device): The device to run the model on (CPU/GPU)
-
-    Returns:
-        np.ndarray: Normalized embeddings for each token in the sequence
-    """
-    from esm.models.esmc import ESMC
-
-    with torch.no_grad():
-        if isinstance(model, ESMC):
-            # ESM-3 logic
-            from esm.sdk.api import ESMProtein, LogitsConfig
-
-            protein = ESMProtein(sequence=sequence)
-            protein_tensor = model.encode(protein)
-            logits_output = model.logits(
-                protein_tensor,
-                LogitsConfig(
-                    sequence=True,
-                    return_embeddings=True,
-                    return_hidden_states=True,
-                ),
-            )
-            # Ensure hidden_states is not None before accessing it
-            if logits_output.hidden_states is None:
-                raise ValueError(
-                    "Model did not return hidden states. Check LogitsConfig settings."
-                )
-
-            embedding = (
-                logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
-            )
-        elif isinstance(model, T5Model):
-            # ProtT5 logic
-            processed_sequence = preprocess_sequence_for_prott5(sequence)
-            inputs = tokenizer.encode_plus(
-                processed_sequence, add_special_tokens=True, return_tensors="pt"
-            )
-
-            input_ids = inputs["input_ids"].to(device)
-            attention_mask = inputs["attention_mask"].to(device)
-
-            # Create dummy decoder inputs
-            decoder_input_ids = torch.full(
-                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
-            )
-
-            outputs = model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids,
-            )
-
-            # Get encoder last hidden state including special tokens
-            embedding = outputs.encoder_last_hidden_state[0].detach().cpu().numpy()
-        else:
-            # ESM-2 logic
-            inputs = tokenizer(sequence, return_tensors="pt").to(device)
-            outputs = model(**inputs)
-            embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
-
-    # Ensure embedding is a numpy array with proper dtype and normalize it
-    embedding = np.asarray(embedding, dtype=np.float64)
-    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
-    norm[norm == 0] = 1.0  # Handle zero norm case
-    normalized_embedding = embedding / norm
-    return np.asarray(normalized_embedding, dtype=np.float64)
-
-
-def get_single_embedding_all_layers(
-    sequence: str, model: Any, tokenizer: Any, device: torch.device
-) -> NDArray[np.float64]:
-    """
-    Generates normalized embeddings for each token in the sequence across all layers.
-
-    For ESM-3 (ESMC) models, it assumes that passing
-    LogitsConfig(return_hidden_states=True) returns a collection of layer embeddings.
-    For ESM-2 models, it sets output_hidden_states=True.
-    For ProtT5 models, it gets encoder hidden states.
-
-    Args:
-        sequence (str): The protein sequence to embed.
-        model (Any): The transformer model to use.
-        tokenizer (Any): The tokenizer for the model (None for ESMC).
-        device (torch.device): The device to run the model on (CPU/GPU).
-
-    Returns:
-        NDArray[np.float64]: A numpy array containing the normalized token embeddings
-        concatenated across all layers.
-    """
-    embeddings_list: List[NDArray[np.float64]] = []
-    with torch.no_grad():
-        if isinstance(model, ESMC):
-            # For ESM-3: Use ESMProtein and request hidden states via LogitsConfig
-            protein = ESMProtein(sequence=sequence)
-            protein_tensor = model.encode(protein)
-            logits_output = model.logits(
-                protein_tensor,
-                LogitsConfig(
-                    sequence=True,
-                    return_embeddings=True,
-                    return_hidden_states=True,
-                ),
-            )
-            # Ensure hidden_states is not None before iterating
-            if logits_output.hidden_states is None:
-                raise ValueError(
-                    "Model did not return hidden states. Check if return_hidden_states=True is supported."
-                )
-
-            # logits_output.hidden_states should be a tuple of tensors: (layer, batch, seq_len, hidden_dim)
-            for layer_tensor in logits_output.hidden_states:
-                # Remove batch dimension and (if applicable) any special tokens
-                emb = layer_tensor[0].to(torch.float32).cpu().numpy()
-                # If your model adds special tokens, adjust the slicing (e.g., emb[1:-1])
-                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
-                embeddings_list.append(emb)
-
-        elif isinstance(model, T5Model):
-            # For ProtT5: Get encoder hidden states
-            processed_sequence = preprocess_sequence_for_prott5(sequence)
-            inputs = tokenizer.encode_plus(
-                processed_sequence, add_special_tokens=True, return_tensors="pt"
-            )
-
-            input_ids = inputs["input_ids"].to(device)
-            attention_mask = inputs["attention_mask"].to(device)
-
-            # Create dummy decoder inputs
-            decoder_input_ids = torch.full(
-                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
-            )
-
-            outputs = model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids,
-                output_hidden_states=True,
-            )
-
-            # Get all encoder hidden states
-            encoder_hidden_states = outputs.encoder_hidden_states
-            for layer_tensor in encoder_hidden_states:
-                # Remove batch dimension but keep special tokens
-                emb = layer_tensor[0].detach().cpu().numpy()
-                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
-                embeddings_list.append(emb)
-
-        else:
-            # For ESM-2: Get hidden states with output_hidden_states=True
-            inputs = tokenizer(sequence, return_tensors="pt").to(device)
-            outputs = model(**inputs, output_hidden_states=True)
-            hidden_states = (
-                outputs.hidden_states
-            )  # Tuple: (layer0, layer1, ..., layerN)
-            for layer_tensor in hidden_states:
-                # Remove batch dimension and special tokens ([CLS] and [SEP])
-                emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy()
-                emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
-                embeddings_list.append(emb)
-
-    return np.array(embeddings_list)
-
-
-def calculate_single_sequence_embedding_first_layer(
-    sequence: str,
-    model_name: str = "facebook/esm2_t33_650M_UR50D",
-    device: torch.device = torch.device("cuda:0"),
-) -> NDArray[np.float64]:
-    """
-    Calculates an embedding for a single sequence using the first layer.
-    """
-    model, tokenizer, device = load_model_and_tokenizer(model_name, device)
-    return get_single_embedding_first_layer(sequence, model, tokenizer, device)
-
-
-def get_single_embedding_first_layer(
-    sequence: str, model: Any, tokenizer: Any, device: torch.device
-) -> NDArray[np.float64]:
-    """
-    Generates normalized embeddings for each token in the sequence using the first layer.
-    """
-    embedding: NDArray[np.float64]
-
-    with torch.no_grad():
-        if isinstance(model, ESMC):
-            # ESM-3 logic
-            from esm.sdk.api import ESMProtein, LogitsConfig
-
-            protein = ESMProtein(sequence=sequence)
-            protein_tensor = model.encode(protein)
-            logits_output = model.logits(
-                protein_tensor,
-                LogitsConfig(
-                    sequence=True,
-                    return_embeddings=True,
-                    return_hidden_states=True,
-                ),
-            )
-            if logits_output.hidden_states is None:
-                raise ValueError(
-                    "Model did not return hidden states. Check LogitsConfig settings."
-                )
-            embedding = (
-                logits_output.hidden_states[0][0].to(torch.float32).cpu().numpy()
-            )
-
-        elif isinstance(model, ESM3):
-            # ESM-3 logic
-            from esm.sdk.api import ESMProtein, SamplingConfig
-
-            protein = ESMProtein(sequence=sequence)
-            protein_tensor = model.encode(protein)
-            result = model.forward_and_sample(
-                protein_tensor,
-                SamplingConfig(return_per_residue_embeddings=True),
-            )
-            if result is None or result.per_residue_embedding is None:
-                raise ValueError("Model did not return embeddings")
-            embedding = result.per_residue_embedding.to(torch.float32).cpu().numpy()
-
-        elif isinstance(model, T5Model):
-            # ProtT5 logic - get first layer embedding
-            processed_sequence = preprocess_sequence_for_prott5(sequence)
-            inputs = tokenizer.encode_plus(
-                processed_sequence, add_special_tokens=True, return_tensors="pt"
-            )
-
-            input_ids = inputs["input_ids"].to(device)
-            attention_mask = inputs["attention_mask"].to(device)
-
-            # Create dummy decoder inputs
-            decoder_input_ids = torch.full(
-                (1, 1), tokenizer.pad_token_id or 0, dtype=torch.long, device=device
-            )
-
-            outputs = model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                decoder_input_ids=decoder_input_ids,
-                output_hidden_states=True,
-            )
-
-            # Get first encoder hidden state including special tokens
-            embedding = outputs.encoder_hidden_states[0][0].detach().cpu().numpy()
-
-        else:
-            # ESM-2 logic
-            inputs = tokenizer(sequence, return_tensors="pt").to(device)
-            outputs = model(**inputs, output_hidden_states=True)
-            # Get the first layer's hidden states for all residues (excluding special tokens)
-            embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy()
-
-    # Ensure embedding is a numpy array with proper dtype and normalize it
-    embedding = np.asarray(embedding, dtype=np.float64)
-    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
-    norm[norm == 0] = 1.0  # Handle zero norm case
-    normalized_embedding = embedding / norm
-    return np.asarray(normalized_embedding, dtype=np.float64)
-
-
-def free_memory() -> None:
-    """
-    Frees up memory by invoking garbage collection and clearing GPU caches.
-    """
-    gc.collect()
-    if torch.backends.mps.is_available():
-        torch.mps.empty_cache()
-    elif torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
-
-def update_protein_embeddings_in_db(
-    db: DatabaseConnector,
-    accessions: list[str],
-    embeddings_batch: list[NDArray[np.float64]],
-) -> None:
-    """
-    Updates the embeddings for a batch of proteins in the database.
-
-    Args:
-        db (DatabaseConnector): The database connector.
-        accessions (list[str]): The accessions of the proteins to update.
-        embeddings_batch (list[NDArray[np.float64]]): The embeddings to update.
-    """
-    # Prepare the data for batch update
-    updates = [
-        {"accession": acc, "embedding": emb.tolist()}
-        for acc, emb in zip(accessions, embeddings_batch)
-    ]
-
-    # Cypher query for batch update
-    query = """
-    UNWIND $updates AS update
-    MATCH (p:Protein {accession_id: update.accession})
-    SET p.embedding = update.embedding
-    """
-
-    # Execute the update query with parameters
-    db.execute_write(query, {"updates": updates})
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index 2da08b66..fca5e4b2 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -55,20 +55,24 @@ def get_batch_embeddings(
         model = cast(EsmModel, self.model)
         tokenizer = cast(EsmTokenizer, self.tokenizer)
 
-        inputs = tokenizer(
-            sequences, padding=True, truncation=True, return_tensors="pt"
-        ).to(self.device)
+        embeddings = []
+        for sequence in sequences:
+            inputs = tokenizer(
+                sequence, padding=True, truncation=True, return_tensors="pt"
+            ).to(self.device)
 
-        with torch.no_grad():
-            outputs = model(**inputs, output_hidden_states=True)
+            with torch.no_grad():
+                outputs = model(**inputs, output_hidden_states=True)
 
-        # Get last hidden state for each sequence
-        hidden_states = outputs.last_hidden_state.cpu().numpy()
+            # Get last hidden state for each sequence
+            hidden_states = outputs.last_hidden_state.cpu().numpy()
 
-        if pool_embeddings:
-            # Mean pooling across sequence length
-            return [embedding.mean(axis=0) for embedding in hidden_states]
-        return list(hidden_states)
+            if pool_embeddings:
+                # Mean pooling across sequence length
+                embeddings.append(hidden_states.mean(axis=0))
+            else:
+                embeddings.append(hidden_states)
+        return embeddings
 
     def get_single_embedding_last_hidden_state(
         self, sequence: str

From b23831993c6de6337a2ee917f833e617d16af4e3 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Wed, 4 Jun 2025 07:41:32 +0000
Subject: [PATCH 10/11] fixes in esm2

---
 src/pyeed/embeddings/models/esm2.py | 6 ++++--
 src/pyeed/embeddings/processor.py   | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index fca5e4b2..04da50ce 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 from numpy.typing import NDArray
+from loguru import logger
 from transformers import EsmModel, EsmTokenizer
 
 from ..base import BaseEmbeddingModel, normalize_embedding
@@ -56,6 +57,7 @@ def get_batch_embeddings(
         tokenizer = cast(EsmTokenizer, self.tokenizer)
 
         embeddings = []
+
         for sequence in sequences:
             inputs = tokenizer(
                 sequence, padding=True, truncation=True, return_tensors="pt"
@@ -68,8 +70,8 @@ def get_batch_embeddings(
             hidden_states = outputs.last_hidden_state.cpu().numpy()
 
             if pool_embeddings:
-                # Mean pooling across sequence length
-                embeddings.append(hidden_states.mean(axis=0))
+                # Mean pooling across sequence length (axis=1)
+                embeddings.append(hidden_states.mean(axis=1)[0])
             else:
                 embeddings.append(hidden_states)
         return embeddings
diff --git a/src/pyeed/embeddings/processor.py b/src/pyeed/embeddings/processor.py
index ab376fea..693f3838 100644
--- a/src/pyeed/embeddings/processor.py
+++ b/src/pyeed/embeddings/processor.py
@@ -194,6 +194,7 @@ def _process_batch_single_device(
 
             accessions, sequences = zip(*batch)
             current_batch_size = len(sequences)
+            logger.info(f"Processing {len(sequences)} sequences")
 
             while current_batch_size > 0:
                 try:

From cfd284d9fdd7a39e1e3d7819f0f738c02b963470 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Wed, 4 Jun 2025 07:45:12 +0000
Subject: [PATCH 11/11] update ruff and mypy

---
 pyproject.toml                      | 1 +
 src/pyeed/embeddings/models/esm2.py | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index bf00381f..9e77bcce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ pysam = "0.23.0"
 types-requests = "2.32.0.20250328"
 ipywidgets = "^8.1.7"
 sentencepiece = "^0.2.0"
+umap-learn = "^0.5.7"
 
 [tool.poetry.group.dev.dependencies]
 mkdocstrings = {extras = ["python"], version = "^0.26.2"}
diff --git a/src/pyeed/embeddings/models/esm2.py b/src/pyeed/embeddings/models/esm2.py
index 04da50ce..0db0b25a 100644
--- a/src/pyeed/embeddings/models/esm2.py
+++ b/src/pyeed/embeddings/models/esm2.py
@@ -7,7 +7,6 @@
 import numpy as np
 import torch
 from numpy.typing import NDArray
-from loguru import logger
 from transformers import EsmModel, EsmTokenizer
 
 from ..base import BaseEmbeddingModel, normalize_embedding