From 7deeca700d2fe02f13d1c8ebc078b9126852d751 Mon Sep 17 00:00:00 2001 From: kraysent Date: Thu, 13 Nov 2025 14:35:22 +0000 Subject: [PATCH 1/2] #325: use table_name everywhere --- app/data/model/table.py | 2 +- app/data/repositories/layer0/modifiers.py | 27 ++++++++++++--- app/data/repositories/layer0/records.py | 8 ++++- app/data/repositories/layer0/repository.py | 24 +++++--------- app/data/repositories/layer0/tables.py | 33 ++++--------------- app/domain/adminapi/table_upload.py | 10 +++--- app/domain/unification/marking.py | 2 -- app/presentation/adminapi/interface.py | 2 +- app/tasks/crossmatch.py | 4 --- .../layer0_tables_repository_test.py | 4 +-- tests/integration/layer1_repository_test.py | 4 +-- tests/integration/layer2_import_test.py | 8 ++--- tests/integration/rawdata_table_test.py | 18 +++++----- tests/unit/data/layer0_repository_test.py | 3 +- tests/unit/domain/table_upload_test.py | 6 ++-- 15 files changed, 73 insertions(+), 82 deletions(-) diff --git a/app/data/model/table.py b/app/data/model/table.py index 8378ceb6..5f8dcee8 100644 --- a/app/data/model/table.py +++ b/app/data/model/table.py @@ -10,7 +10,7 @@ @dataclass class Layer0RawData: - table_id: int + table_name: str data: pandas.DataFrame diff --git a/app/data/repositories/layer0/modifiers.py b/app/data/repositories/layer0/modifiers.py index a63c515c..f7c2f1d5 100644 --- a/app/data/repositories/layer0/modifiers.py +++ b/app/data/repositories/layer0/modifiers.py @@ -1,11 +1,18 @@ import json -from app.data import model +from app.data import model, template from app.lib.storage import postgres +from app.lib.web.errors import DatabaseError class Layer0ModifiersRepository(postgres.TransactionalPGRepository): - def get_modifiers(self, table_id: int) -> list[model.Modifier]: + def get_modifiers(self, table_name: str) -> list[model.Modifier]: + table_id_row = self._storage.query_one(template.FETCH_RAWDATA_REGISTRY, params=[table_name]) + if table_id_row is None: + raise DatabaseError(f"unable to fetch table with name {table_name}") + + table_id = table_id_row["id"] + rows = self._storage.query( """ SELECT column_name, modifier_name, params @@ -21,7 +28,13 @@ def get_modifiers(self, table_id: int) -> list[model.Modifier]: for row in rows ] - def add_modifiers(self, table_id: int, modifiers: list[model.Modifier]) -> None: + def add_modifiers(self, table_name: str, modifiers: list[model.Modifier]) -> None: + table_id_row = self._storage.query_one(template.FETCH_RAWDATA_REGISTRY, params=[table_name]) + if table_id_row is None: + raise DatabaseError(f"unable to fetch table with name {table_name}") + + table_id = table_id_row["id"] + query = """ INSERT INTO layer0.column_modifiers (table_id, column_name, modifier_name, params, sequence) VALUES""" @@ -29,7 +42,13 @@ def add_modifiers(self, table_id: int, modifiers: list[model.Modifier]) -> None: values = [] for sequence, modifier in enumerate(modifiers): params.extend( - [table_id, modifier.column_name, modifier.modifier_name, json.dumps(modifier.params), sequence] + [ + table_id, + modifier.column_name, + modifier.modifier_name, + json.dumps(modifier.params), + sequence, + ] ) values.append("(%s, %s, %s, %s, %s)") diff --git a/app/data/repositories/layer0/records.py b/app/data/repositories/layer0/records.py index 31de53e5..2a1079ae 100644 --- a/app/data/repositories/layer0/records.py +++ b/app/data/repositories/layer0/records.py @@ -8,10 +8,16 @@ class Layer0RecordRepository(postgres.TransactionalPGRepository): - def register_records(self, table_id: int, record_ids: list[str]) -> None: + def register_records(self, table_name: str, record_ids: list[str]) -> None: if len(record_ids) == 0: raise RuntimeError("no records to upsert") + table_id_row = self._storage.query_one(template.FETCH_RAWDATA_REGISTRY, params=[table_name]) + if table_id_row is None: + raise DatabaseError(f"unable to fetch table with name {table_name}") + + table_id = table_id_row["id"] + query = "INSERT INTO layer0.records (id, table_id) VALUES " params = [] values = [] diff --git a/app/data/repositories/layer0/repository.py b/app/data/repositories/layer0/repository.py index 9c6b3c00..75ee10be 100644 --- a/app/data/repositories/layer0/repository.py +++ b/app/data/repositories/layer0/repository.py @@ -35,17 +35,17 @@ def fetch_table( def fetch_raw_data( self, - table_id: int, + table_name: str, offset: str | None = None, columns: list[str] | None = None, order_column: str | None = None, order_direction: str = "asc", limit: int | None = None, ) -> model.Layer0RawData: - return self.table_repo.fetch_raw_data(table_id, offset, columns, order_column, order_direction, limit) + return self.table_repo.fetch_raw_data(table_name, offset, columns, order_column, order_direction, limit) - def fetch_metadata(self, table_id: int) -> model.Layer0TableMeta: - return self.table_repo.fetch_metadata(table_id) + def fetch_metadata(self, table_name: str) -> model.Layer0TableMeta: + return self.table_repo.fetch_metadata(table_name) def fetch_metadata_by_name(self, table_name: str) -> model.Layer0TableMeta: return self.table_repo.fetch_metadata_by_name(table_name) @@ -53,8 +53,8 @@ def fetch_metadata_by_name(self, table_name: str) -> model.Layer0TableMeta: def update_column_metadata(self, table_name: str, column_description: model.ColumnDescription) -> None: return self.table_repo.update_column_metadata(table_name, column_description) - def register_records(self, table_id: int, record_ids: list[str]) -> None: - return self.records_repo.register_records(table_id, record_ids) + def register_records(self, table_name: str, record_ids: list[str]) -> None: + return self.records_repo.register_records(table_name, record_ids) def get_table_statistics(self, table_name: str) -> model.TableStatistics: return self.records_repo.get_table_statistics(table_name) @@ -88,15 +88,7 @@ def add_homogenization_params(self, params: list[model.HomogenizationParams]) -> return self.homogenization_repo.add_homogenization_params(params) def get_modifiers(self, table_name: str) -> list[model.Modifier]: - meta = self.fetch_metadata_by_name(table_name) - if meta.table_id is None: - raise RuntimeError(f"{table_name} has no table_id") - - return self.modifier_repo.get_modifiers(meta.table_id) + return self.modifier_repo.get_modifiers(table_name) def add_modifier(self, table_name: str, modifiers: list[model.Modifier]) -> None: - meta = self.fetch_metadata_by_name(table_name) - if meta.table_id is None: - raise RuntimeError(f"{table_name} has no table_id") - - return self.modifier_repo.add_modifiers(meta.table_id, modifiers) + return self.modifier_repo.add_modifiers(table_name, modifiers) diff --git a/app/data/repositories/layer0/tables.py b/app/data/repositories/layer0/tables.py index 5e53ce25..016d7e32 100644 --- a/app/data/repositories/layer0/tables.py +++ b/app/data/repositories/layer0/tables.py @@ -72,15 +72,9 @@ def insert_raw_data(self, data: model.Layer0RawData) -> None: """ if len(data.data) == 0: - log.warn("trying to insert 0 rows into the table", table_id=data.table_id) + log.warn("trying to insert 0 rows into the table", table_name=data.table_name) return - row = self._storage.query_one(template.GET_RAWDATA_TABLE, params=[data.table_id]) - table_name = row.get("table_name") - - if table_name is None: - raise DatabaseError(f"unable to fetch table with id {data.table_id}") - fields = data.data.columns values = [] @@ -99,7 +93,7 @@ def insert_raw_data(self, data: model.Layer0RawData) -> None: fields = [f'"{field}"' for field in fields] query = f""" - INSERT INTO rawdata."{table_name}" ({",".join(fields)}) + INSERT INTO rawdata."{data.table_name}" ({",".join(fields)}) VALUES {",".join(values)} ON CONFLICT DO NOTHING """ @@ -168,7 +162,7 @@ def fetch_table( def fetch_raw_data( self, - table_id: int, + table_name: str, offset: str | None = None, columns: list[str] | None = None, order_column: str | None = None, @@ -176,7 +170,7 @@ def fetch_raw_data( limit: int | None = None, ) -> model.Layer0RawData: """ - :param table_id: ID of the raw table + :param table_name: Name of the raw table :param columns: select only given columns :param order_column: orders result by a provided column :param order_direction: if `order_column` is specified, sets order direction. Either `asc` or `desc`. @@ -184,12 +178,6 @@ def fetch_raw_data( :param limit: allows to retrieve no more than `limit` rows :return: Layer0RawData """ - row = self._storage.query_one(template.GET_RAWDATA_TABLE, params=[table_id]) - table_name = row.get("table_name") - - if table_name is None: - raise DatabaseError(f"unable to fetch table with id {table_id}") - columns_str = ",".join(columns or ["*"]) params = [] @@ -209,17 +197,10 @@ def fetch_raw_data( params.append(limit) rows = self._storage.query(query, params=params) - return model.Layer0RawData(table_id, pandas.DataFrame(rows)) - - def fetch_metadata(self, table_id: int) -> model.Layer0TableMeta: - row = self._storage.query_one(template.GET_RAWDATA_TABLE, params=[table_id]) - table_name = row.get("table_name") - modification_dt: datetime.datetime | None = row.get("modification_dt") + return model.Layer0RawData(table_name, pandas.DataFrame(rows)) - if table_name is None: - raise DatabaseError(f"unable to fetch table with id {table_id}") - - return self._fetch_metadata_by_name(table_name, modification_dt) + def fetch_metadata(self, table_name: str) -> model.Layer0TableMeta: + return self.fetch_metadata_by_name(table_name) def fetch_metadata_by_name(self, table_name: str) -> model.Layer0TableMeta: row = self._storage.query_one(template.FETCH_RAWDATA_REGISTRY, params=[table_name]) diff --git a/app/domain/adminapi/table_upload.py b/app/domain/adminapi/table_upload.py index 8c19b009..ec33b2b5 100644 --- a/app/domain/adminapi/table_upload.py +++ b/app/domain/adminapi/table_upload.py @@ -79,7 +79,7 @@ def patch_table(self, r: adminapi.PatchTableRequest) -> adminapi.PatchTableRespo def add_data(self, r: adminapi.AddDataRequest) -> adminapi.AddDataResponse: data_df = pandas.DataFrame.from_records(r.data) - data_df[repositories.INTERNAL_ID_COLUMN_NAME] = data_df.apply(_get_hash_func(r.table_id), axis=1) + data_df[repositories.INTERNAL_ID_COLUMN_NAME] = data_df.apply(_get_hash_func(r.table_name), axis=1) data_df = data_df.drop_duplicates(subset=repositories.INTERNAL_ID_COLUMN_NAME, keep="last") with self.layer0_repo.with_tx(): @@ -87,13 +87,13 @@ def add_data(self, r: adminapi.AddDataRequest) -> adminapi.AddDataResponse: errgr.run( self.layer0_repo.insert_raw_data, model.Layer0RawData( - table_id=r.table_id, + table_name=r.table_name, data=data_df, ), ) errgr.run( self.layer0_repo.register_records, - r.table_id, + r.table_name, record_ids=data_df[repositories.INTERNAL_ID_COLUMN_NAME].tolist(), ) @@ -242,7 +242,7 @@ def _column_description_to_presentation(columns: list[model.ColumnDescription]) return res -def _get_hash_func(table_id: int) -> Callable[[pandas.Series], str]: +def _get_hash_func(table_name: str) -> Callable[[pandas.Series], str]: def _compute_hash(row: pandas.Series) -> str: """ This function applies special algorithm to an iterable to compute stable hash. @@ -256,7 +256,7 @@ def _compute_hash(row: pandas.Series) -> str: data = sorted(data, key=lambda t: t[0]) data_string = json.dumps(data, separators=(",", ":")) - return _hashfunc(f"{table_id}_{data_string}") + return _hashfunc(f"{table_name}_{data_string}") return _compute_hash diff --git a/app/domain/unification/marking.py b/app/domain/unification/marking.py index 2efecba1..320864df 100644 --- a/app/domain/unification/marking.py +++ b/app/domain/unification/marking.py @@ -61,8 +61,6 @@ def mark_objects( ignore_homogenization_errors: bool = True, ) -> None: meta = layer0_repo.fetch_metadata_by_name(table_name) - if meta.table_id is None: - raise RuntimeError(f"Table {table_name} has no table_id") h = get_homogenization(layer0_repo, meta, ignore_errors=ignore_homogenization_errors) modificator = get_modificator(layer0_repo, meta.table_name) diff --git a/app/presentation/adminapi/interface.py b/app/presentation/adminapi/interface.py index bd3cfd83..86a890c5 100644 --- a/app/presentation/adminapi/interface.py +++ b/app/presentation/adminapi/interface.py @@ -93,7 +93,7 @@ class CreateTableResponse(pydantic.BaseModel): class AddDataRequest(pydantic.BaseModel): - table_id: int + table_name: str data: list[dict[str, Any]] = pydantic.Field( description="""Actual data to append. Keys in this dictionary must be a subset of the columns in the table. If not specified, column will be set to NULL. diff --git a/app/tasks/crossmatch.py b/app/tasks/crossmatch.py index 4a3e3fb4..fbf4ee83 100644 --- a/app/tasks/crossmatch.py +++ b/app/tasks/crossmatch.py @@ -72,10 +72,6 @@ def prepare(self, config: interface.Config): self.solver = crossmatch.create_solver(test_solver_config, solvers) def run(self): - table_meta = self.layer0_repo.fetch_metadata_by_name(self.table_name) - if table_meta.table_id is None: - raise RuntimeError(f"Table {self.table_name} has no table_id") - ctx = {"table_name": self.table_name} offset = None diff --git a/tests/integration/layer0_tables_repository_test.py b/tests/integration/layer0_tables_repository_test.py index 32ae49a2..a121cae8 100644 --- a/tests/integration/layer0_tables_repository_test.py +++ b/tests/integration/layer0_tables_repository_test.py @@ -32,9 +32,9 @@ def test_write_and_fetch_table(self): self.bib_id, ) - table_resp = self.layer0_repo.create_table(table_meta) + _ = self.layer0_repo.create_table(table_meta) test_data = pd.DataFrame({"ra": [12.1, 11.1], "dec": [1.0, 2.0]}) - raw_data = model.Layer0RawData(table_resp.table_id, test_data) + raw_data = model.Layer0RawData(table_meta.table_name, test_data) self.layer0_repo.insert_raw_data(raw_data) diff --git a/tests/integration/layer1_repository_test.py b/tests/integration/layer1_repository_test.py index 68d5b5f7..ed962cec 100644 --- a/tests/integration/layer1_repository_test.py +++ b/tests/integration/layer1_repository_test.py @@ -27,7 +27,7 @@ def test_icrs(self): ] bib_id = self.common_repo.create_bibliography("123456", 2000, ["test"], "test") - table_resp = self.layer0_repo.create_table( + _ = self.layer0_repo.create_table( model.Layer0TableMeta( "test_table", [ @@ -40,7 +40,7 @@ def test_icrs(self): enums.DataType.REGULAR, ) ) - self.layer0_repo.register_records(table_resp.table_id, ["111", "112"]) + self.layer0_repo.register_records("test_table", ["111", "112"]) self.layer1_repo.save_data(objects) result = self.pg_storage.storage.query("SELECT ra FROM icrs.data ORDER BY ra") diff --git a/tests/integration/layer2_import_test.py b/tests/integration/layer2_import_test.py index 42b948e8..055fc8bd 100644 --- a/tests/integration/layer2_import_test.py +++ b/tests/integration/layer2_import_test.py @@ -36,9 +36,9 @@ def _get_table(self, table_name: str) -> int: return table_resp.table_id def test_import_two_catalogs(self): - table_id = self._get_table("test_import_two_catalogs") + _ = self._get_table("test_import_two_catalogs") self.layer0_repo.register_records( - table_id, + "test_import_two_catalogs", ["123", "124"], ) @@ -80,9 +80,9 @@ def test_import_two_catalogs(self): def test_updated_objects(self): self.test_import_two_catalogs() - table_id = self._get_table("test_updated_objects") + _ = self._get_table("test_updated_objects") self.layer0_repo.register_records( - table_id, + "test_updated_objects", ["125", "126"], ) self.layer0_repo.upsert_pgc({"125": 1234, "126": 1234}) diff --git a/tests/integration/rawdata_table_test.py b/tests/integration/rawdata_table_test.py index ebd41cea..0b54cc58 100644 --- a/tests/integration/rawdata_table_test.py +++ b/tests/integration/rawdata_table_test.py @@ -63,7 +63,7 @@ def test_create_table_happy_case(self): self.manager.add_data( presentation.AddDataRequest( - table_id=table_resp.id, + table_name="test_table", data=[ {"ra": 5.5, "dec": 88}, {"ra": 5.0, "dec": -50}, @@ -111,7 +111,7 @@ def test_create_table_with_nulls(self): self.manager.add_data( presentation.AddDataRequest( - table_id=table_resp.id, + table_name="test_table", data=[{"ra": 5.5}, {"ra": 5.0}], ), ) @@ -193,7 +193,7 @@ def test_add_data_to_unknown_column(self): with self.assertRaises(psycopg.errors.UndefinedColumn): self.manager.add_data( presentation.AddDataRequest( - table_id=table_resp.id, + table_name="test_table", data=[{"totally_nonexistent_column": 5.5}], ), ) @@ -201,7 +201,7 @@ def test_add_data_to_unknown_column(self): def test_fetch_raw_table(self): data = DataFrame({"col0": [1, 2, 3, 4], "col1": ["ad", "ad", "a", "he"]}) bib_id = self.manager.common_repo.create_bibliography("2024arXiv240411942F", 1999, ["ade"], "title") - table_resp = self.manager.layer0_repo.create_table( + _ = self.manager.layer0_repo.create_table( model.Layer0TableMeta( "test_table", [model.ColumnDescription("col0", TYPE_INTEGER), model.ColumnDescription("col1", TYPE_TEXT)], @@ -209,12 +209,12 @@ def test_fetch_raw_table(self): enums.DataType.REGULAR, ), ) - self.manager.layer0_repo.insert_raw_data(model.Layer0RawData(table_resp.table_id, data)) - expected = self.manager.layer0_repo.fetch_raw_data(table_resp.table_id) + self.manager.layer0_repo.insert_raw_data(model.Layer0RawData("test_table", data)) + expected = self.manager.layer0_repo.fetch_raw_data("test_table") self.assertTrue(expected.data.equals(data)) - expected = self.manager.layer0_repo.fetch_raw_data(table_resp.table_id, columns=["col1"]) + expected = self.manager.layer0_repo.fetch_raw_data("test_table", columns=["col1"]) self.assertTrue(expected.data.equals(data.drop(["col0"], axis=1))) def test_fetch_metadata(self): @@ -226,9 +226,9 @@ def test_fetch_metadata(self): bib_id, enums.DataType.REGULAR, ) - table_resp = self.manager.layer0_repo.create_table(expected) + _ = self.manager.layer0_repo.create_table(expected) - actual = self.manager.layer0_repo.fetch_metadata(table_resp.table_id) + actual = self.manager.layer0_repo.fetch_metadata("test_table") self.assertEqual(expected.table_name, actual.table_name) self.assertEqual(expected.column_descriptions, actual.column_descriptions) diff --git a/tests/unit/data/layer0_repository_test.py b/tests/unit/data/layer0_repository_test.py index 7b561cba..cd1e8f8c 100644 --- a/tests/unit/data/layer0_repository_test.py +++ b/tests/unit/data/layer0_repository_test.py @@ -43,10 +43,9 @@ def setUp(self) -> None: ] ) def test_fetch_raw_data(self, name: str, kwargs: dict, expected_query: str): - lib.returns(self.storage_mock.query_one, {"table_name": "ironman"}) lib.returns(self.storage_mock.query, {"haha": [1, 2]}) - _ = self.repo.fetch_raw_data(10, **kwargs) + _ = self.repo.fetch_raw_data("ironman", **kwargs) args, _ = self.storage_mock.query.call_args def transform(s): diff --git a/tests/unit/domain/table_upload_test.py b/tests/unit/domain/table_upload_test.py index ad565dcc..27f6b32e 100644 --- a/tests/unit/domain/table_upload_test.py +++ b/tests/unit/domain/table_upload_test.py @@ -25,7 +25,7 @@ def setUp(self): def test_add_data(self): request = presentation.AddDataRequest( - table_id=42, + table_name="test_table", data=[ { "test": "row", @@ -46,12 +46,12 @@ def test_add_data(self): self.assertListEqual(list(request.args[0].data["number"]), [41, 43]) self.assertListEqual( list(request.args[0].data["hyperleda_internal_id"]), - ["e75d9505-36d1-26c6-23e9-54f663ce35a2", "27fb0c18-b72b-3457-0bff-56b40182638a"], + ["1b4bbb6e-27d8-f7b8-2a5e-3a37b1c3248e", "a62b5fd9-9b6a-964c-406d-3fa4fc3471d7"], ) def test_add_data_identical_rows(self): request = presentation.AddDataRequest( - table_id=42, + table_name="test_table", data=[ { "test": "row", From 84fef65c8ee506c370090eaf409ba6d53ccf0536 Mon Sep 17 00:00:00 2001 From: kraysent Date: Thu, 13 Nov 2025 14:39:57 +0000 Subject: [PATCH 2/2] fix regtests --- tests/regression/upload_simple_table.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/regression/upload_simple_table.py b/tests/regression/upload_simple_table.py index f2a9e258..6e0aa8d9 100644 --- a/tests/regression/upload_simple_table.py +++ b/tests/regression/upload_simple_table.py @@ -70,7 +70,7 @@ def create_bibliography(session: requests.Session) -> str: @lib.test_logging_decorator -def create_table(session: requests.Session, bib_id: str) -> tuple[int, str]: +def create_table(session: requests.Session, bib_id: str) -> str: table_name = f"test_{str(uuid.uuid4())[:8]}" request_data = adminapi.CreateTableRequest( @@ -106,15 +106,14 @@ def create_table(session: requests.Session, bib_id: str) -> tuple[int, str]: response = session.post("/v1/table", json=request_data.model_dump(mode="json")) response.raise_for_status() - table_id = response.json()["data"]["id"] - return table_id, table_name + return table_name @lib.test_logging_decorator def upload_data( session: requests.Session, - table_id: int, + table_name: str, objects_num: int, ra_center: float, dec_center: float, @@ -132,7 +131,7 @@ def upload_data( df = pandas.DataFrame.from_records(synthetic_data) request_data = adminapi.AddDataRequest( - table_id=table_id, + table_name=table_name, data=df.to_dict("records"), # type: ignore ) @@ -357,10 +356,10 @@ def run(): code = create_bibliography(adminapi) # ---- Create table with all `new` objects and upload it to layer 2 ---- - table_id, table_name = create_table(adminapi, code) + table_name = create_table(adminapi, code) upload_data( adminapi, - table_id, + table_name, objects_num=OBJECTS_NUM, ra_center=COORD_RA_CENTER, dec_center=COORD_DEC_CENTER, @@ -382,10 +381,10 @@ def run(): check_dataapi_coord_query(dataapi, COORD_RA_CENTER, COORD_DEC_CENTER, COORD_RADIUS / 2) # ---- Create table with all `existing` objects and upload it to layer 2 ---- - table_id_2, table_name_2 = create_table(adminapi, code) + table_name_2 = create_table(adminapi, code) upload_data( adminapi, - table_id_2, + table_name_2, objects_num=OBJECTS_NUM, ra_center=COORD_RA_CENTER, dec_center=COORD_DEC_CENTER, @@ -394,7 +393,7 @@ def run(): ) upload_data( adminapi, - table_id_2, + table_name_2, objects_num=SMALL_CLUSTER_OBJECTS_NUM, ra_center=SMALL_CLUSTER_RA_CENTER, dec_center=SMALL_CLUSTER_DEC_CENTER, @@ -411,10 +410,10 @@ def run(): layer2_import() # ---- Create table with `collided` objects ---- - table_id_3, table_name_3 = create_table(adminapi, code) + table_name_3 = create_table(adminapi, code) upload_data( adminapi, - table_id_3, + table_name_3, objects_num=SMALL_CLUSTER_OBJECTS_NUM // 2, ra_center=SMALL_CLUSTER_RA_CENTER, dec_center=SMALL_CLUSTER_DEC_CENTER,