From 4d4bc141f08704bebe48952c2b0d67f3a44b0966 Mon Sep 17 00:00:00 2001 From: kraysent Date: Wed, 12 Nov 2025 20:01:54 +0000 Subject: [PATCH 1/4] #329: add PGC table and andd it to objects table --- .github/workflows/regression-testing.yml | 1 + postgres/migrations/V013__pgc.sql | 33 ++++++++++++ postgres/migrations/V013_pgc.sql | 67 ------------------------ 3 files changed, 34 insertions(+), 67 deletions(-) create mode 100644 postgres/migrations/V013__pgc.sql delete mode 100644 postgres/migrations/V013_pgc.sql diff --git a/.github/workflows/regression-testing.yml b/.github/workflows/regression-testing.yml index 20fcb42f..49e97b07 100644 --- a/.github/workflows/regression-testing.yml +++ b/.github/workflows/regression-testing.yml @@ -5,6 +5,7 @@ on: jobs: test-ubuntu: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/postgres/migrations/V013__pgc.sql b/postgres/migrations/V013__pgc.sql new file mode 100644 index 00000000..4b1e2748 --- /dev/null +++ b/postgres/migrations/V013__pgc.sql @@ -0,0 +1,33 @@ +/* pgmigrate-encoding: utf-8 */ + +CREATE TABLE common.pgc ( + id serial PRIMARY KEY +); + +COMMENT ON TABLE common.pgc IS 'The list of existing PGC-numbers' ; +COMMENT ON COLUMN common.pgc.id IS 'Unique PGC-number' ; + +ALTER TABLE rawdata.objects +ADD COLUMN pgc integer REFERENCES common.pgc (id) ON DELETE restrict ON UPDATE cascade, +ADD COLUMN modification_time timestamp without time zone ; + +COMMENT ON TABLE rawdata.objects IS 'The registry of all objects in original data tables' ; +COMMENT ON COLUMN rawdata.objects.id IS 'The record id' ; +COMMENT ON COLUMN rawdata.objects.table_id IS 'The table in which the record is located' ; +COMMENT ON COLUMN rawdata.objects.pgc IS 'Crossidentification of the record with the PGC-number' ; +COMMENT ON COLUMN rawdata.objects.modification_time IS 'Time of PGC-number assignment to the record' ; + +INSERT INTO common.pgc (id) +SELECT id +FROM rawdata.pgc +ORDER BY rawdata.pgc.id; + +ALTER SEQUENCE common.pgc_id_seq RESTART WITH 6775395 ; + +UPDATE rawdata.objects +SET + pgc=rawdata.pgc.id +FROM rawdata.pgc +WHERE + rawdata.pgc.object_id=rawdata.objects.id +; diff --git a/postgres/migrations/V013_pgc.sql b/postgres/migrations/V013_pgc.sql deleted file mode 100644 index b51f8fcf..00000000 --- a/postgres/migrations/V013_pgc.sql +++ /dev/null @@ -1,67 +0,0 @@ -BEGIN; - --- ALTER ROLE dim WITH SUPERUSER ; - -------------------------------------------------------------- --- Сохранять время модификации PGC нет необходимости, --- т.к. 
есть время привязки объекта с записями и этого достаточно для отслеживания модификаций --- --- добавление нового PGC номера делается командой --- INSERT INTO common.pgc DEFAULT VALUES ; -------------------------------------------------------------- - -CREATE TABLE common.pgc ( - id Serial PRIMARY KEY -); - -COMMENT ON TABLE common.pgc IS 'The list of existing PGC-numbers' ; -COMMENT ON COLUMN common.pgc.id IS 'Unique PGC-number' ; - - --------------------------- --- Ассоциация записей с PGC номерами переносится в таблицу objects --------------------------- - -ALTER TABLE rawdata.objects -ADD COLUMN pgc Integer REFERENCES common.pgc (id) ON DELETE restrict ON UPDATE cascade, -ADD COLUMN modification_time Timestamp Without Time Zone ; - -COMMENT ON TABLE rawdata.objects IS 'The register of all objects in original data tables' ; -COMMENT ON COLUMN rawdata.objects.id IS 'The record id' ; -COMMENT ON COLUMN rawdata.objects.table_id IS 'The table in which the record is located' ; -COMMENT ON COLUMN rawdata.objects.pgc IS 'Corssidentification of the record with the PGC-number' ; -COMMENT ON COLUMN rawdata.objects.modification_time IS 'Time of PGC-number assignment to the record' ; - - ---------------------------- --- Перенос данных ---------------------------- - -INSERT INTO common.pgc (id) -SELECT id -FROM rawdata.pgc -ORDER BY rawdata.pgc.id -; - -ALTER SEQUENCE common.pgc_id_seq RESTART WITH 6775395 ; - -UPDATE rawdata.objects -SET - pgc=rawdata.pgc.id -FROM rawdata.pgc -WHERE - rawdata.pgc.object_id=rawdata.objects.id -; - - --------------------------------------- --- В дальнейшем нужно будет --- 1. Перенести связь между записями и PGC в таблицу objects --- 2. Удалить таблицу rawdata.pgc --- 3. Перименовать таблицу objects в records --- 4. Перенести таблицы records и tables в схему layer0 --------------------------------------- - --- ROLLBACK; -COMMIT; - From 9c1ceca97e09b7d16ec73f3dc74ed5580e2dc853 Mon Sep 17 00:00:00 2001 From: kraysent Date: Wed, 12 Nov 2025 20:03:12 +0000 Subject: [PATCH 2/4] add timeouts to all workflow jobs --- .github/workflows/app-deploy.yaml | 2 ++ .github/workflows/build-docker.yaml | 1 + .github/workflows/documentation-build.yml | 1 + .github/workflows/documentation-deploy.yml | 1 + .github/workflows/testing.yml | 1 + 5 files changed, 6 insertions(+) diff --git a/.github/workflows/app-deploy.yaml b/.github/workflows/app-deploy.yaml index 78d5ce1d..284b6ff5 100644 --- a/.github/workflows/app-deploy.yaml +++ b/.github/workflows/app-deploy.yaml @@ -7,6 +7,7 @@ jobs: build: name: Build Docker image runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 @@ -32,6 +33,7 @@ jobs: name: Push Docker image to GitHub Packages needs: build runs-on: ubuntu-latest + timeout-minutes: 10 permissions: contents: read packages: write diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index a35b1dde..8a1b6810 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -7,6 +7,7 @@ jobs: build: name: Build Docker image runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/documentation-build.yml b/.github/workflows/documentation-build.yml index c53494fd..5e85aad0 100644 --- a/.github/workflows/documentation-build.yml +++ b/.github/workflows/documentation-build.yml @@ -5,6 +5,7 @@ on: jobs: build: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 
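(Every job touched in this patch series receives the same job-level setting shown in the hunks above and below. In GitHub Actions, timeout-minutes caps how long a job may run before it is automatically cancelled; without it, the default limit on hosted runners is 360 minutes. A minimal sketch of the pattern, using a hypothetical job name chosen purely for illustration:

    jobs:
      example-job:             # hypothetical job name, for illustration only
        runs-on: ubuntu-latest
        timeout-minutes: 10    # cancel this job if it runs longer than 10 minutes
        steps:
          - name: Checkout code
            uses: actions/checkout@v4

The same two-line addition, runs-on followed by timeout-minutes, repeats in each workflow diff in this patch.)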
diff --git a/.github/workflows/documentation-deploy.yml b/.github/workflows/documentation-deploy.yml index f6f0d92a..9d7196b1 100644 --- a/.github/workflows/documentation-deploy.yml +++ b/.github/workflows/documentation-deploy.yml @@ -7,6 +7,7 @@ on: jobs: deploy: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 90bc4b00..99dcf485 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -5,6 +5,7 @@ on: jobs: test-ubuntu: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout code uses: actions/checkout@v4 From 64be21f851e2172f5e3d9dc1db0e1e66994618fc Mon Sep 17 00:00:00 2001 From: kraysent Date: Wed, 12 Nov 2025 21:07:07 +0000 Subject: [PATCH 3/4] #329: remove pgc table --- app/data/repositories/layer0/objects.py | 45 ++++++++++++------- app/data/repositories/layer1.py | 16 +++---- app/lib/storage/postgres/postgres_storage.py | 1 + .../V014__common_pgc_connections.sql | 23 ++++++++++ 4 files changed, 62 insertions(+), 23 deletions(-) create mode 100644 postgres/migrations/V014__common_pgc_connections.sql diff --git a/app/data/repositories/layer0/objects.py b/app/data/repositories/layer0/objects.py index 7556107d..ae561770 100644 --- a/app/data/repositories/layer0/objects.py +++ b/app/data/repositories/layer0/objects.py @@ -173,21 +173,36 @@ def add_crossmatch_result(self, data: dict[str, model.CIResult]) -> None: self._storage.exec(query, params=params) def upsert_pgc(self, pgcs: dict[str, int | None]) -> None: - values = [] - params = [] + pgcs_to_insert: dict[str, int] = {} + + new_objects = [object_id for object_id, pgc in pgcs.items() if pgc is None] + + if new_objects: + result = self._storage.query( + f"""INSERT INTO common.pgc + VALUES {",".join(["(DEFAULT)"] * len(new_objects))} + RETURNING id""", + ) + + ids = [row["id"] for row in result] + + for object_id, pgc_id in zip(new_objects, ids, strict=False): + pgcs_to_insert[object_id] = pgc_id for object_id, pgc in pgcs.items(): - params.append(object_id) - if pgc is None: - values.append("(%s, DEFAULT)") - else: + if pgc is not None: + pgcs_to_insert[object_id] = pgc + + if pgcs_to_insert: + update_query = "UPDATE rawdata.objects SET pgc = v.pgc FROM (VALUES " + params = [] + values = [] + + for object_id, pgc_id in pgcs_to_insert.items(): values.append("(%s, %s)") - params.append(pgc) - - self._storage.exec( - f""" - INSERT INTO rawdata.pgc (object_id, id) VALUES {",".join(values)} - ON CONFLICT (object_id) DO UPDATE SET id = EXCLUDED.id - """, - params=params, - ) + params.extend([object_id, pgc_id]) + + update_query += ",".join(values) + update_query += ") AS v(object_id, pgc) WHERE rawdata.objects.id = v.object_id" + + self._storage.exec(update_query, params=params) diff --git a/app/data/repositories/layer1.py b/app/data/repositories/layer1.py index cfeb82f4..115ff304 100644 --- a/app/data/repositories/layer1.py +++ b/app/data/repositories/layer1.py @@ -70,16 +70,16 @@ def get_new_observations( query = f"""SELECT * FROM {object_cls.layer1_table()} AS l1 - JOIN rawdata.pgc AS pgc ON l1.object_id = pgc.object_id - WHERE id IN ( - SELECT DISTINCT id + JOIN rawdata.objects AS o ON l1.object_id = o.id + WHERE o.pgc IN ( + SELECT DISTINCT o.pgc FROM {object_cls.layer1_table()} AS l1 - JOIN rawdata.pgc AS pgc ON l1.object_id = pgc.object_id - WHERE modification_time > %s AND pgc.id > %s - ORDER BY id + JOIN rawdata.objects AS o ON l1.object_id = o.id + WHERE 
o.modification_time > %s AND o.pgc > %s + ORDER BY o.pgc LIMIT %s ) - ORDER BY pgc.id ASC""" + ORDER BY o.pgc ASC""" rows = self._storage.query(query, params=[dt, offset, limit]) @@ -87,7 +87,7 @@ def get_new_observations( for row in rows: object_id = row.pop("object_id") - pgc = int(row.pop("id")) + pgc = int(row.pop("pgc")) catalog_object = object_cls.from_layer1(row) key = (pgc, object_id) diff --git a/app/lib/storage/postgres/postgres_storage.py b/app/lib/storage/postgres/postgres_storage.py index 1740ba4b..47fc8e01 100644 --- a/app/lib/storage/postgres/postgres_storage.py +++ b/app/lib/storage/postgres/postgres_storage.py @@ -44,6 +44,7 @@ def __init__(self, cfg: config.PgStorageConfig, logger: structlog.stdlib.BoundLo self._connection: psycopg.Connection | None = None self._logger = logger + def connect(self) -> None: self._connection = psycopg.connect(self._config.get_dsn(), row_factory=rows.dict_row, autocommit=True) if self._connection is None: diff --git a/postgres/migrations/V014__common_pgc_connections.sql b/postgres/migrations/V014__common_pgc_connections.sql new file mode 100644 index 00000000..2449e0c6 --- /dev/null +++ b/postgres/migrations/V014__common_pgc_connections.sql @@ -0,0 +1,23 @@ +ALTER TABLE layer2.cz +ADD CONSTRAINT cz_pgc_fkey FOREIGN KEY (pgc) REFERENCES common.pgc(id) ON DELETE RESTRICT ON UPDATE CASCADE; +ALTER TABLE layer2.designation +ADD CONSTRAINT cz_pgc_fkey FOREIGN KEY (pgc) REFERENCES common.pgc(id) ON DELETE RESTRICT ON UPDATE CASCADE; +ALTER TABLE layer2.icrs +ADD CONSTRAINT cz_pgc_fkey FOREIGN KEY (pgc) REFERENCES common.pgc(id) ON DELETE RESTRICT ON UPDATE CASCADE; + +CREATE OR REPLACE FUNCTION rawdata_set_modification_time() +RETURNS TRIGGER AS $$ +BEGIN + IF NEW.pgc IS DISTINCT FROM OLD.pgc THEN + NEW.modification_time := now(); + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER set_modification_time_on_pgc_update +BEFORE UPDATE OF pgc ON rawdata.objects +FOR EACH ROW +EXECUTE FUNCTION rawdata_set_modification_time(); + +DROP TABLE rawdata.pgc; From b4292ce74a1a22a637c402302a084c91324477cc Mon Sep 17 00:00:00 2001 From: kraysent Date: Wed, 12 Nov 2025 21:24:29 +0000 Subject: [PATCH 4/4] fix tests --- app/data/repositories/common.py | 6 ++++++ app/lib/storage/postgres/postgres_storage.py | 1 - tests/integration/layer2_import_test.py | 1 + tests/integration/layer2_repository_test.py | 7 +++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/app/data/repositories/common.py b/app/data/repositories/common.py index 63946f84..b82117c2 100644 --- a/app/data/repositories/common.py +++ b/app/data/repositories/common.py @@ -38,3 +38,9 @@ def get_source_by_id(self, source_id: int) -> model.Bibliography: row = self._storage.query_one(template.GET_SOURCE_BY_ID, params=[source_id]) return model.Bibliography(**row) + + def register_pgcs(self, pgcs: list[int]): + self._storage.exec( + f"INSERT INTO common.pgc (id) VALUES {','.join(['(%s)'] * len(pgcs))} ON CONFLICT (id) DO NOTHING", + params=pgcs, + ) diff --git a/app/lib/storage/postgres/postgres_storage.py b/app/lib/storage/postgres/postgres_storage.py index 47fc8e01..1740ba4b 100644 --- a/app/lib/storage/postgres/postgres_storage.py +++ b/app/lib/storage/postgres/postgres_storage.py @@ -44,7 +44,6 @@ def __init__(self, cfg: config.PgStorageConfig, logger: structlog.stdlib.BoundLo self._connection: psycopg.Connection | None = None self._logger = logger - def connect(self) -> None: self._connection = psycopg.connect(self._config.get_dsn(), row_factory=rows.dict_row, 
autocommit=True) if self._connection is None: diff --git a/tests/integration/layer2_import_test.py b/tests/integration/layer2_import_test.py index 64dc0044..42b948e8 100644 --- a/tests/integration/layer2_import_test.py +++ b/tests/integration/layer2_import_test.py @@ -42,6 +42,7 @@ def test_import_two_catalogs(self): ["123", "124"], ) + self.common_repo.register_pgcs([1234, 1245]) self.layer0_repo.upsert_pgc({"123": 1234, "124": 1245}) self.layer1_repo.save_data( [ diff --git a/tests/integration/layer2_repository_test.py b/tests/integration/layer2_repository_test.py index 92e13d27..bae56135 100644 --- a/tests/integration/layer2_repository_test.py +++ b/tests/integration/layer2_repository_test.py @@ -12,6 +12,7 @@ class Layer2RepositoryTest(unittest.TestCase): def setUpClass(cls) -> None: cls.pg_storage = lib.TestPostgresStorage.get() + cls.common_repo = repositories.CommonRepository(cls.pg_storage.get_storage(), structlog.get_logger()) cls.layer2_repo = repositories.Layer2Repository(cls.pg_storage.get_storage(), structlog.get_logger()) def tearDown(self): @@ -23,6 +24,7 @@ def test_one_object(self): model.Layer2CatalogObject(2, model.DesignationCatalogObject(design="test2")), ] + self.common_repo.register_pgcs([1, 2]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query( @@ -42,6 +44,7 @@ def test_several_objects(self): model.Layer2CatalogObject(2, model.ICRSCatalogObject(ra=11, dec=11, e_ra=0.1, e_dec=0.1)), ] + self.common_repo.register_pgcs([1, 2]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query( @@ -65,6 +68,7 @@ def test_several_catalogs(self): model.Layer2CatalogObject(2, model.DesignationCatalogObject(design="test2")), ] + self.common_repo.register_pgcs([1, 2]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query( @@ -94,6 +98,7 @@ def test_several_filters(self): model.Layer2CatalogObject(1, model.DesignationCatalogObject(design="test")), ] + self.common_repo.register_pgcs([1, 2]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query( @@ -134,6 +139,7 @@ def test_pagination(self): model.Layer2CatalogObject(5, model.ICRSCatalogObject(ra=14, dec=14, e_ra=0.1, e_dec=0.1)), ] + self.common_repo.register_pgcs([1, 2, 3, 4, 5]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query( @@ -155,6 +161,7 @@ def test_batch_query(self): model.Layer2CatalogObject(5, model.ICRSCatalogObject(ra=14, dec=14, e_ra=0.1, e_dec=0.1)), ] + self.common_repo.register_pgcs([1, 2, 3, 4, 5]) self.layer2_repo.save_data(objects) actual = self.layer2_repo.query_batch(