From c0ba785ca7dda604c2bbe8fc5093ea9ace10849a Mon Sep 17 00:00:00 2001 From: kraysent Date: Sat, 15 Nov 2025 13:38:35 +0000 Subject: [PATCH 1/3] #5 use TAP/SQL to obtain the data --- plugins/vizier_v2.py | 6 +----- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/plugins/vizier_v2.py b/plugins/vizier_v2.py index ab2803b..92f87b2 100644 --- a/plugins/vizier_v2.py +++ b/plugins/vizier_v2.py @@ -139,11 +139,7 @@ def prepare(self) -> None: pass def get_table_name(self) -> str: - t = self.client.get_table(self.table_name, row_num=1) - if not hasattr(t, "meta") or t.meta is None: - raise RuntimeError("unable to get table name") - - return str(t.meta["ID"]) + return _sanitize_filename(self.table_name) def get_bibcode(self) -> str: resp = self.client.get_catalog_metadata(catalog=self.catalog_name) diff --git a/pyproject.toml b/pyproject.toml index 88f840b..3ee646e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "openapi-python-client>=0.27.1", "pandas>=2.3.3", "numpy>=2.3.4", + "pyvo>=1.8", ] [tool.pytest.ini_options] From ac86df3d2d08f63d3f9aca772212a3f9a6d63fe8 Mon Sep 17 00:00:00 2001 From: kraysent Date: Sat, 15 Nov 2025 14:17:06 +0000 Subject: [PATCH 2/3] use tap for query --- plugins/vizier_v2.py | 64 +++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/plugins/vizier_v2.py b/plugins/vizier_v2.py index 92f87b2..8354d8b 100644 --- a/plugins/vizier_v2.py +++ b/plugins/vizier_v2.py @@ -6,7 +6,8 @@ import numpy as np import pandas from astropy import table -from astroquery import utils, vizier +from astroquery import vizier +from pyvo import registry import app from app.gen.client.adminapi import models, types @@ -24,6 +25,21 @@ def _sanitize_filename(string: str) -> str: ) +def _build_where_clause(constraints: list[tuple[str, str, str]]) -> str: + if not constraints: + return "" + + conditions = [] + for column, sign, value in constraints: + if any(char in column for char in "()[]."): + quoted_column = f'"{column}"' + else: + quoted_column = column + conditions.append(f"{quoted_column} {sign} {value}") + + return " WHERE " + " AND ".join(conditions) + + def dtype_to_datatype(dtype: str | np.dtype) -> models.DatatypeEnum: dtype_str = str(dtype).lower() @@ -50,20 +66,21 @@ def dtype_to_datatype(dtype: str | np.dtype) -> models.DatatypeEnum: class CachedVizierClient: + TAP_ENDPOINT = "https://tapvizier.cds.unistra.fr/TAPVizieR/tap/sync" + def __init__(self, cache_path: str = ".vizier_cache/"): self.cache_path = cache_path self._client = vizier.Vizier() - self._client.ROW_LIMIT = -1 def _obtain_cache_path( - self, catalog_name: str, row_num: int | None = None, constraints: dict[str, str] | None = None + self, catalog_name: str, row_num: int | None = None, constraints: list[tuple[str, str, str]] | None = None ) -> pathlib.Path: filename = f"{catalog_name}.vot" if row_num is not None: filename = f"{catalog_name}_rows_{row_num}.vot" if constraints: - sorted_constraints = sorted(constraints.items()) - constraint_str = "_".join(f"{k}_{v}" for k, v in sorted_constraints) + sorted_constraints = sorted(constraints) + constraint_str = "_".join(f"{col}_{sign}_{val}" for col, sign, val in sorted_constraints) filename = f"{catalog_name}_constraints_{constraint_str}.vot" filename = _sanitize_filename(filename) @@ -72,7 +89,7 @@ def _obtain_cache_path( return path def _write_catalog_cache( - self, catalog_name: str, row_num: int | None = None, constraints: dict[str, str] | None = None + self, catalog_name: str, row_num: int | None = None, constraints: list[tuple[str, str, str]] | None = None ) -> None: app.logger.info( "downloading catalog from Vizier", @@ -80,24 +97,27 @@ def _write_catalog_cache( row_num=row_num, constraints=constraints, ) - client = self._client + + where_clause = _build_where_clause(constraints) if constraints else "" + if row_num is not None: - client = vizier.Vizier() - client.ROW_LIMIT = row_num - query_kwargs = {"catalog": catalog_name} - if constraints: - query_kwargs.update(constraints) - catalogs: utils.TableList = client.query_constraints(**query_kwargs) # pyright: ignore[reportAttributeAccessIssue] + select_clause = f"SELECT TOP {row_num} *" + else: + select_clause = "SELECT *" + + query = f'{select_clause}\nFROM "{catalog_name}"{where_clause}' - if not catalogs: - raise ValueError("catalog not found") + app.logger.info("Running query", query=query) + data = registry.regtap.RegistryQuery(self.TAP_ENDPOINT, query) + result = data.execute() + tbl = result.to_table() cache_filename = self._obtain_cache_path(catalog_name, row_num, constraints) - catalogs[0].write(str(cache_filename), format="votable") + tbl.write(str(cache_filename), format="votable") app.logger.debug("wrote catalog cache", location=str(cache_filename)) def get_table( - self, catalog_name: str, row_num: int | None = None, constraints: dict[str, str] | None = None + self, catalog_name: str, row_num: int | None = None, constraints: list[tuple[str, str, str]] | None = None ) -> table.Table: cache_path = self._obtain_cache_path(catalog_name, row_num, constraints) if not cache_path.exists(): @@ -125,11 +145,11 @@ def __init__( cache_path: str = ".vizier_cache/", batch_size: int = 10, ): - if len(constraints) % 2 != 0: - raise ValueError("constraints must be provided in pairs (column, constraint_value)") - self.constraints: dict[str, str] = {} - for i in range(0, len(constraints), 2): - self.constraints[constraints[i]] = constraints[i + 1] + if len(constraints) % 3 != 0: + raise ValueError("constraints must be provided in groups of three (column, sign, value)") + self.constraints: list[tuple[str, str, str]] = [] + for i in range(0, len(constraints), 3): + self.constraints.append((constraints[i], constraints[i + 1], constraints[i + 2])) self.catalog_name = catalog_name self.table_name = table_name self.batch_size = batch_size From e3df788a68b936a06b26905bb9396441d010a9b5 Mon Sep 17 00:00:00 2001 From: kraysent Date: Sat, 15 Nov 2025 14:23:19 +0000 Subject: [PATCH 3/3] remove client var --- plugins/vizier_v2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/vizier_v2.py b/plugins/vizier_v2.py index 8354d8b..ef856ba 100644 --- a/plugins/vizier_v2.py +++ b/plugins/vizier_v2.py @@ -70,7 +70,6 @@ class CachedVizierClient: def __init__(self, cache_path: str = ".vizier_cache/"): self.cache_path = cache_path - self._client = vizier.Vizier() def _obtain_cache_path( self, catalog_name: str, row_num: int | None = None, constraints: list[tuple[str, str, str]] | None = None @@ -127,7 +126,7 @@ def get_table( return table.Table.read(cache_path, format="votable") def get_catalog_metadata(self, catalog: str) -> dict: - return self._client.get_catalog_metadata(catalog=catalog) + return vizier.Vizier().get_catalog_metadata(catalog=catalog) @final