-
Notifications
You must be signed in to change notification settings - Fork 411
Make REST catalog namespace separator configurable #2826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| Any, | ||
| Union, | ||
| ) | ||
| from urllib.parse import quote, unquote | ||
|
|
||
| from pydantic import Field, field_validator | ||
| from requests import HTTPError, Session | ||
|
|
@@ -131,7 +132,8 @@ class IdentifierKind(Enum): | |
| AUTH = "auth" | ||
| CUSTOM = "custom" | ||
|
|
||
| NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) | ||
| NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator" | ||
| DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) | ||
|
|
||
|
|
||
| def _retry_hook(retry_state: RetryCallState) -> None: | ||
|
|
@@ -214,6 +216,7 @@ class ListViewsResponse(IcebergBaseModel): | |
| class RestCatalog(Catalog): | ||
| uri: str | ||
| _session: Session | ||
| _namespace_separator: str | ||
|
|
||
| def __init__(self, name: str, **properties: str): | ||
| """Rest Catalog. | ||
|
|
@@ -228,6 +231,10 @@ def __init__(self, name: str, **properties: str): | |
| self.uri = properties[URI] | ||
| self._fetch_config() | ||
| self._session = self._create_session() | ||
| separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) | ||
| if not separator_from_properties: | ||
| raise ValueError("Namespace separator cannot be an empty string") | ||
| self._namespace_separator = unquote(separator_from_properties) | ||
|
Comment on lines
+234
to
+237
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should we move this logic inside |
||
|
|
||
| def _create_session(self) -> Session: | ||
| """Create a request session with provided catalog configuration.""" | ||
|
|
@@ -351,6 +358,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]: | |
|
|
||
| return optional_oauth_param | ||
|
|
||
| def _encode_namespace_path(self, namespace: Identifier) -> str: | ||
| """ | ||
| Encode a namespace for use as a path parameter in a URL. | ||
|
|
||
| Each part of the namespace is URL-encoded using `urllib.parse.quote` | ||
| (ensuring characters like '/' are encoded) and then joined by the | ||
| configured namespace separator. | ||
| """ | ||
| return self._namespace_separator.join(quote(part, safe="") for part in namespace) | ||
|
|
||
| def _fetch_config(self) -> None: | ||
| params = {} | ||
| if warehouse_location := self.properties.get(WAREHOUSE_LOCATION): | ||
|
|
@@ -382,10 +399,16 @@ def _split_identifier_for_path( | |
| self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE | ||
| ) -> Properties: | ||
| if isinstance(identifier, TableIdentifier): | ||
| return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name} | ||
| return { | ||
| "namespace": self._encode_namespace_path(tuple(identifier.namespace.root)), | ||
| kind.value: quote(identifier.name, safe=""), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. curious why |
||
| } | ||
| identifier_tuple = self._identifier_to_validated_tuple(identifier) | ||
|
|
||
| return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]} | ||
| return { | ||
| "namespace": self._encode_namespace_path(identifier_tuple[:-1]), | ||
| kind.value: quote(identifier_tuple[-1], safe=""), | ||
| } | ||
|
|
||
| def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]: | ||
| identifier_tuple = self._identifier_to_validated_tuple(identifier) | ||
|
|
@@ -600,7 +623,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - | |
| @retry(**_RETRY_ARGS) | ||
| def list_tables(self, namespace: str | Identifier) -> list[Identifier]: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace_concat = self._encode_namespace_path(namespace_tuple) | ||
| response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) | ||
| try: | ||
| response.raise_for_status() | ||
|
|
@@ -681,7 +704,7 @@ def _remove_catalog_name_from_table_request_identifier(self, table_request: Comm | |
| @retry(**_RETRY_ARGS) | ||
| def list_views(self, namespace: str | Identifier) -> list[Identifier]: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace_concat = self._encode_namespace_path(namespace_tuple) | ||
| response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat)) | ||
| try: | ||
| response.raise_for_status() | ||
|
|
@@ -748,7 +771,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties = | |
| @retry(**_RETRY_ARGS) | ||
| def drop_namespace(self, namespace: str | Identifier) -> None: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace = self._encode_namespace_path(namespace_tuple) | ||
| response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace)) | ||
| try: | ||
| response.raise_for_status() | ||
|
|
@@ -760,7 +783,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: | |
| namespace_tuple = self.identifier_to_tuple(namespace) | ||
| response = self._session.get( | ||
| self.url( | ||
| f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}" | ||
| f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}" | ||
| if namespace_tuple | ||
| else Endpoints.list_namespaces | ||
| ), | ||
|
|
@@ -775,7 +798,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: | |
| @retry(**_RETRY_ARGS) | ||
| def load_namespace_properties(self, namespace: str | Identifier) -> Properties: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace = self._encode_namespace_path(namespace_tuple) | ||
| response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace)) | ||
| try: | ||
| response.raise_for_status() | ||
|
|
@@ -789,7 +812,7 @@ def update_namespace_properties( | |
| self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties = EMPTY_DICT | ||
| ) -> PropertiesUpdateSummary: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace = self._encode_namespace_path(namespace_tuple) | ||
| payload = {"removals": list(removals or []), "updates": updates} | ||
| response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload) | ||
| try: | ||
|
|
@@ -806,7 +829,7 @@ def update_namespace_properties( | |
| @retry(**_RETRY_ARGS) | ||
| def namespace_exists(self, namespace: str | Identifier) -> bool: | ||
| namespace_tuple = self._check_valid_namespace_identifier(namespace) | ||
| namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) | ||
| namespace = self._encode_namespace_path(namespace_tuple) | ||
| response = self._session.head(self.url(Endpoints.namespace_exists, namespace=namespace)) | ||
|
|
||
| if response.status_code == 404: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.