diff --git a/README.md b/README.md index 3b9d98f..26c6c42 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A Python package allowing developers to connect to Dataverse environments for DD - Bulk update — Provide a list of IDs with a single patch (broadcast) or a list of per‑record patches to `update(...)`; internally uses the bound `UpdateMultiple` action; returns nothing. Each record must include the primary key attribute when sent to UpdateMultiple. - Retrieve multiple (paging) — Generator-based `get(...)` that yields pages, supports `$top` and Prefer: `odata.maxpagesize` (`page_size`). - Upload files — Call `upload_file(logical_name, ...)` and an upload method will be auto picked (you can override the mode). See https://learn.microsoft.com/en-us/power-apps/developer/data-platform/file-column-data?tabs=sdk#upload-files -- Metadata helpers — Create/inspect/delete simple custom tables (EntityDefinitions + Attributes). +- Metadata helpers — Create/inspect/delete tables and create/delete columns (EntityDefinitions + Attributes). - Pandas helpers — Convenience DataFrame oriented wrappers for quick prototyping/notebooks. - Auth — Azure Identity (`TokenCredential`) injection. @@ -16,7 +16,7 @@ A Python package allowing developers to connect to Dataverse environments for DD - Simple `DataverseClient` facade for CRUD, SQL (read-only), and table metadata. - SQL-over-API: Constrained SQL (single SELECT with limited WHERE/TOP/ORDER BY) via native Web API `?sql=` parameter. -- Table metadata ops: create simple custom tables (supports string/int/decimal/float/datetime/bool/optionset) and delete them. +- Table metadata ops: create/delete simple custom tables (supports string/int/decimal/float/datetime/bool/optionset) and create/delete columns. - Bulk create via `CreateMultiple` (collection-bound) by passing `list[dict]` to `create(logical_name, payloads)`; returns list of created IDs. 
- Bulk update via `UpdateMultiple` (invoked internally) by calling unified `update(logical_name, ids, patch|patches)`; returns nothing. - Retrieve multiple with server-driven paging: `get(...)` yields lists (pages) following `@odata.nextLink`. Control total via `$top` and per-page via `page_size` (Prefer: `odata.maxpagesize`). @@ -42,9 +42,11 @@ Auth: | `delete` | `delete(logical_name, list[id])` | `None` | Delete many (sequential). | | `query_sql` | `query_sql(sql)` | `list[dict]` | Constrained read-only SELECT via `?sql=`. | | `create_table` | `create_table(tablename, schema)` | `dict` | Creates custom table + columns. Friendly name (e.g. `SampleItem`) becomes schema `new_SampleItem`; explicit schema name (contains `_`) used as-is. | +| `create_columns` | `create_columns(tablename, columns)` | `list[str]` | Adds columns using a `{name: type}` mapping (same shape as `create_table` schema). Returns schema names for the created columns. | | `get_table_info` | `get_table_info(schema_name)` | `dict | None` | Basic table metadata by schema name (e.g. `new_SampleItem`). Friendly names not auto-converted. | | `list_tables` | `list_tables()` | `list[dict]` | Lists non-private tables. | | `delete_table` | `delete_table(tablename)` | `None` | Drops custom table. Accepts friendly or schema name; friendly converted to `new_`. | +| `delete_columns` | `delete_columns(tablename, columns)` | `list[str]` | Deletes one or more columns; returns schema names (accepts string or list[str]). | | `PandasODataClient.create_df` | `create_df(logical_name, series)` | `str` | Create one record (returns GUID). | | `PandasODataClient.update` | `update(logical_name, id, series)` | `None` | Returns None; ignored if Series empty. | | `PandasODataClient.get_ids` | `get_ids(logical_name, ids, select=None)` | `DataFrame` | One row per ID (errors inline). 
| @@ -310,6 +312,10 @@ info = client.create_table( }, ) +# Create or delete columns +client.create_columns("SampleItem", {"category": "string"}) # returns ["new_Category"] +client.delete_columns("SampleItem", "category") # returns ["new_Category"] + logical = info["entity_logical_name"] # e.g., "new_sampleitem" # Create a record in the new table diff --git a/examples/quickstart.py b/examples/quickstart.py index 891b62a..e1c3a63 100644 --- a/examples/quickstart.py +++ b/examples/quickstart.py @@ -7,6 +7,7 @@ sys.path.append(str(Path(__file__).resolve().parents[1] / "src")) from dataverse_sdk import DataverseClient +from dataverse_sdk.errors import MetadataError from enum import IntEnum from azure.identity import InteractiveBrowserCredential import traceback @@ -64,7 +65,7 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403 break if last_exc: raise last_exc - + # Enum demonstrating local option set creation with multilingual labels (for French labels to work, enable French language in the environment first) class Status(IntEnum): Active = 1 @@ -141,7 +142,14 @@ class Status(IntEnum): pass # Fail fast: all operations must use the custom table sys.exit(1) +entity_schema = table_info.get("entity_schema") or "new_SampleItem" logical = table_info.get("entity_logical_name") +metadata_id = table_info.get("metadata_id") +if not metadata_id: + refreshed_info = client.get_table_info(entity_schema) or {} + metadata_id = refreshed_info.get("metadata_id") + if metadata_id: + table_info["metadata_id"] = metadata_id # Derive attribute logical name prefix from the entity logical name (segment before first underscore) attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical @@ -527,9 +535,88 @@ def _del_one(rid: str) -> tuple[str, bool, str | None]: except Exception as e: print(f"Delete failed: {e}") +pause("Next: column metadata helpers") + +# 6) Column metadata helpers: column create/delete +print("Column metadata helpers (create/delete 
column):") +scratch_column = f"scratch_{int(time.time())}" +column_payload = {scratch_column: "string"} +try: + log_call(f"client.create_columns('{entity_schema}', {repr(column_payload)})") + column_create = client.create_columns(entity_schema, column_payload) + if not isinstance(column_create, list) or not column_create: + raise RuntimeError("create_columns did not return schema list") + created_details = column_create + if not all(isinstance(item, str) for item in created_details): + raise RuntimeError("create_columns entries were not schema strings") + attribute_schema = created_details[0] + odata_client = client._get_odata() + exists_after_create = None + exists_after_delete = None + attr_type_before = None + if metadata_id and attribute_schema: + _ready_message = "Column metadata not yet available" + def _metadata_after_create(): + meta = odata_client._get_attribute_metadata( + metadata_id, + attribute_schema, + extra_select="@odata.type,AttributeType", + ) + if not meta or not meta.get("MetadataId"): + raise RuntimeError(_ready_message) + return meta + + ready_meta = backoff_retry( + _metadata_after_create, + delays=(0, 1, 2, 4, 8), + retry_http_statuses=(), + retry_if=lambda exc: isinstance(exc, RuntimeError) and str(exc) == _ready_message, + ) + exists_after_create = bool(ready_meta) + raw_type = ready_meta.get("@odata.type") or ready_meta.get("AttributeType") + if isinstance(raw_type, str): + attr_type_before = raw_type + lowered = raw_type.lower() + log_call(f"client.delete_columns('{entity_schema}', '{scratch_column}')") + column_delete = client.delete_columns(entity_schema, scratch_column) + if not isinstance(column_delete, list) or not column_delete: + raise RuntimeError("delete_columns did not return schema list") + deleted_details = column_delete + if not all(isinstance(item, str) for item in deleted_details): + raise RuntimeError("delete_columns entries were not schema strings") + if attribute_schema not in deleted_details: + raise 
RuntimeError("delete_columns response missing expected schema name") + if metadata_id and attribute_schema: + _delete_message = "Column metadata still present after delete" + def _ensure_removed(): + meta = odata_client._get_attribute_metadata(metadata_id, attribute_schema) + if meta: + raise RuntimeError(_delete_message) + return True + + removed = backoff_retry( + _ensure_removed, + delays=(0, 1, 2, 4, 8), + retry_http_statuses=(), + retry_if=lambda exc: isinstance(exc, RuntimeError) and str(exc) == _delete_message, + ) + exists_after_delete = not removed + print({ + "created_column": scratch_column, + "create_summary": created_details, + "delete_summary": deleted_details, + "attribute_type_before_delete": attr_type_before, + "exists_after_create": exists_after_create, + "exists_after_delete": exists_after_delete, + }) +except MetadataError as meta_err: + print({"column_metadata_error": str(meta_err)}) +except Exception as exc: + print({"column_metadata_unexpected": str(exc)}) + pause("Next: Cleanup table") -# 6) Cleanup: delete the custom table if it exists +# 7) Cleanup: delete the custom table if it exists print("Cleanup (Metadata):") if delete_table_at_end: try: diff --git a/src/dataverse_sdk/client.py b/src/dataverse_sdk/client.py index f0b0127..4a7bd72 100644 --- a/src/dataverse_sdk/client.py +++ b/src/dataverse_sdk/client.py @@ -241,6 +241,68 @@ def list_tables(self) -> list[str]: A list of table names. """ return self._get_odata()._list_tables() + + def create_columns( + self, + tablename: str, + columns: Dict[str, Any], + ) -> List[str]: + """ + Create one or more columns on an existing table using a schema-style mapping. + + :param tablename: Friendly name ("SampleItem") or full schema name ("new_SampleItem"). + :type tablename: str + :param columns: Mapping of logical names (without prefix) to supported types. Primitive types include + ``string``, ``int``, ``decimal``, ``float``, ``datetime``, and ``bool``. 
Enum subclasses (IntEnum preferred) + generate a local option set and can specify localized labels via ``__labels__``. + :type columns: Dict[str, Any] + :returns: Schema names for the columns that were created. + :rtype: list[str] + Example: + Create two columns on the custom table:: + + created = client.create_columns( + "new_SampleItem", + { + "scratch": "string", + "flags": "bool", + }, + ) + print(created) + """ + return self._get_odata()._create_columns( + tablename, + columns, + ) + + def delete_columns( + self, + tablename: str, + columns: Union[str, List[str]], + ) -> List[str]: + """ + Delete one or more columns from a table. + + :param tablename: Friendly or schema name of the table. + :type tablename: str + :param columns: Column name or list of column names to remove. Friendly names are normalized to schema + names using the same prefix logic as ``create_columns``. + :type columns: str | list[str] + :returns: Schema names for the columns that were removed. + :rtype: list[str] + Example: + Remove two custom columns by schema name: + + removed = client.delete_columns( + "new_SampleItem", + ["new_Scratch", "new_Flags"], + ) + print(removed) + """ + return self._get_odata()._delete_columns( + tablename, + columns, + ) # File upload def upload_file( diff --git a/src/dataverse_sdk/error_codes.py b/src/dataverse_sdk/error_codes.py index 12b35aa..2646ca4 100644 --- a/src/dataverse_sdk/error_codes.py +++ b/src/dataverse_sdk/error_codes.py @@ -43,6 +43,7 @@ METADATA_ENTITYSET_NAME_MISSING = "metadata_entityset_name_missing" METADATA_TABLE_NOT_FOUND = "metadata_table_not_found" METADATA_TABLE_ALREADY_EXISTS = "metadata_table_already_exists" +METADATA_COLUMN_NOT_FOUND = "metadata_column_not_found" METADATA_ATTRIBUTE_RETRY_EXHAUSTED = "metadata_attribute_retry_exhausted" METADATA_PICKLIST_RETRY_EXHAUSTED = "metadata_picklist_retry_exhausted" diff --git a/src/dataverse_sdk/odata.py b/src/dataverse_sdk/odata.py index 5709d51..c7fccc3 100644 --- 
a/src/dataverse_sdk/odata.py +++ b/src/dataverse_sdk/odata.py @@ -615,6 +615,11 @@ def _to_pascal(self, name: str) -> str: parts = re.split(r"[^A-Za-z0-9]+", name) return "".join(p[:1].upper() + p[1:] for p in parts if p) + def _normalize_entity_schema(self, tablename: str) -> str: + if "_" in tablename: + return tablename + return f"new_{self._to_pascal(tablename)}" + def _get_entity_by_schema(self, schema_name: str) -> Optional[Dict[str, Any]]: url = f"{self.api}/EntityDefinitions" # Escape single quotes in schema name @@ -661,6 +666,50 @@ def _wait_for_entity_ready(self, schema_name: str, delays: Optional[List[int]] = return ent return ent + def _normalize_attribute_schema(self, entity_schema: str, column_name: str) -> str: + # Use same publisher prefix segment as entity_schema if present; else default to 'new_'. + if not isinstance(column_name, str) or not column_name.strip(): + raise ValueError("column_name must be a non-empty string") + publisher = entity_schema.split("_", 1)[0] if "_" in entity_schema else "new" + expected_prefix = f"{publisher}_" + if column_name.lower().startswith(expected_prefix.lower()): + return column_name + return f"{publisher}_{self._to_pascal(column_name)}" + + def _get_attribute_metadata( + self, + entity_metadata_id: str, + schema_name: str, + extra_select: Optional[str] = None, + ) -> Optional[Dict[str, Any]]: + attr_escaped = self._escape_odata_quotes(schema_name) + url = f"{self.api}/EntityDefinitions({entity_metadata_id})/Attributes" + select_fields = ["MetadataId", "LogicalName", "SchemaName"] + if extra_select: + for piece in extra_select.split(","): + piece = piece.strip() + if not piece or piece in select_fields: + continue + if piece.startswith("@"): + continue + if piece not in select_fields: + select_fields.append(piece) + params = { + "$select": ",".join(select_fields), + "$filter": f"SchemaName eq '{attr_escaped}'", + } + r = self._request("get", url, params=params) + try: + body = r.json() if r.text else {} + except 
ValueError: + return None + items = body.get("value") if isinstance(body, dict) else None + if isinstance(items, list) and items: + item = items[0] + if isinstance(item, dict): + return item + return None + # ---------------------- Enum / Option Set helpers ------------------ def _build_localizedlabels_payload(self, translations: Dict[int, str]) -> Dict[str, Any]: """Build a Dataverse Label object from {: } entries. @@ -1026,8 +1075,7 @@ def _list_tables(self) -> List[Dict[str, Any]]: return r.json().get("value", []) def _delete_table(self, tablename: str) -> None: - schema_name = tablename if "_" in tablename else f"new_{self._to_pascal(tablename)}" - entity_schema = schema_name + entity_schema = self._normalize_entity_schema(tablename) ent = self._get_entity_by_schema(entity_schema) if not ent or not ent.get("MetadataId"): raise MetadataError( @@ -1041,7 +1089,7 @@ def _delete_table(self, tablename: str) -> None: def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any]: # Accept a friendly name and construct a default schema under 'new_'. # If a full SchemaName is passed (contains '_'), use as-is. - entity_schema = tablename if "_" in tablename else f"new_{self._to_pascal(tablename)}" + entity_schema = self._normalize_entity_schema(tablename) ent = self._get_entity_by_schema(entity_schema) if ent: @@ -1055,12 +1103,7 @@ def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any attributes: List[Dict[str, Any]] = [] attributes.append(self._attribute_payload(primary_attr_schema, "string", is_primary_name=True)) for col_name, dtype in schema.items(): - # Use same publisher prefix segment as entity_schema if present; else default to 'new_'. 
- publisher = entity_schema.split("_", 1)[0] if "_" in entity_schema else "new" - if col_name.lower().startswith(f"{publisher}_"): - attr_schema = col_name - else: - attr_schema = f"{publisher}_{self._to_pascal(col_name)}" + attr_schema = self._normalize_attribute_schema(entity_schema, col_name) payload = self._attribute_payload(attr_schema, dtype) if not payload: raise ValueError(f"Unsupported column type '{dtype}' for '{col_name}'.") @@ -1078,6 +1121,104 @@ def _create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any "metadata_id": metadata_id, "columns_created": created_cols, } + + def _create_columns( + self, + tablename: str, + columns: Dict[str, Any], + ) -> List[str]: + if not isinstance(columns, dict) or not columns: + raise TypeError("columns must be a non-empty dict[name -> type]") + entity_schema = self._normalize_entity_schema(tablename) + ent = self._get_entity_by_schema(entity_schema) + if not ent or not ent.get("MetadataId"): + raise MetadataError( + f"Table '{entity_schema}' not found.", + subcode=ec.METADATA_TABLE_NOT_FOUND, + ) + + metadata_id = ent.get("MetadataId") + created: List[str] = [] + needs_picklist_flush = False + + for column_name, column_type in columns.items(): + schema_name = self._normalize_attribute_schema(entity_schema, column_name) + payload = self._attribute_payload(schema_name, column_type) + if not payload: + raise ValueError(f"Unsupported column type '{column_type}' for '{schema_name}'.") + + url = f"{self.api}/EntityDefinitions({metadata_id})/Attributes" + self._request("post", url, json=payload) + + created.append(schema_name) + + if "OptionSet" in payload: + needs_picklist_flush = True + + if needs_picklist_flush: + self._flush_cache("picklist") + + return created + + def _delete_columns( + self, + tablename: str, + columns: Union[str, List[str]], + ) -> List[str]: + if isinstance(columns, str): + names = [columns] + elif isinstance(columns, list): + names = columns + else: + raise TypeError("columns 
must be str or list[str]") + + for name in names: + if not isinstance(name, str) or not name.strip(): + raise ValueError("column names must be non-empty strings") + + entity_schema = self._normalize_entity_schema(tablename) + ent = self._get_entity_by_schema(entity_schema) + if not ent or not ent.get("MetadataId"): + raise MetadataError( + f"Table '{entity_schema}' not found.", + subcode=ec.METADATA_TABLE_NOT_FOUND, + ) + + metadata_id = ent.get("MetadataId") + deleted: List[str] = [] + needs_picklist_flush = False + + for column_name in names: + schema_name = self._normalize_attribute_schema(entity_schema, column_name) + attr_meta = self._get_attribute_metadata(metadata_id, schema_name, extra_select="@odata.type,AttributeType") + if not attr_meta: + raise MetadataError( + f"Column '{schema_name}' not found on table '{entity_schema}'.", + subcode=ec.METADATA_COLUMN_NOT_FOUND, + ) + + attr_metadata_id = attr_meta.get("MetadataId") + if not attr_metadata_id: + raise RuntimeError( + f"Metadata incomplete for column '{schema_name}' (missing MetadataId)." + ) + + attr_url = f"{self.api}/EntityDefinitions({metadata_id})/Attributes({attr_metadata_id})" + self._request("delete", attr_url, headers={"If-Match": "*"}) + + attr_type = attr_meta.get("@odata.type") or attr_meta.get("AttributeType") + if isinstance(attr_type, str): + attr_type_l = attr_type.lower() + if "picklist" in attr_type_l or "optionset" in attr_type_l: + needs_picklist_flush = True + + deleted.append(schema_name) + + if needs_picklist_flush: + self._flush_cache("picklist") + + return deleted + # ---------------------- Cache maintenance ------------------------- def _flush_cache( self,