diff --git a/src/dataverse_sdk/__init__.py b/src/dataverse_sdk/__init__.py index 9c593a4..d50aaeb 100644 --- a/src/dataverse_sdk/__init__.py +++ b/src/dataverse_sdk/__init__.py @@ -1,3 +1,45 @@ +""" +Microsoft Dataverse SDK for Python. + +This package provides a high-level Python client for interacting with Microsoft Dataverse +environments through the Web API. It supports CRUD operations, SQL queries, table metadata +management, and file uploads with Azure Identity authentication. + +Key Features: + - OData CRUD operations (create, read, update, delete) + - SQL query support via Web API + - Table metadata operations (create, inspect, delete custom tables) + - File column upload capabilities + - Pandas integration for DataFrame-based operations + - Azure Identity credential support + +.. note:: + This SDK requires Azure Identity credentials for authentication. See the + `Azure Identity documentation <https://learn.microsoft.com/python/api/overview/azure/identity-readme>`_ + for supported credential types. + +Example: + Basic client initialization and usage:: + + from azure.identity import DefaultAzureCredential + from dataverse_sdk import DataverseClient + + credential = DefaultAzureCredential() + client = DataverseClient( + "https://org.crm.dynamics.com", + credential + ) + + # Create a record + account_id = client.create("account", {"name": "Contoso"})[0] + + # Query records + accounts = client.get("account", filter="name eq 'Contoso'") + for batch in accounts: + for record in batch: + print(record["name"]) +""" + from .__version__ import __version__ from .client import DataverseClient diff --git a/src/dataverse_sdk/auth.py b/src/dataverse_sdk/auth.py index 6173799..e41f874 100644 --- a/src/dataverse_sdk/auth.py +++ b/src/dataverse_sdk/auth.py @@ -7,12 +7,26 @@ @dataclass class TokenPair: + """ + Container for an OAuth2 access token and its associated resource scope. + + :param resource: The OAuth2 scope/resource for which the token was acquired. + :type resource: str + :param access_token: The access token string.
+ :type access_token: str + """ resource: str access_token: str class AuthManager: - """Azure Identity-based authentication helper for Dataverse.""" + """ + Azure Identity-based authentication manager for Dataverse. + + :param credential: Azure Identity credential implementation. + :type credential: ~azure.core.credentials.TokenCredential + :raises TypeError: If ``credential`` does not implement :class:`~azure.core.credentials.TokenCredential`. + """ def __init__(self, credential: TokenCredential) -> None: if not isinstance(credential, TokenCredential): @@ -22,6 +36,14 @@ def __init__(self, credential: TokenCredential) -> None: self.credential: TokenCredential = credential def acquire_token(self, scope: str) -> TokenPair: - """Acquire an access token for the given scope using Azure Identity.""" + """ + Acquire an access token for the specified OAuth2 scope. + + :param scope: OAuth2 scope string, typically ``"https://<org>.crm.dynamics.com/.default"``. + :type scope: str + :return: Token pair containing the scope and access token. + :rtype: ~dataverse_sdk.auth.TokenPair + :raises ~azure.core.exceptions.ClientAuthenticationError: If token acquisition fails. + """ token = self.credential.get_token(scope) return TokenPair(resource=scope, access_token=token.token) diff --git a/src/dataverse_sdk/client.py b/src/dataverse_sdk/client.py index f0b0127..21391bc 100644 --- a/src/dataverse_sdk/client.py +++ b/src/dataverse_sdk/client.py @@ -10,30 +10,60 @@ class DataverseClient: - """High-level client for Dataverse operations. - - This client exposes a simple, stable surface for: - - OData CRUD: create, get, update, delete records - - SQL (read-only): query SQL via ?sql parameter in Web API - - Table metadata: create, inspect, and delete simple custom tables - - The client owns authentication (Azure Identity) and configuration, and delegates - requests to an internal OData client responsible for HTTP calls and URL shaping.
- - Parameters - ---------- - base_url : str - Your Dataverse environment URL, for example: - ``"https://.crm.dynamics.com"``. A trailing slash is ignored. - credential : azure.core.credentials.TokenCredential - Azure Identity credential. - config : DataverseConfig | None, optional - Optional configuration (language code, SQL API name, HTTP timeouts/retries). - - Raises - ------ - ValueError - If ``base_url`` is missing or empty after trimming. + """ + High-level client for Microsoft Dataverse operations. + + This client provides a simple, stable interface for interacting with Dataverse environments + through the Web API. It handles authentication via Azure Identity and delegates HTTP operations + to an internal :class:`~dataverse_sdk.odata.ODataClient`. + + Key capabilities: + - OData CRUD operations: create, read, update, delete records + - SQL queries: execute read-only SQL via Web API ``?sql`` parameter + - Table metadata: create, inspect, and delete custom tables + - File uploads: upload files to file columns with chunking support + + :param base_url: Your Dataverse environment URL, for example + ``"https://org.crm.dynamics.com"``. Trailing slash is automatically removed. + :type base_url: str + :param credential: Azure Identity credential for authentication. + :type credential: ~azure.core.credentials.TokenCredential + :param config: Optional configuration for language, timeouts, and retries. + If not provided, defaults are loaded from :meth:`~dataverse_sdk.config.DataverseConfig.from_env`. + :type config: ~dataverse_sdk.config.DataverseConfig or None + + :raises ValueError: If ``base_url`` is missing or empty after trimming. + + .. note:: + The client lazily initializes its internal OData client on first use, allowing + lightweight construction without immediate network calls. 
+ + Example: + Create a client and perform basic operations:: + + from azure.identity import DefaultAzureCredential + from dataverse_sdk import DataverseClient + + credential = DefaultAzureCredential() + client = DataverseClient( + "https://org.crm.dynamics.com", + credential + ) + + # Create a record + record_ids = client.create("account", {"name": "Contoso Ltd"}) + print(f"Created account: {record_ids[0]}") + + # Update a record + client.update("account", record_ids[0], {"telephone1": "555-0100"}) + + # Query records + for batch in client.get("account", filter="name eq 'Contoso Ltd'"): + for account in batch: + print(account["name"]) + + # Delete a record + client.delete("account", record_ids[0]) """ def __init__( @@ -50,12 +80,14 @@ def __init__( self._odata: Optional[ODataClient] = None def _get_odata(self) -> ODataClient: - """Get or create the internal OData client instance. + """ + Get or create the internal OData client instance. + + This method implements lazy initialization of the low-level OData client, + deferring construction until the first API call. - Returns - ------- - ODataClient - The lazily-initialized low-level client used to perform requests. + :return: The lazily-initialized low-level client used to perform HTTP requests. + :rtype: ~dataverse_sdk.odata.ODataClient """ if self._odata is None: self._odata = ODataClient( @@ -67,19 +99,36 @@ def _get_odata(self) -> ODataClient: # ---------------- Unified CRUD: create/update/delete ---------------- def create(self, logical_name: str, records: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[str]: - """Create one or many records by logical (singular) name; returns list[str] of created IDs. - - Parameters - ---------- - logical_name : str - Logical (singular) entity name, e.g. "account". - records : dict | list[dict] - A single record dict or a list of record dicts. - - Returns - ------- - list[str] - List of created GUIDs (length 1 for single input). 
+ """ + Create one or more records by logical (singular) entity name. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"`` or ``"contact"``. + :type logical_name: str + :param records: A single record dictionary or a list of record dictionaries. + Each dictionary should contain attribute logical names as keys. + :type records: dict or list[dict] + + :return: List of created record GUIDs. Returns a single-element list for a single input. + :rtype: list[str] + + :raises TypeError: If ``records`` is not a dict or list[dict], or if the internal + client returns an unexpected type. + + Example: + Create a single record:: + + client = DataverseClient(base_url, credential) + ids = client.create("account", {"name": "Contoso"}) + print(f"Created: {ids[0]}") + + Create multiple records:: + + records = [ + {"name": "Contoso"}, + {"name": "Fabrikam"} + ] + ids = client.create("account", records) + print(f"Created {len(ids)} accounts") """ od = self._get_odata() entity_set = od._entity_set_from_logical(logical_name) @@ -97,17 +146,49 @@ def create(self, logical_name: str, records: Union[Dict[str, Any], List[Dict[str raise TypeError("records must be dict or list[dict]") def update(self, logical_name: str, ids: Union[str, List[str]], changes: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None: - """Update one or many records. Returns None. + """ + Update one or more records. + + This method supports three usage patterns: + + 1. Single record update: ``update("account", "guid", {"name": "New Name"})`` + 2. Broadcast update: ``update("account", [id1, id2], {"status": 1})`` - applies same changes to all IDs + 3. Paired updates: ``update("account", [id1, id2], [changes1, changes2])`` - one-to-one mapping + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param ids: Single GUID string or list of GUID strings to update. 
+ :type ids: str or list[str] + :param changes: Dictionary of changes for single/broadcast mode, or list of dictionaries + for paired mode. When ``ids`` is a list and ``changes`` is a single dict, + the same changes are broadcast to all records. When both are lists, they must + have equal length for one-to-one mapping. + :type changes: dict or list[dict] + + :raises TypeError: If ``ids`` is not str or list[str], or if ``changes`` type doesn't match usage pattern. + + .. note:: + Single updates discard the response representation for better performance. + For broadcast or paired updates, the method delegates to the internal client's + batch update logic. + + Example: + Single record update:: + + client.update("account", account_id, {"telephone1": "555-0100"}) - Usage patterns: - update("accounts", some_id, {"telephone1": "555"}) - update("accounts", [id1, id2], {"statecode": 1}) # broadcast - update("accounts", [id1, id2], [{"name": "A"}, {"name": "B"}]) # 1:1 + Broadcast same changes to multiple records:: - Rules: - - If ids is a list and changes is a single dict -> broadcast. - - If both are lists they must have equal length. - - Single update discards representation (performance-focused). + client.update("account", [id1, id2, id3], {"statecode": 1}) + + Update multiple records with different values:: + + ids = [id1, id2] + changes = [ + {"name": "Updated Name 1"}, + {"name": "Updated Name 2"} + ] + client.update("account", ids, changes) """ od = self._get_odata() if isinstance(ids, str): @@ -121,7 +202,25 @@ def update(self, logical_name: str, ids: Union[str, List[str]], changes: Union[D return None def delete(self, logical_name: str, ids: Union[str, List[str]]) -> None: - """Delete one or many records (GUIDs). Returns None.""" + """ + Delete one or more records by GUID. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param ids: Single GUID string or list of GUID strings to delete. 
+ :type ids: str or list[str] + + :raises TypeError: If ``ids`` is not str or list[str]. + + Example: + Delete a single record:: + + client.delete("account", account_id) + + Delete multiple records:: + + client.delete("account", [id1, id2, id3]) + """ od = self._get_odata() if isinstance(ids, str): od._delete(logical_name, ids) @@ -142,7 +241,57 @@ def get( expand: Optional[List[str]] = None, page_size: Optional[int] = None, ) -> Union[Dict[str, Any], Iterable[List[Dict[str, Any]]]]: - """Fetch single record by ID or multiple records as a generator.""" + """ + Fetch a single record by ID or query multiple records. + + When ``record_id`` is provided, returns a single record dictionary. + When ``record_id`` is None, returns a generator yielding batches of records. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param record_id: Optional GUID to fetch a specific record. If None, queries multiple records. + :type record_id: str or None + :param select: Optional list of attribute logical names to retrieve. + :type select: list[str] or None + :param filter: Optional OData filter string, e.g. ``"name eq 'Contoso'"``. + :type filter: str or None + :param orderby: Optional list of attributes to sort by, e.g. ``["name asc", "createdon desc"]``. + :type orderby: list[str] or None + :param top: Optional maximum number of records to return. + :type top: int or None + :param expand: Optional list of navigation properties to expand. + :type expand: list[str] or None + :param page_size: Optional number of records per page for pagination. + :type page_size: int or None + + :return: Single record dict if ``record_id`` is provided, otherwise a generator + yielding lists of record dictionaries (one list per page). + :rtype: dict or Iterable[list[dict]] + + :raises TypeError: If ``record_id`` is provided but not a string. 
+ + Example: + Fetch a single record:: + + record = client.get("account", record_id=account_id, select=["name", "telephone1"]) + print(record["name"]) + + Query multiple records with filtering:: + + for batch in client.get("account", filter="name eq 'Contoso'", select=["name"]): + for account in batch: + print(account["name"]) + + Query with sorting and pagination:: + + for batch in client.get( + "account", + orderby=["createdon desc"], + top=100, + page_size=50 + ): + print(f"Batch size: {len(batch)}") + """ od = self._get_odata() if record_id is not None: if not isinstance(record_id, str): @@ -164,81 +313,154 @@ def get( # SQL via Web API sql parameter def query_sql(self, sql: str): - """Execute a read-only SQL query using the Dataverse Web API `?sql=` capability. + """ + Execute a read-only SQL query using the Dataverse Web API ``?sql`` capability. + + The SQL query must follow the supported subset: a single SELECT statement with + optional WHERE, TOP (integer literal), ORDER BY (column names only), and a simple + table alias after FROM. + + :param sql: Supported SQL SELECT statement. + :type sql: str - The query must follow the currently supported subset: single SELECT with optional WHERE, - TOP (integer), ORDER BY (columns only), and simple alias after FROM. Example: - ``SELECT TOP 3 accountid, name FROM account ORDER BY name DESC`` + :return: List of result row dictionaries. Returns an empty list if no rows match. + :rtype: list[dict] - Parameters - ---------- - sql : str - Supported single SELECT statement. + :raises ~dataverse_sdk.errors.SQLParseError: If the SQL query uses unsupported syntax. + :raises ~dataverse_sdk.errors.HttpError: If the Web API returns an error. - Returns - ------- - list[dict] - Result rows (empty list if none). + .. note:: + The SQL support is limited to read-only queries. Complex joins, subqueries, + and certain SQL functions may not be supported. Consult the Dataverse + documentation for the current feature set. 
+ + Example: + Basic SQL query:: + + sql = "SELECT TOP 10 accountid, name FROM account WHERE name LIKE 'C%' ORDER BY name" + results = client.query_sql(sql) + for row in results: + print(row["name"]) + + Query with alias:: + + sql = "SELECT a.name, a.telephone1 FROM account AS a WHERE a.statecode = 0" + results = client.query_sql(sql) """ return self._get_odata()._query_sql(sql) # Table metadata helpers def get_table_info(self, tablename: str) -> Optional[Dict[str, Any]]: - """Get basic metadata for a custom table if it exists. - - Parameters - ---------- - tablename : str - Friendly name (e.g., ``"SampleItem"``) or full schema name - (e.g., ``"new_SampleItem"``). - - Returns - ------- - dict | None - Dict with keys like ``entity_schema``, ``entity_logical_name``, - ``entity_set_name``, and ``metadata_id``; ``None`` if not found. + """ + Get basic metadata for a custom table if it exists. + + :param tablename: Table friendly name (e.g. ``"SampleItem"``) or full schema name + (e.g. ``"new_SampleItem"``). + :type tablename: str + + :return: Dictionary containing table metadata with keys ``entity_schema``, + ``entity_logical_name``, ``entity_set_name``, and ``metadata_id``. + Returns None if the table is not found. + :rtype: dict or None + + Example: + Retrieve table metadata:: + + info = client.get_table_info("SampleItem") + if info: + print(f"Logical name: {info['entity_logical_name']}") + print(f"Entity set: {info['entity_set_name']}") """ return self._get_odata()._get_table_info(tablename) def create_table(self, tablename: str, schema: Dict[str, Any]) -> Dict[str, Any]: - """Create a simple custom table. - - Parameters - ---------- - tablename : str - Friendly name (``"SampleItem"``) or a full schema name (``"new_SampleItem"``). - schema : dict[str, Any] - Column definitions mapping logical names (without prefix) to types. 
- Supported: - - Primitive types: ``string``, ``int``, ``decimal``, ``float``, ``datetime``, ``bool`` - - Enum subclass (IntEnum preferred): generates a local option set. - Optional multilingual labels via ``__labels__ = {1033: {"Active": "Active"}, 1036: {"Active": "Actif"}}`` - - Returns - ------- - dict - Metadata summary including ``entity_schema``, ``entity_set_name``, - ``entity_logical_name``, ``metadata_id``, and ``columns_created``. + """ + Create a simple custom table with specified columns. + + :param tablename: Table friendly name (e.g. ``"SampleItem"``) or full schema name + (e.g. ``"new_SampleItem"``). If a publisher prefix is not included, the default + publisher prefix will be applied. + :type tablename: str + :param schema: Dictionary mapping column logical names (without prefix) to their types. + Supported types: + + - Primitive types: ``"string"``, ``"int"``, ``"decimal"``, ``"float"``, ``"datetime"``, ``"bool"`` + - Enum subclass (IntEnum preferred): Creates a local option set. Optional multilingual + labels can be provided via ``__labels__`` class attribute, defined inside the Enum subclass:: + + class ItemStatus(IntEnum): + ACTIVE = 1 + INACTIVE = 2 + __labels__ = { + 1033: {"Active": "Active", "Inactive": "Inactive"}, + 1036: {"Active": "Actif", "Inactive": "Inactif"} + } + + :type schema: dict[str, Any] + + :return: Dictionary containing table metadata including ``entity_schema``, + ``entity_set_name``, ``entity_logical_name``, ``metadata_id``, and ``columns_created``. + :rtype: dict + + :raises ~dataverse_sdk.errors.MetadataError: If table creation fails or the schema is invalid. 
+ + Example: + Create a table with simple columns:: + + from enum import IntEnum + + class ItemStatus(IntEnum): + ACTIVE = 1 + INACTIVE = 2 + + schema = { + "title": "string", + "quantity": "int", + "price": "decimal", + "available": "bool", + "status": ItemStatus + } + + result = client.create_table("SampleItem", schema) + print(f"Created table: {result['entity_logical_name']}") + print(f"Columns: {result['columns_created']}") """ return self._get_odata()._create_table(tablename, schema) def delete_table(self, tablename: str) -> None: - """Delete a custom table by name. + """ + Delete a custom table by name. + + :param tablename: Table friendly name (e.g. ``"SampleItem"``) or full schema name + (e.g. ``"new_SampleItem"``). + :type tablename: str + + :raises ~dataverse_sdk.errors.MetadataError: If the table does not exist or deletion fails. + + .. warning:: + This operation is irreversible and will delete all records in the table along + with the table definition. Use with caution. + + Example: + Delete a custom table:: - Parameters - ---------- - tablename : str - Friendly name (``"SampleItem"``) or a full schema name (``"new_SampleItem"``). + client.delete_table("SampleItem") """ self._get_odata()._delete_table(tablename) def list_tables(self) -> list[str]: - """List all custom tables in the Dataverse environment. + """ + List all custom tables in the Dataverse environment. + + :return: List of custom table names. + :rtype: list[str] - Returns - ------- - list[str] - A list of table names. + Example: + List all custom tables:: + + tables = client.list_tables() + for table in tables: + print(table) """ return self._get_odata()._list_tables() @@ -253,31 +475,57 @@ def upload_file( mime_type: Optional[str] = None, if_none_match: bool = True, ) -> None: - """Upload a file to a Dataverse file column using a logical (singular) name. - - Parameters - ---------- - logical_name : str - Singular logical table name, e.g. "account". 
- record_id : str - GUID of the target record. - file_name_attribute : str - Logical name of the file column attribute. - path : str - Local filesystem path to the file. Stored filename will be the basename of this path. - mode : str | None, keyword-only, optional - Upload strategy: "auto" (default), "small", or "chunk". - mime_type : str | None, keyword-only, optional - Explicit MIME type to persist with the file (e.g. "application/pdf"). - if_none_match : bool, keyword-only, optional - When True (default), sends ``If-None-Match: null`` to only succeed if the column is - currently empty. Set False to always overwrite (uses ``If-Match: *``). - Used for "small" and "chunk" modes only. - - Returns - ------- - None - Returns nothing on success. Raises on failure. + """ + Upload a file to a Dataverse file column. + + :param logical_name: Singular logical table name, e.g. ``"account"``. + :type logical_name: str + :param record_id: GUID of the target record. + :type record_id: str + :param file_name_attribute: Logical name of the file column attribute. + :type file_name_attribute: str + :param path: Local filesystem path to the file. The stored filename will be + the basename of this path. + :type path: str + :param mode: Upload strategy: ``"auto"`` (default), ``"small"``, or ``"chunk"``. + Auto mode selects small or chunked upload based on file size. + :type mode: str or None + :param mime_type: Explicit MIME type to store with the file (e.g. ``"application/pdf"``). + If not provided, the MIME type may be inferred from the file extension. + :type mime_type: str or None + :param if_none_match: When True (default), sends ``If-None-Match: null`` header to only + succeed if the column is currently empty. Set False to always overwrite using + ``If-Match: *``. Used for small and chunk modes only. + :type if_none_match: bool + + :raises ~dataverse_sdk.errors.HttpError: If the upload fails or the file column is not empty + when ``if_none_match=True``. 
+ :raises FileNotFoundError: If the specified file path does not exist. + + .. note:: + Large files are automatically chunked to avoid request size limits. The chunk + mode performs multiple requests with resumable upload support. + + Example: + Upload a PDF file:: + + client.upload_file( + logical_name="account", + record_id=account_id, + file_name_attribute="new_contract", + path="/path/to/contract.pdf", + mime_type="application/pdf" + ) + + Upload with auto mode selection:: + + client.upload_file( + logical_name="email", + record_id=email_id, + file_name_attribute="new_attachment", + path="/path/to/large_file.zip", + mode="auto" + ) """ od = self._get_odata() entity_set = od._entity_set_from_logical(logical_name) @@ -294,22 +542,25 @@ def upload_file( # Cache utilities def flush_cache(self, kind) -> int: - """Flush cached client metadata/state. + """ + Flush cached client metadata or state. + + :param kind: Cache kind to flush. Currently supported values: + + - ``"picklist"``: Clears picklist label cache used for label-to-integer conversion - Currently supported kinds: - - 'picklist': clears entries from the picklist label cache used by label -> int conversion. + Future kinds (e.g. ``"entityset"``, ``"primaryid"``) may be added without + breaking this signature. + :type kind: str - Parameters - ---------- - kind : str - Cache kind to flush. Only 'picklist' is implemented today. Future kinds - (e.g. 'entityset', 'primaryid') can be added without breaking the signature. + :return: Number of cache entries removed. + :rtype: int - Returns - ------- - int - Number of cache entries removed. 
+ Example: + Clear the picklist cache:: + removed = client.flush_cache("picklist") + print(f"Cleared {removed} cached picklist entries") """ return self._get_odata()._flush_cache(kind) diff --git a/src/dataverse_sdk/config.py b/src/dataverse_sdk/config.py index 2f53fcb..42bd051 100644 --- a/src/dataverse_sdk/config.py +++ b/src/dataverse_sdk/config.py @@ -6,6 +6,18 @@ @dataclass(frozen=True) class DataverseConfig: + """ + Configuration settings for Dataverse client operations. + + :param language_code: LCID (Locale ID) for localized labels and messages. Default is 1033 (English - United States). + :type language_code: int + :param http_retries: Optional maximum number of retry attempts for transient HTTP errors. Reserved for future use. + :type http_retries: int or None + :param http_backoff: Optional backoff multiplier (in seconds) between retry attempts. Reserved for future use. + :type http_backoff: float or None + :param http_timeout: Optional request timeout in seconds. Reserved for future use. + :type http_timeout: float or None + """ language_code: int = 1033 # Optional HTTP tuning (not yet wired everywhere; reserved for future use) @@ -15,6 +27,12 @@ class DataverseConfig: @classmethod def from_env(cls) -> "DataverseConfig": + """ + Create a configuration instance with default settings. + + :return: Configuration instance with default values. + :rtype: ~dataverse_sdk.config.DataverseConfig + """ # Environment-free defaults return cls( language_code=1033, diff --git a/src/dataverse_sdk/errors.py b/src/dataverse_sdk/errors.py index 4dfddeb..a61489b 100644 --- a/src/dataverse_sdk/errors.py +++ b/src/dataverse_sdk/errors.py @@ -3,7 +3,24 @@ import datetime as _dt class DataverseError(Exception): - """Base structured error for the Dataverse SDK.""" + """ + Base structured exception for the Dataverse SDK. + + :param message: Human-readable error message. + :type message: str + :param code: Error category code (e.g. ``"validation_error"``, ``"http_error"``). 
+ :type code: str + :param subcode: Optional subcategory or specific error identifier. + :type subcode: str or None + :param status_code: Optional HTTP status code if the error originated from an HTTP response. + :type status_code: int or None + :param details: Optional dictionary containing additional diagnostic information. + :type details: dict or None + :param source: Error source, either ``"client"`` or ``"server"``. + :type source: str + :param is_transient: Whether the error is potentially transient and may succeed on retry. + :type is_transient: bool + """ def __init__( self, message: str, @@ -26,6 +43,12 @@ def __init__( self.timestamp = _dt.datetime.utcnow().isoformat() + "Z" def to_dict(self) -> Dict[str, Any]: + """ + Convert the error to a dictionary representation. + + :return: Dictionary containing all error properties. + :rtype: dict + """ return { "message": self.message, "code": self.code, @@ -41,18 +64,74 @@ def __repr__(self) -> str: # pragma: no cover return f"{self.__class__.__name__}(code={self.code!r}, subcode={self.subcode!r}, message={self.message!r})" class ValidationError(DataverseError): + """ + Exception raised for client-side validation failures. + + :param message: Human-readable validation error message. + :type message: str + :param subcode: Optional specific validation error identifier. + :type subcode: str or None + :param details: Optional dictionary with additional validation context. + :type details: dict or None + """ def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): super().__init__(message, code="validation_error", subcode=subcode, details=details, source="client") class MetadataError(DataverseError): + """ + Exception raised for metadata operation failures. + + :param message: Human-readable metadata error message. + :type message: str + :param subcode: Optional specific metadata error identifier. 
+ :type subcode: str or None + :param details: Optional dictionary with additional metadata context. + :type details: dict or None + """ def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): super().__init__(message, code="metadata_error", subcode=subcode, details=details, source="client") class SQLParseError(DataverseError): + """ + Exception raised for SQL query parsing failures. + + :param message: Human-readable SQL parsing error message. + :type message: str + :param subcode: Optional specific SQL parsing error identifier. + :type subcode: str or None + :param details: Optional dictionary with SQL query context and parse information. + :type details: dict or None + """ def __init__(self, message: str, *, subcode: Optional[str] = None, details: Optional[Dict[str, Any]] = None): super().__init__(message, code="sql_parse_error", subcode=subcode, details=details, source="client") class HttpError(DataverseError): + """ + Exception raised for HTTP request failures from the Dataverse Web API. + + :param message: Human-readable HTTP error message, typically from the API error response. + :type message: str + :param status_code: HTTP status code (e.g. 400, 404, 500). + :type status_code: int + :param is_transient: Whether the error is transient (429, 503, 504) and may succeed on retry. + :type is_transient: bool + :param subcode: Optional HTTP status category (e.g. ``"4xx"``, ``"5xx"``). + :type subcode: str or None + :param service_error_code: Optional Dataverse-specific error code from the API response. + :type service_error_code: str or None + :param correlation_id: Optional correlation ID for tracking requests across services. + :type correlation_id: str or None + :param request_id: Optional request ID from the API response headers. + :type request_id: str or None + :param traceparent: Optional W3C trace context for distributed tracing. 
+ :type traceparent: str or None + :param body_excerpt: Optional excerpt of the response body for diagnostics. + :type body_excerpt: str or None + :param retry_after: Optional number of seconds to wait before retrying (from Retry-After header). + :type retry_after: int or None + :param details: Optional additional diagnostic details. + :type details: dict or None + """ def __init__( self, message: str, diff --git a/src/dataverse_sdk/odata_pandas_wrappers.py b/src/dataverse_sdk/odata_pandas_wrappers.py index 3f857cb..d373ba8 100644 --- a/src/dataverse_sdk/odata_pandas_wrappers.py +++ b/src/dataverse_sdk/odata_pandas_wrappers.py @@ -1,26 +1,8 @@ -"""Pandas-friendly wrappers around the low-level `ODataClient`. - -These helpers allow using pandas DataFrames / Series / Indexes as inputs and -outputs for common CRUD + query operations. - -Design notes: -* All methods are thin convenience wrappers that iterate row-by-row; no OData - batch requests are issued (future enhancement opportunity). -* create_df: creates one record per row, returning a new DataFrame with an - added id column (default name 'id'). -* update_df: updates records based on an id column; returns a DataFrame with - per-row success booleans and optional error messages. -* delete_ids: deletes a collection of ids (Series, list, or Index) returning a - DataFrame summarizing success/failure. -* get_ids: fetches a set of ids returning a DataFrame of the merged JSON - objects (outer union of keys). Missing keys are NaN. -* query_sql_df: runs a SQL query via the Web API `?sql=` parameter and returns the result rows as - a DataFrame (empty DataFrame if no rows). - -Edge cases & behaviors: -* Empty inputs return empty DataFrames without calling the API. -* Errors on individual rows are captured instead of aborting the whole batch. -* The default id column name is 'id' but can be overridden. +""" +Pandas-friendly wrappers for Dataverse OData operations. 
+ +This module provides :class:`PandasODataClient`, a high-level wrapper that enables +DataFrame-based CRUD and query operations. """ from __future__ import annotations @@ -37,17 +19,24 @@ @dataclass class RowError: + """ + Container for row-level error information. + + :param index: Zero-based row index where the error occurred. + :type index: int + :param message: Error message describing the failure. + :type message: str + """ index: int message: str class PandasODataClient: - """High-level convenience wrapper exposing pandas-friendly methods. + """ + High-level pandas-friendly wrapper for Dataverse OData operations. - Parameters - ---------- - odata_client : ODataClient - An initialized low-level client (token acquisition & base URL ready). + :param odata_client: Initialized low-level OData client with authentication configured. + :type odata_client: ~dataverse_sdk.odata.ODataClient """ def __init__(self, odata_client: ODataClient) -> None: @@ -55,19 +44,17 @@ def __init__(self, odata_client: ODataClient) -> None: # ---------------------------- Create --------------------------------- def create_df(self, logical_name: str, record: pd.Series) -> str: - """Create a single record from a pandas Series and return the GUID. - - Parameters - ---------- - logical_name : str - Logical (singular) entity name, e.g. "account". - record : pandas.Series - Series whose index labels are field logical names. - - Returns - ------- - str - The created record's GUID. + """ + Create a single record from a pandas Series and return the GUID. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param record: Series whose index labels are field logical names and values are field values. + :type record: pandas.Series + :return: The created record's GUID. + :rtype: str + :raises TypeError: If ``record`` is not a pandas Series. + :raises RuntimeError: If the internal create operation returns an unexpected format. 
""" if not isinstance(record, pd.Series): raise TypeError("record must be a pandas Series") @@ -79,24 +66,16 @@ def create_df(self, logical_name: str, record: pd.Series) -> str: # ---------------------------- Update --------------------------------- def update(self, logical_name: str, record_id: str, entity_data: pd.Series) -> None: - """Update a single record (returns None). - - Parameters - ---------- - logical_name : str - Logical (singular) entity name. - record_id : str - GUID of the record to update. - entity_data : pandas.Series - Series whose index labels are field logical names; any null (NaN) values - are ignored (not sent). An 'id' key, if present, is ignored. - - Raises - ------ - TypeError - If entity_data is not a Series. - Exception - Propagates underlying HTTP errors from the OData client. + """ + Update a single record with values from a pandas Series. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param record_id: GUID of the record to update. + :type record_id: str + :param entity_data: Series whose index labels are field logical names. NaN values are ignored. + :type entity_data: pandas.Series + :raises TypeError: If ``entity_data`` is not a pandas Series. """ if not isinstance(entity_data, pd.Series): raise TypeError("entity_data must be a pandas Series") @@ -107,19 +86,15 @@ def update(self, logical_name: str, record_id: str, entity_data: pd.Series) -> N # ---------------------------- Delete --------------------------------- def delete_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index) -> pd.DataFrame: - """Delete a collection of record IDs and return a summary DataFrame. - - Parameters - ---------- - logical_name : str - Logical (singular) entity name. - ids : sequence[str] | pandas.Series | pandas.Index - Collection of GUIDs to delete. 
- + Returns - ------- - pandas.DataFrame - Columns: id, success (bool), error (str nullable) + """ + Delete a collection of record IDs and return a summary DataFrame. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param ids: Collection of GUIDs to delete. Can be a list, pandas Series, or pandas Index. + :type ids: Sequence[str] or pandas.Series or pandas.Index + :return: DataFrame with columns: ``id`` (str), ``success`` (bool), ``error`` (str or None). + :rtype: pandas.DataFrame """ if isinstance(ids, (pd.Series, pd.Index)): id_list = [str(x) for x in ids.tolist()] @@ -136,9 +111,17 @@ def delete_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Inde # ------------------------------ Get ---------------------------------- def get_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index, select: Optional[Iterable[str]] = None) -> pd.DataFrame: - """Fetch multiple records by ID and return a DataFrame. - - Missing records are included with NaN for fields and an error column entry. + """ + Fetch multiple records by ID and return a DataFrame. + + :param logical_name: Logical (singular) entity name, e.g. ``"account"``. + :type logical_name: str + :param ids: Collection of GUIDs to fetch. Can be a list, pandas Series, or pandas Index. + :type ids: Sequence[str] or pandas.Series or pandas.Index + :param select: Optional iterable of field logical names to retrieve. If None, all fields are returned. + :type select: Iterable[str] or None + :return: DataFrame containing fetched records. Missing records are included with NaN for fields and an entry in the ``error`` column. 
+ :rtype: pandas.DataFrame """ if isinstance(ids, (pd.Series, pd.Index)): id_list = [str(x) for x in ids.tolist()] @@ -165,10 +148,14 @@ def get_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index, # --------------------------- Query SQL ------------------------------- def query_sql_df(self, sql: str) -> pd.DataFrame: - """Execute a SQL query via the Dataverse Web API `?sql=` parameter and return a DataFrame. + """ + Execute a SQL query via the Dataverse Web API and return a DataFrame. - The statement must adhere to the supported subset (single SELECT, optional WHERE/TOP/ORDER BY, no joins). - Empty result -> empty DataFrame (columns inferred only if rows present). + :param sql: SQL SELECT statement following Dataverse Web API SQL syntax. + :type sql: str + :return: DataFrame containing query results. Returns an empty DataFrame if no rows match. + :rtype: pandas.DataFrame + :raises ValueError: If the API returns a malformed JSON response. """ rows: Any = self._c.query_sql(sql)