diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8828d4f..4fcf8b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,9 +23,6 @@ All notable changes to this project will be documented in this file.
 **Table Management:**
 - Table metadata operations (create, inspect, delete custom tables)
 
-**Integration & Analysis:**
-- Pandas DataFrame integration for seamless data analysis workflows
-
 **Reliability & Error Handling:**
 - Comprehensive error handling with specific exception types (`DataverseError`, `AuthenticationError`, etc.)
 - HTTP retry logic with exponential backoff for resilient operations
diff --git a/README.md b/README.md
index 1d34165..0e22790 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,6 @@ A Python client library for Microsoft Dataverse that provides a unified interfac
 - **📎 File Operations**: Upload files to Dataverse file columns with automatic chunking for large files
 - **🔐 Azure Identity**: Built-in authentication using Azure Identity credential providers with comprehensive support
 - **🛡️ Error Handling**: Structured exception hierarchy with detailed error context and retry guidance
-- **🐼 Pandas Integration**: Preliminary DataFrame-oriented operations for data analysis workflows
 
 ## Getting started
 
@@ -264,7 +263,6 @@ Explore our comprehensive examples in the [`examples/`](examples/) directory:
 
 **🚀 Advanced Usage:**
 - **[Complete Walkthrough](examples/advanced/complete_walkthrough.py)** - Full feature demonstration with production patterns
 - **[File Upload](examples/advanced/file_upload.py)** - Upload files to Dataverse file columns
-- **[Pandas Integration](examples/advanced/pandas_integration.py)** - DataFrame-based operations for data analysis
 
 📖 See the [examples README](examples/README.md) for detailed guidance and learning progression.
diff --git a/examples/README.md b/examples/README.md
index 15c05ff..dec7427 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -32,7 +32,7 @@ Start here for getting up and running with the SDK:
 
 ### 🔬 Advanced Examples (`advanced/`)
 Deep-dive into production-ready patterns and specialized functionality:
-- **`complete_walkthrough.py`** - **COMPREHENSIVE DEMO** 🚀
+- **`walkthrough.py`** - **COMPREHENSIVE DEMO** 🚀
   - Full SDK feature demonstration with production-ready patterns
   - Table creation with custom schemas and enums
   - Single and bulk CRUD operations with error handling
@@ -44,9 +44,6 @@ Deep-dive into production-ready patterns and specialized functionality:
   - File upload to Dataverse file columns with chunking
   - Advanced file handling patterns
 
-- **`pandas_integration.py`** - **DATA ANALYSIS** 📊
-  - DataFrame-based operations for data analysis
-  - Pandas integration patterns
 
 
 ## 🚀 Getting Started
diff --git a/examples/advanced/pandas_integration.py b/examples/advanced/pandas_integration.py
deleted file mode 100644
index fc62e06..0000000
--- a/examples/advanced/pandas_integration.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT license.
-
-"""
-PowerPlatform Dataverse Client - Pandas Integration Example
-
-This example demonstrates advanced DataFrame-based operations using the
-PowerPlatform-Dataverse-Client SDK with pandas integration.
-
-Prerequisites:
-    pip install PowerPlatform-Dataverse-Client
-    pip install azure-identity
-    pip install pandas
-
-For local development, you can also run from source by uncommenting the sys.path line below.
-"""
-
-import sys
-from pathlib import Path
-import os
-
-# Uncomment for local development from source
-# sys.path.append(str(Path(__file__).resolve().parents[2] / "src"))
-
-from PowerPlatform.Dataverse import DataverseClient
-from PowerPlatform.Dataverse.utils.pandas_adapter import PandasODataClient
-from azure.identity import InteractiveBrowserCredential
-import traceback
-import requests
-import time
-import pandas as pd
-
-if not sys.stdin.isatty():
-    print("Interactive input required for org URL. Run this script in a TTY.")
-    sys.exit(1)
-entered = input("Enter Dataverse org URL (e.g. https://yourorg.crm.dynamics.com): ").strip()
-if not entered:
-    print("No URL entered; exiting.")
-    sys.exit(1)
-base_url = entered.rstrip('/')
-client = DataverseClient(base_url=base_url, credential=InteractiveBrowserCredential())
-# Use the internal OData client for pandas helpers
-PANDAS = PandasODataClient(client._get_odata())
-
-# Small generic backoff helper used only in this quickstart
-# Include common transient statuses like 429/5xx to improve resilience.
-def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403, 404, 409, 412, 429, 500, 502, 503, 504), retry_if=None):
-    last_exc = None
-    for delay in delays:
-        if delay:
-            time.sleep(delay)
-        try:
-            return op()
-        except Exception as ex:
-            print(f'Request failed: {ex}')
-            last_exc = ex
-            if retry_if and retry_if(ex):
-                continue
-            if isinstance(ex, requests.exceptions.HTTPError):
-                code = getattr(getattr(ex, 'response', None), 'status_code', None)
-                if code in retry_http_statuses:
-                    continue
-            break
-    if last_exc:
-        raise last_exc
-
-print("(Pandas) Ensure custom table exists (Metadata):")
-table_info = None
-created_this_run = False
-
-# First check for existing table
-existing = client.get_table_info("new_SampleItem")
-if existing:
-    table_info = existing
-    created_this_run = False
-    print({
-        "table": table_info.get("table_schema_name"),
-        "existed": True,
-        "entity_set": table_info.get("entity_set_name"),
-        "logical": table_info.get("table_logical_name"),
-        "metadata_id": table_info.get("metadata_id"),
-    })
-
-else:
-    # Create it since it doesn't exist
-    try:
-        table_info = client.create_table(
-            "new_SampleItem",
-            {
-                "new_Code": "string",
-                "new_Count": "int",
-                "new_Amount": "decimal",
-                "new_When": "datetime",
-                "new_Active": "bool",
-            },
-        )
-        created_this_run = True if table_info and table_info.get("columns_created") else False
-        print({
-            "table": table_info.get("table_schema_name") if table_info else None,
-            "existed": False,
-            "entity_set": table_info.get("entity_set_name") if table_info else None,
-            "logical": table_info.get("table_logical_name") if table_info else None,
-            "metadata_id": table_info.get("metadata_id") if table_info else None,
-        })
-    except Exception as e:
-        # Print full stack trace and any HTTP response details if present
-        print("Create table failed:")
-        traceback.print_exc()
-        resp = getattr(e, 'response', None)
-        if resp is not None:
-            try:
-                print({
-                    "status": resp.status_code,
-                    "url": getattr(resp, 'url', None),
-                    "body": resp.text[:2000] if getattr(resp, 'text', None) else None,
-                })
-            except Exception:
-                pass
-        # Fail fast: all operations must use the custom table
-        sys.exit(1)
-
-logical = table_info.get("table_logical_name")
-# Derive attribute logical name prefix from the entity logical name
-attr_prefix = logical.split("_", 1)[0] if "_" in logical else logical
-record_data = {
-    f"{attr_prefix}_name": "Sample X",
-    f"{attr_prefix}_code": "X001",
-    f"{attr_prefix}_count": 42,
-    f"{attr_prefix}_amount": 123.45,
-    f"{attr_prefix}_when": "2025-01-01",
-    f"{attr_prefix}_active": True,
-}
-
-# 2) Create a record in the new table
-print("(Pandas) Create record (OData via Pandas wrapper):")
-record_id = None
-try:
-    record_id = backoff_retry(lambda: PANDAS.create_df(logical, pd.Series(record_data)))
-    print({"entity": logical, "created_id": record_id})
-except Exception as e:
-    print(f"Create failed: {e}")
-    sys.exit(1)
-
-# 3) Read record via OData
-print("(Pandas) Read (OData via Pandas wrapper):")
-try:
-    if record_id:
-        df = backoff_retry(lambda: PANDAS.get_ids(logical, pd.Series([record_id])))
-        print(df.head())
-        id_key = f"{logical}id"
-        rid = df.iloc[0].get(id_key) if not df.empty else None
-        print({"entity": logical, "read": True, "id": rid})
-    else:
-        raise RuntimeError("No record created; skipping read.")
-except Exception as e:
-    print(f"Get failed: {e}")
-
-# 3.5) Update record, then read again and verify
-print("(Pandas) Update (OData via Pandas wrapper) and verify:")
-try:
-    if not record_id:
-        raise RuntimeError("No record created; skipping update.")
-
-    update_data = {
-        f"{attr_prefix}_code": "X002",
-        f"{attr_prefix}_count": 99,
-        f"{attr_prefix}_amount": 543.21,
-        f"{attr_prefix}_when": "2025-02-02",
-        f"{attr_prefix}_active": False,
-    }
-    expected_checks = {
-        f"{attr_prefix}_code": "X002",
-        f"{attr_prefix}_count": 99,
-        f"{attr_prefix}_active": False,
-    }
-    amount_key = f"{attr_prefix}_amount"
-
-    # Perform update via Pandas wrapper (returns None), then re-fetch to verify
-    backoff_retry(lambda: PANDAS.update(logical, record_id, pd.Series(update_data)))
-    print({"entity": logical, "updated": True})
-
-    # Re-read and verify from DataFrame
-    after_df = backoff_retry(lambda: PANDAS.get_ids(logical, pd.Series([record_id])))
-    row = after_df.iloc[0] if not after_df.empty else {}
-
-    # Verify string/int/bool fields
-    for k, v in expected_checks.items():
-        gv = row.get(k) if hasattr(row, 'get') else None
-        assert gv == v, f"Field {k} expected {v}, got {gv}"
-
-    # Verify decimal with tolerance
-    got = row.get(amount_key) if hasattr(row, 'get') else None
-    got_f = float(got) if got is not None else None
-    assert got_f is not None and abs(got_f - 543.21) < 1e-6, f"Field {amount_key} expected 543.21, got {got}"
-
-    print({"entity": logical, "verified": True})
-except Exception as e:
-    print(f"Update/verify failed: {e}")
-    sys.exit(1)
-
-# 4) Query records via SQL (Web API ?sql=)
-print("(Pandas) Query (SQL via Web API ?sql=):")
-try:
-    import time
-
-    def _run_query():
-        id_key = f"{logical}id"
-        cols = f"{id_key}, {attr_prefix}_code, {attr_prefix}_amount, {attr_prefix}_when"
-        return PANDAS.query_sql_df(f"SELECT TOP 3 {cols} FROM {logical} ORDER BY {attr_prefix}_amount DESC")
-    def _retry_if(ex: Exception) -> bool:
-        msg = str(ex) if ex else ""
-        return ("Invalid table name" in msg) or ("Invalid object name" in msg)
-    df_rows = backoff_retry(_run_query, delays=(0, 2, 5), retry_http_statuses=(), retry_if=_retry_if)
-    id_key = f"{logical}id"
-    ids = df_rows[id_key].dropna().tolist() if (df_rows is not None and id_key in df_rows.columns) else []
-    print({"entity": logical, "rows": (0 if df_rows is None else len(df_rows)), "ids": ids})
-except Exception as e:
-    print(f"SQL query failed: {e}")
-
-# 5) Delete record
-print("(Pandas) Delete (OData via Pandas wrapper):")
-try:
-    if record_id:
-        backoff_retry(lambda: PANDAS.delete_ids(logical, record_id))
-        print({"entity": logical, "deleted": True})
-    else:
-        raise RuntimeError("No record created; skipping delete.")
-except Exception as e:
-    print(f"Delete failed: {e}")
-
-# 6) Cleanup: delete the custom table if it exists
-print("Cleanup (Metadata):")
-try:
-    # Delete if present, regardless of whether it was created in this run
-    info = client.get_table_info("new_SampleItem")
-    if info:
-        client.delete_table("new_SampleItem")
-        print({"table_deleted": True})
-    else:
-        print({"table_deleted": False, "reason": "not found"})
-except Exception as e:
-    print(f"Delete table failed: {e}")
diff --git a/examples/basic/installation_example.py b/examples/basic/installation_example.py
index 1974a64..ffa803c 100644
--- a/examples/basic/installation_example.py
+++ b/examples/basic/installation_example.py
@@ -79,9 +79,6 @@ def validate_imports():
         from PowerPlatform.Dataverse.core.config import DataverseConfig
         print(f" ✅ Core config: DataverseConfig")
 
-        from PowerPlatform.Dataverse.utils.pandas_adapter import PandasODataClient
-        print(f" ✅ Utils: PandasODataClient")
-
         from PowerPlatform.Dataverse.data.odata import ODataClient
         print(f" ✅ Data layer: ODataClient")
 
diff --git a/pyproject.toml b/pyproject.toml
index 5f0c806..318a282 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ dependencies = [
     "azure-identity>=1.17.0",
    "azure-core>=1.30.2",
     "requests>=2.32.0",
-    "pandas>=2.2.0",
 ]
 
 [project.urls]
diff --git a/src/PowerPlatform/Dataverse/utils/__init__.py b/src/PowerPlatform/Dataverse/utils/__init__.py
index d12c4e0..e08d110 100644
--- a/src/PowerPlatform/Dataverse/utils/__init__.py
+++ b/src/PowerPlatform/Dataverse/utils/__init__.py
@@ -4,7 +4,7 @@
 """
 Utilities and adapters for the Dataverse SDK.
 
-This module contains adapters (like Pandas integration).
+Placeholder module for future utility adapters.
 """
 
 __all__ = []
\ No newline at end of file
diff --git a/src/PowerPlatform/Dataverse/utils/pandas_adapter.py b/src/PowerPlatform/Dataverse/utils/pandas_adapter.py
deleted file mode 100644
index 4458457..0000000
--- a/src/PowerPlatform/Dataverse/utils/pandas_adapter.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT license.
-
-"""
-Pandas-friendly wrappers for Dataverse OData operations.
-
-This module provides :class:`PowerPlatform.Dataverse.utils.pandas_adapter.PandasODataClient`,
-a high-level wrapper that enables DataFrame-based CRUD and query operations.
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Iterable, List, Optional, Sequence, Any
-import re
-import json
-
-import pandas as pd
-
-from ..data.odata import ODataClient
-
-
-@dataclass
-class RowError:
-    """
-    Container for row-level error information.
-
-    :param index: Zero-based row index where the error occurred.
-    :type index: ``int``
-    :param message: Error message describing the failure.
-    :type message: ``str``
-    """
-    index: int
-    message: str
-
-
-class PandasODataClient:
-    """
-    High-level pandas-friendly wrapper for Dataverse OData operations.
-
-    :param odata_client: Initialized low-level OData client with authentication configured.
-    :type odata_client: ~PowerPlatform.Dataverse.data.odata.ODataClient
-    """
-
-    def __init__(self, odata_client: ODataClient) -> None:
-        self._c = odata_client
-
-    # ---------------------------- Create ---------------------------------
-    def create_df(self, logical_name: str, record: pd.Series) -> str:
-        """
-        Create a single record from a pandas Series and return the GUID.
-
-        :param logical_name: Logical (singular) entity name, e.g. ``"account"``.
-        :type logical_name: ``str``
-        :param record: Series whose index labels are field logical names and values are field values.
-        :type record: ``pandas.Series``
-        :return: The created record's GUID.
-        :rtype: ``str``
-        :raises TypeError: If ``record`` is not a pandas Series.
-        :raises RuntimeError: If the internal create operation returns an unexpected format.
-        """
-        if not isinstance(record, pd.Series):
-            raise TypeError("record must be a pandas Series")
-        payload = {k: v for k, v in record.items()}
-        created_ids = self._c.create(logical_name, payload)
-        if not isinstance(created_ids, list) or len(created_ids) != 1 or not isinstance(created_ids[0], str):
-            raise RuntimeError("Unexpected create return shape (expected single-element list of GUID str)")
-        return created_ids[0]
-
-    # ---------------------------- Update ---------------------------------
-    def update(self, logical_name: str, record_id: str, entity_data: pd.Series) -> None:
-        """
-        Update a single record with values from a pandas Series.
-
-        :param logical_name: Logical (singular) entity name, e.g. ``"account"``.
-        :type logical_name: ``str``
-        :param record_id: GUID of the record to update.
-        :type record_id: ``str``
-        :param entity_data: Series whose index labels are field logical names. NaN values are ignored.
-        :type entity_data: ``pandas.Series``
-        :raises TypeError: If ``entity_data`` is not a pandas Series.
-        """
-        if not isinstance(entity_data, pd.Series):
-            raise TypeError("entity_data must be a pandas Series")
-        payload = {k: v for k, v in entity_data.items()}
-        if not payload:
-            return  # nothing to send
-        self._c.update(logical_name, record_id, payload)
-
-    # ---------------------------- Delete ---------------------------------
-    def delete_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index) -> pd.DataFrame:
-        """
-        Delete a collection of record IDs and return a summary DataFrame.
-
-        :param logical_name: Logical (singular) entity name, e.g. ``"account"``.
-        :type logical_name: ``str``
-        :param ids: Collection of GUIDs to delete. Can be a list, pandas Series, or pandas Index.
-        :type ids: ``Sequence[str]`` or ``pandas.Series`` or ``pandas.Index``
-        :return: DataFrame with columns: ``id`` (``str``), ``success`` (``bool``), ``error`` (``str`` | ``None``).
-        :rtype: ``pandas.DataFrame``
-        """
-        if isinstance(ids, (pd.Series, pd.Index)):
-            id_list = [str(x) for x in ids.tolist()]
-        else:
-            id_list = [str(x) for x in ids]
-        results = []
-        for rid in id_list:
-            try:
-                self._c.delete(logical_name, rid)
-                results.append({"id": rid, "success": True, "error": None})
-            except Exception as e:  # noqa: BLE001
-                results.append({"id": rid, "success": False, "error": str(e)})
-        return pd.DataFrame(results)
-
-    # ------------------------------ Get ----------------------------------
-    def get_ids(self, logical_name: str, ids: Sequence[str] | pd.Series | pd.Index, select: Optional[Iterable[str]] = None) -> pd.DataFrame:
-        """
-        Fetch multiple records by ID and return a DataFrame.
-
-        :param logical_name: Logical (singular) entity name, e.g. ``"account"``.
-        :type logical_name: ``str``
-        :param ids: Collection of GUIDs to fetch. Can be a list, pandas Series, or pandas Index.
-        :type ids: ``Sequence[str]`` or ``pandas.Series`` or ``pandas.Index``
-        :param select: Optional iterable of field logical names to retrieve. If None, all fields are returned.
-        :type select: ``Iterable[str]`` | ``None``
-        :return: DataFrame containing fetched records. Failed fetches will have an ``error`` column.
-        :rtype: ``pandas.DataFrame``
-        """
-        if isinstance(ids, (pd.Series, pd.Index)):
-            id_list = [str(x) for x in ids.tolist()]
-        else:
-            id_list = [str(x) for x in ids]
-        rows = []
-        any_errors = False
-        select_arg = None
-        if select:
-            # ensure iterable of strings -> list -> join
-            select_list = [str(c) for c in select]
-            if select_list:
-                select_arg = ",".join(select_list)
-        for rec_id in id_list:
-            try:
-                data = self._c.get(logical_name, rec_id, select=select_arg)
-                rows.append(data)
-            except Exception as e:  # noqa: BLE001
-                any_errors = True
-                rows.append({"id": rec_id, "error": str(e)})
-        if not rows:
-            return pd.DataFrame(columns=["id"])
-        return pd.DataFrame(rows)
-
-    # --------------------------- Query SQL -------------------------------
-    def query_sql_df(self, sql: str) -> pd.DataFrame:
-        """
-        Execute a SQL query via the Dataverse Web API and return a DataFrame.
-
-        :param sql: SQL SELECT statement following Dataverse Web API SQL syntax.
-        :type sql: ``str``
-        :return: DataFrame containing query results. Returns an empty DataFrame if no rows match.
-        :rtype: ``pandas.DataFrame``
-        :raises ValueError: If the API returns a malformed JSON response.
-        """
-        rows: Any = self._c.query_sql(sql)
-
-        # If API returned a JSON string, parse it
-        if isinstance(rows, str):
-            try:
-                rows = json.loads(rows)
-            except json.JSONDecodeError as e:  # noqa: BLE001
-                raise ValueError("query_sql returned a string that is not valid JSON") from e
-
-        # If a dict wrapper came back, try common shapes
-        if isinstance(rows, dict):
-            # Shape: {"rows": [...], "columns": [...]} (some APIs)
-            if "rows" in rows and "columns" in rows and isinstance(rows["rows"], list):
-                return pd.DataFrame(rows["rows"], columns=rows.get("columns"))
-            # Shape: {"value": [...]}
-            if "value" in rows and isinstance(rows["value"], list):
-                rows = rows["value"]
-            else:
-                # Treat single dict payload as one-row result
-                rows = [rows]
-
-        # Now rows should ideally be a list
-        if not rows:
-            return pd.DataFrame()
-
-        if isinstance(rows, list):
-            if len(rows) == 0:
-                return pd.DataFrame()
-            # All dicts -> normal tabular expansion
-            if all(isinstance(r, dict) for r in rows):
-                return pd.DataFrame(rows)
-            # Mixed or scalar list -> single column DataFrame
-            return pd.DataFrame({"value": rows})
-
-        # Fallback: wrap anything else
-        return pd.DataFrame({"value": [rows]})
-
-__all__ = ["PandasODataClient"]