From df27f18d4da46caed0b55911a374631aaefbe32a Mon Sep 17 00:00:00 2001
From: Chandra
Date: Wed, 28 Jan 2026 15:29:13 +0000
Subject: [PATCH 1/5] feat: Move Zonal Buckets features out of _experimental

---
 .../storage/_experimental/asyncio/_utils.py | 48 +-
 .../asyncio/async_abstract_object_stream.py | 74 +--
 .../asyncio/async_appendable_object_writer.py | 600 +-----------------
 .../asyncio/async_grpc_client.py | 105 +--
 .../asyncio/async_multi_range_downloader.py | 511 +--------------
 .../asyncio/async_read_object_stream.py | 203 +-----
 .../asyncio/async_write_object_stream.py | 241 +------
 .../_experimental/asyncio/retry/_helpers.py | 130 +---
 .../asyncio/retry/base_strategy.py | 90 +--
 .../retry/bidi_stream_retry_manager.py | 74 +--
 .../retry/reads_resumption_strategy.py | 162 +----
 .../retry/writes_resumption_strategy.py | 152 +----
 .../storage/_experimental/grpc_client.py | 129 +---
 google/cloud/storage/asyncio/_utils.py | 41 ++
 .../asyncio/async_abstract_object_stream.py | 67 ++
 .../asyncio/async_appendable_object_writer.py | 595 +++++++++++++++++
 .../storage/asyncio/async_grpc_client.py | 100 +++
 .../asyncio/async_multi_range_downloader.py | 506 +++++++++++++++
 .../asyncio/async_read_object_stream.py | 198 ++++++
 .../asyncio/async_write_object_stream.py | 236 +++++++
 .../cloud/storage/asyncio/retry/_helpers.py | 125 ++++
 .../storage/asyncio/retry/base_strategy.py | 83 +++
 .../retry/bidi_stream_retry_manager.py | 69 ++
 .../retry/reads_resumption_strategy.py | 157 +++++
 .../retry/writes_resumption_strategy.py | 147 +++++
 ...rage_create_and_write_appendable_object.py | 4 +-
 ...orage_finalize_appendable_object_upload.py | 4 +-
 ...orage_open_multiple_objects_ranged_read.py | 9 +-
 ...torage_open_object_multiple_ranged_read.py | 4 +-
 .../storage_open_object_read_full_object.py | 4 +-
 .../storage_open_object_single_ranged_read.py | 4 +-
 ...rage_pause_and_resume_appendable_upload.py | 4 +-
 .../storage_read_appendable_object_tail.py | 6 +-
 .../zonal_buckets/zonal_snippets_test.py | 4 +-
 tests/system/test_zonal.py | 6 +-
 .../retry/test_bidi_stream_retry_manager.py | 4 +-
 .../retry/test_reads_resumption_strategy.py | 4 +-
 .../retry/test_writes_resumption_strategy.py | 4 +-
 .../test_async_appendable_object_writer.py | 12 +-
 tests/unit/asyncio/test_async_grpc_client.py | 4 +-
 .../test_async_multi_range_downloader.py | 30 +-
 .../asyncio/test_async_read_object_stream.py | 52 +-
 .../asyncio/test_async_write_object_stream.py | 10 +-
 43 files changed, 2518 insertions(+), 2494 deletions(-)
 create mode 100644 google/cloud/storage/asyncio/_utils.py
 create mode 100644 google/cloud/storage/asyncio/async_abstract_object_stream.py
 create mode 100644 google/cloud/storage/asyncio/async_appendable_object_writer.py
 create mode 100644 google/cloud/storage/asyncio/async_grpc_client.py
 create mode 100644 google/cloud/storage/asyncio/async_multi_range_downloader.py
 create mode 100644 google/cloud/storage/asyncio/async_read_object_stream.py
 create mode 100644 google/cloud/storage/asyncio/async_write_object_stream.py
 create mode 100644 google/cloud/storage/asyncio/retry/_helpers.py
 create mode 100644 google/cloud/storage/asyncio/retry/base_strategy.py
 create mode 100644 google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py
 create mode 100644 google/cloud/storage/asyncio/retry/reads_resumption_strategy.py
 create mode 100644 google/cloud/storage/asyncio/retry/writes_resumption_strategy.py

diff --git a/google/cloud/storage/_experimental/asyncio/_utils.py b/google/cloud/storage/_experimental/asyncio/_utils.py
index 
170a0cfae..7e81a4bc7 100644 --- a/google/cloud/storage/_experimental/asyncio/_utils.py +++ b/google/cloud/storage/_experimental/asyncio/_utils.py @@ -1,41 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import google_crc32c +# Import everything from the new stable module +from google.cloud.storage.asyncio._utils import * # noqa -from google.api_core import exceptions - - -def raise_if_no_fast_crc32c(): - """Check if the C-accelerated version of google-crc32c is available. - - If not, raise an error to prevent silent performance degradation. - - raises google.api_core.exceptions.FailedPrecondition: If the C extension is not available. - returns: True if the C extension is available. - rtype: bool - - """ - if google_crc32c.implementation != "c": - raise exceptions.FailedPrecondition( - "The google-crc32c package is not installed with C support. " - "C extension is required for faster data integrity checks." - "For more information, see https://github.com/googleapis/python-crc32c." - ) - - -def update_write_handle_if_exists(obj, response): - """Update the write_handle attribute of an object if it exists in the response.""" - if hasattr(response, "write_handle") and response.write_handle is not None: - obj.write_handle = response.write_handle +warnings.warn( + "google.cloud.storage._experimental.asyncio._utils has been moved to google.cloud.storage.asyncio._utils. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 26cbab7a0..538241bd2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -1,67 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import abc -from typing import Any, Optional +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_abstract_object_stream import * # noqa - -class _AsyncAbstractObjectStream(abc.ABC): - """Abstract base class to represent gRPC bidi-stream for GCS ``Object``. - - Concrete implementation of this class could be ``_AsyncReadObjectStream`` - or ``_AsyncWriteObjectStream``. - - :type bucket_name: str - :param bucket_name: (Optional) The name of the bucket containing the object. 
- - :type object_name: str - :param object_name: (Optional) The name of the object. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type handle: Any - :param handle: (Optional) The handle for the object, could be read_handle or - write_handle, based on how the stream is used. - """ - - def __init__( - self, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - handle: Optional[Any] = None, - ) -> None: - super().__init__() - self.bucket_name: str = bucket_name - self.object_name: str = object_name - self.generation_number: Optional[int] = generation_number - self.handle: Optional[Any] = handle - - @abc.abstractmethod - async def open(self) -> None: - pass - - @abc.abstractmethod - async def close(self) -> None: - pass - - @abc.abstractmethod - async def send(self, protobuf: Any) -> None: - pass - - @abc.abstractmethod - async def recv(self) -> Any: - pass +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_abstract_object_stream has been moved to google.cloud.storage.asyncio.async_abstract_object_stream. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index 1cc099043..53b813643 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -1,595 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM (Technical Account Manager) -if you want to use these Rapid Storage APIs. +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_appendable_object_writer import * # noqa -""" -from io import BufferedReader -import io -import logging -from typing import List, Optional, Tuple, Union - -from google.api_core import exceptions -from google.api_core.retry_async import AsyncRetry -from google.rpc import status_pb2 -from google.cloud._storage_v2.types import BidiWriteObjectRedirectedError -from google.cloud._storage_v2.types.storage import BidiWriteObjectRequest - - -from . 
import _utils -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( - AsyncGrpcClient, -) -from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( - _AsyncWriteObjectStream, -) -from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import ( - _BidiStreamRetryManager, -) -from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( - _WriteResumptionStrategy, - _WriteState, -) -from google.cloud.storage._experimental.asyncio.retry._helpers import ( - _extract_bidi_writes_redirect_proto, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer has been moved to google.cloud.storage.asyncio.async_appendable_object_writer. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - - -_MAX_CHUNK_SIZE_BYTES = 2 * 1024 * 1024 # 2 MiB -_DEFAULT_FLUSH_INTERVAL_BYTES = 16 * 1024 * 1024 # 16 MiB -_BIDI_WRITE_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -def _is_write_retryable(exc): - """Predicate to determine if a write operation should be retried.""" - - if isinstance( - exc, - ( - exceptions.InternalServerError, - exceptions.ServiceUnavailable, - exceptions.DeadlineExceeded, - exceptions.TooManyRequests, - BidiWriteObjectRedirectedError, - ), - ): - logger.warning(f"Retryable write exception encountered: {exc}") - return True - - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - if isinstance(grpc_error, BidiWriteObjectRedirectedError): - return True - - trailers = grpc_error.trailing_metadata() - if not trailers: - return False - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: - return True - except Exception: - logger.error( - "Error unpacking redirect details from gRPC error. Exception: ", - {exc}, - ) - return False - return False - - -class AsyncAppendableObjectWriter: - """Class for appending data to a GCS Appendable Object asynchronously.""" - - def __init__( - self, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation: Optional[int] = None, - write_handle: Optional[_storage_v2.BidiWriteHandle] = None, - writer_options: Optional[dict] = None, - ): - """ - Class for appending data to a GCS Appendable Object. - - Example usage: - - ``` - - from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient - from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter - import asyncio - - client = AsyncGrpcClient().grpc_client - bucket_name = "my-bucket" - object_name = "my-appendable-object" - - # instantiate the writer - writer = AsyncAppendableObjectWriter(client, bucket_name, object_name) - # open the writer, (underlying gRPC bidi-stream will be opened) - await writer.open() - - # append data, it can be called multiple times. - await writer.append(b"hello world") - await writer.append(b"some more data") - - # optionally flush data to persist. - await writer.flush() - - # close the gRPC stream. 
- # Please note closing the program will also close the stream, - # however it's recommended to close the stream if no more data to append - # to clean up gRPC connection (which means CPU/memory/network resources) - await writer.close() - ``` - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS bucket containing the object. - - :type object_name: str - :param object_name: The name of the GCS Appendable Object to be written. - - :type generation: Optional[int] - :param generation: (Optional) If present, creates writer for that - specific revision of that object. Use this to append data to an - existing Appendable Object. - - Setting to ``0`` makes the `writer.open()` succeed only if - object doesn't exist in the bucket (useful for not accidentally - overwriting existing objects). - - Warning: If `None`, a new object is created. If an object with the - same name already exists, it will be overwritten the moment - `writer.open()` is called. - - :type write_handle: _storage_v2.BidiWriteHandle - :param write_handle: (Optional) An handle for writing the object. - If provided, opening the bidi-gRPC connection will be faster. - - :type writer_options: dict - :param writer_options: (Optional) A dictionary of writer options. - Supported options: - - "FLUSH_INTERVAL_BYTES": int - The number of bytes to append before "persisting" data in GCS - servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`. - Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`. - """ - _utils.raise_if_no_fast_crc32c() - self.client = client - self.bucket_name = bucket_name - self.object_name = object_name - self.write_handle = write_handle - self.generation = generation - - self.write_obj_stream: Optional[_AsyncWriteObjectStream] = None - self._is_stream_open: bool = False - # `offset` is the latest size of the object without staleless. - self.offset: Optional[int] = None - # `persisted_size` is the total_bytes persisted in the GCS server. - # Please note: `offset` and `persisted_size` are same when the stream is - # opened. - self.persisted_size: Optional[int] = None - if writer_options is None: - writer_options = {} - self.flush_interval = writer_options.get( - "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES - ) - if self.flush_interval < _MAX_CHUNK_SIZE_BYTES: - raise exceptions.OutOfRange( - f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}" - ) - if self.flush_interval % _MAX_CHUNK_SIZE_BYTES != 0: - raise exceptions.OutOfRange( - f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}" - ) - self.bytes_appended_since_last_flush = 0 - self._routing_token: Optional[str] = None - self.object_resource: Optional[_storage_v2.Object] = None - - async def state_lookup(self) -> int: - """Returns the persisted_size - - :rtype: int - :returns: persisted size. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. 
Call open() before state_lookup().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - state_lookup=True, - ) - ) - response = await self.write_obj_stream.recv() - self.persisted_size = response.persisted_size - return self.persisted_size - - def _on_open_error(self, exc): - """Extracts routing token and write handle on redirect error during open.""" - redirect_proto = _extract_bidi_writes_redirect_proto(exc) - if redirect_proto: - if redirect_proto.routing_token: - self._routing_token = redirect_proto.routing_token - if redirect_proto.write_handle: - self.write_handle = redirect_proto.write_handle - if redirect_proto.generation: - self.generation = redirect_proto.generation - - async def open( - self, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Opens the underlying bidi-gRPC stream. - - :raises ValueError: If the stream is already open. - - """ - if self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is already open") - - if retry_policy is None: - retry_policy = AsyncRetry( - predicate=_is_write_retryable, on_error=self._on_open_error - ) - else: - original_on_error = retry_policy._on_error - - def combined_on_error(exc): - self._on_open_error(exc) - if original_on_error: - original_on_error(exc) - - retry_policy = AsyncRetry( - predicate=_is_write_retryable, - initial=retry_policy._initial, - maximum=retry_policy._maximum, - multiplier=retry_policy._multiplier, - deadline=retry_policy._deadline, - on_error=combined_on_error, - ) - - async def _do_open(): - current_metadata = list(metadata) if metadata else [] - - # Cleanup stream from previous failed attempt, if any. - if self.write_obj_stream: - if self.write_obj_stream.is_stream_open: - try: - await self.write_obj_stream.close() - except Exception as e: - logger.warning( - "Error closing previous write stream during open retry. Got exception: ", - {e}, - ) - self.write_obj_stream = None - self._is_stream_open = False - - self.write_obj_stream = _AsyncWriteObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation, - write_handle=self.write_handle, - routing_token=self._routing_token, - ) - - if self._routing_token: - current_metadata.append( - ("x-goog-request-params", f"routing_token={self._routing_token}") - ) - - await self.write_obj_stream.open( - metadata=current_metadata if metadata else None - ) - - if self.write_obj_stream.generation_number: - self.generation = self.write_obj_stream.generation_number - if self.write_obj_stream.write_handle: - self.write_handle = self.write_obj_stream.write_handle - if self.write_obj_stream.persisted_size is not None: - self.persisted_size = self.write_obj_stream.persisted_size - - self._is_stream_open = True - self._routing_token = None - - await retry_policy(_do_open)() - - async def append( - self, - data: bytes, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Appends data to the Appendable object with automatic retries. - - calling `self.append` will append bytes at the end of the current size - ie. `self.offset` bytes relative to the begining of the object. - - This method sends the provided `data` to the GCS server in chunks. - and persists data in GCS at every `_DEFAULT_FLUSH_INTERVAL_BYTES` bytes - or at the last chunk whichever is earlier. Persisting is done by setting - `flush=True` on request. 
- - :type data: bytes - :param data: The bytes to append to the object. - - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the operation. - - :type metadata: List[Tuple[str, str]] - :param metadata: (Optional) The metadata to be sent with the request. - - :raises ValueError: If the stream is not open. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before append().") - if not data: - logger.debug("No data provided to append; returning without action.") - return - - if retry_policy is None: - retry_policy = AsyncRetry(predicate=_is_write_retryable) - - strategy = _WriteResumptionStrategy() - buffer = io.BytesIO(data) - attempt_count = 0 - - def send_and_recv_generator( - requests: List[BidiWriteObjectRequest], - state: dict[str, _WriteState], - metadata: Optional[List[Tuple[str, str]]] = None, - ): - async def generator(): - nonlocal attempt_count - nonlocal requests - attempt_count += 1 - resp = None - write_state = state["write_state"] - # If this is a retry or redirect, we must re-open the stream - if attempt_count > 1 or write_state.routing_token: - logger.info( - f"Re-opening the stream with attempt_count: {attempt_count}" - ) - if self.write_obj_stream and self.write_obj_stream.is_stream_open: - await self.write_obj_stream.close() - - current_metadata = list(metadata) if metadata else [] - if write_state.routing_token: - current_metadata.append( - ( - "x-goog-request-params", - f"routing_token={write_state.routing_token}", - ) - ) - self._routing_token = write_state.routing_token - - self._is_stream_open = False - await self.open(metadata=current_metadata) - - write_state.persisted_size = self.persisted_size - write_state.write_handle = self.write_handle - write_state.routing_token = None - - write_state.user_buffer.seek(write_state.persisted_size) - write_state.bytes_sent = write_state.persisted_size - write_state.bytes_since_last_flush = 0 - - requests = strategy.generate_requests(state) - - num_requests = len(requests) - for i, chunk_req in enumerate(requests): - if i == num_requests - 1: - chunk_req.state_lookup = True - chunk_req.flush = True - await self.write_obj_stream.send(chunk_req) - - resp = await self.write_obj_stream.recv() - if resp: - if resp.persisted_size is not None: - self.persisted_size = resp.persisted_size - state["write_state"].persisted_size = resp.persisted_size - self.offset = self.persisted_size - if resp.write_handle: - self.write_handle = resp.write_handle - state["write_state"].write_handle = resp.write_handle - self.bytes_appended_since_last_flush = 0 - - yield resp - - return generator() - - # State initialization - write_state = _WriteState(_MAX_CHUNK_SIZE_BYTES, buffer, self.flush_interval) - write_state.write_handle = self.write_handle - write_state.persisted_size = self.persisted_size - write_state.bytes_sent = self.persisted_size - write_state.bytes_since_last_flush = self.bytes_appended_since_last_flush - - retry_manager = _BidiStreamRetryManager( - _WriteResumptionStrategy(), - lambda r, s: send_and_recv_generator(r, s, metadata), - ) - await retry_manager.execute({"write_state": write_state}, retry_policy) - - # Sync local markers - self.write_obj_stream.persisted_size = write_state.persisted_size - self.write_obj_stream.write_handle = write_state.write_handle - self.bytes_appended_since_last_flush = write_state.bytes_since_last_flush - self.persisted_size = write_state.persisted_size - self.offset = 
write_state.persisted_size - - async def simple_flush(self) -> None: - """Flushes the data to the server. - Please note: Unlike `flush` it does not do `state_lookup` - - :rtype: None - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before simple_flush().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - flush=True, - ) - ) - self.bytes_appended_since_last_flush = 0 - - async def flush(self) -> int: - """Flushes the data to the server. - - :rtype: int - :returns: The persisted size after flush. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before flush().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - flush=True, - state_lookup=True, - ) - ) - response = await self.write_obj_stream.recv() - self.persisted_size = response.persisted_size - self.offset = self.persisted_size - self.bytes_appended_since_last_flush = 0 - return self.persisted_size - - async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]: - """Closes the underlying bidi-gRPC stream. - - :type finalize_on_close: bool - :param finalize_on_close: Finalizes the Appendable Object. No more data - can be appended. - - rtype: Union[int, _storage_v2.Object] - returns: Updated `self.persisted_size` by default after closing the - bidi-gRPC stream. However, if `finalize_on_close=True` is passed, - returns the finalized object resource. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before close().") - - if finalize_on_close: - return await self.finalize() - - await self.write_obj_stream.close() - - self._is_stream_open = False - self.offset = None - return self.persisted_size - - async def finalize(self) -> _storage_v2.Object: - """Finalizes the Appendable Object. - - Note: Once finalized no more data can be appended. - This method is different from `close`. if `.close()` is called data may - still be appended to object at a later point in time by opening with - generation number. - (i.e. `open(..., generation=)`. - However if `.finalize()` is called no more data can be appended to the - object. - - rtype: google.cloud.storage_v2.types.Object - returns: The finalized object resource. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before finalize().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest(finish_write=True) - ) - response = await self.write_obj_stream.recv() - self.object_resource = response.resource - self.persisted_size = self.object_resource.size - await self.write_obj_stream.close() - - self._is_stream_open = False - self.offset = None - return self.object_resource - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open - - # helper methods. 
- async def append_from_string(self, data: str): - """ - str data will be encoded to bytes using utf-8 encoding calling - - self.append(data.encode("utf-8")) - """ - raise NotImplementedError("append_from_string is not implemented yet.") - - async def append_from_stream(self, stream_obj): - """ - At a time read a chunk of data (16MiB) from `stream_obj` - and call self.append(chunk) - """ - raise NotImplementedError("append_from_stream is not implemented yet.") - - async def append_from_file( - self, file_obj: BufferedReader, block_size: int = _DEFAULT_FLUSH_INTERVAL_BYTES - ): - """ - Appends data to an Appendable Object using file_handle which is opened - for reading in binary mode. - - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. - - """ - while block := file_obj.read(block_size): - await self.append(block) diff --git a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py index e985f2252..558ff0c5a 100644 --- a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py +++ b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py @@ -1,100 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -"""An async client for interacting with Google Cloud Storage using the gRPC API.""" +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_grpc_client import * # noqa -from google.cloud import _storage_v2 as storage_v2 -from google.cloud._storage_v2.services.storage.transports.base import ( - DEFAULT_CLIENT_INFO, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_grpc_client has been moved to google.cloud.storage.asyncio.async_grpc_client. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - - -class AsyncGrpcClient: - """An asynchronous client for interacting with Google Cloud Storage using the gRPC API. - - :type credentials: :class:`~google.auth.credentials.Credentials` - :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed, falls back to the default - inferred from the environment. - - :type client_info: :class:`~google.api_core.client_info.ClientInfo` - :param client_info: - The client info used to send a user-agent string along with API - requests. If ``None``, then default info will be used. - - :type client_options: :class:`~google.api_core.client_options.ClientOptions` - :param client_options: (Optional) Client options used to set user options - on the client. - - :type attempt_direct_path: bool - :param attempt_direct_path: - (Optional) Whether to attempt to use DirectPath for gRPC connections. - Defaults to ``True``. 
- """ - - def __init__( - self, - credentials=None, - client_info=None, - client_options=None, - *, - attempt_direct_path=True, - ): - self._grpc_client = self._create_async_grpc_client( - credentials=credentials, - client_info=client_info, - client_options=client_options, - attempt_direct_path=attempt_direct_path, - ) - - def _create_async_grpc_client( - self, - credentials=None, - client_info=None, - client_options=None, - attempt_direct_path=True, - ): - transport_cls = storage_v2.StorageAsyncClient.get_transport_class( - "grpc_asyncio" - ) - - if client_info is None: - client_info = DEFAULT_CLIENT_INFO - primary_user_agent = client_info.to_user_agent() - - channel = transport_cls.create_channel( - attempt_direct_path=attempt_direct_path, - credentials=credentials, - options=(("grpc.primary_user_agent", primary_user_agent),), - ) - transport = transport_cls(channel=channel) - - return storage_v2.StorageAsyncClient( - transport=transport, - client_info=client_info, - client_options=client_options, - ) - - @property - def grpc_client(self): - """The underlying gRPC client. - - This property gives users direct access to the `_storage_v2.StorageAsyncClient` - instance. This can be useful for accessing - newly added or experimental RPCs that are not yet exposed through - the high-level GrpcClient. - Returns: - google.cloud._storage_v2.StorageAsyncClient: The configured GAPIC client. - """ - return self._grpc_client diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 340f087da..bfc2c7c2b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -1,506 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+import warnings -from __future__ import annotations -import asyncio -import logging -from google.api_core import exceptions -from google.api_core.retry_async import AsyncRetry -from google.cloud.storage._experimental.asyncio.retry._helpers import _handle_redirect -from google.rpc import status_pb2 +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_multi_range_downloader import * # noqa -from typing import List, Optional, Tuple, Any, Dict - -from ._utils import raise_if_no_fast_crc32c -from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( - _AsyncReadObjectStream, -) -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( - AsyncGrpcClient, -) -from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import ( - _BidiStreamRetryManager, -) -from google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy import ( - _ReadResumptionStrategy, - _DownloadState, -) - -from io import BytesIO -from google.cloud import _storage_v2 -from google.cloud.storage._helpers import generate_random_56_bit_integer - - -_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader has been moved to google.cloud.storage.asyncio.async_multi_range_downloader. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -logger = logging.getLogger(__name__) - - -def _is_read_retryable(exc): - """Predicate to determine if a read operation should be retried.""" - if isinstance( - exc, - ( - exceptions.InternalServerError, - exceptions.ServiceUnavailable, - exceptions.DeadlineExceeded, - exceptions.TooManyRequests, - ), - ): - return True - - if not isinstance(exc, exceptions.Aborted) or not exc.errors: - return False - - try: - grpc_error = exc.errors[0] - trailers = grpc_error.trailing_metadata() - if not trailers: - return False - - status_details_bin = next( - (v for k, v in trailers if k == "grpc-status-details-bin"), None - ) - - if not status_details_bin: - return False - - status_proto = status_pb2.Status() - status_proto.ParseFromString(status_details_bin) - return any( - detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL - for detail in status_proto.details - ) - except Exception as e: - logger.error(f"Error parsing status_details_bin: {e}") - return False - - -class AsyncMultiRangeDownloader: - """Provides an interface for downloading multiple ranges of a GCS ``Object`` - concurrently. - - Example usage: - - .. code-block:: python - - client = AsyncGrpcClient().grpc_client - mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name="chandrasiri-rs", object_name="test_open9" - ) - my_buff1 = open('my_fav_file.txt', 'wb') - my_buff2 = BytesIO() - my_buff3 = BytesIO() - my_buff4 = any_object_which_provides_BytesIO_like_interface() - await mrd.download_ranges( - [ - # (start_byte, bytes_to_read, writeable_buffer) - (0, 100, my_buff1), - (100, 20, my_buff2), - (200, 123, my_buff3), - (300, 789, my_buff4), - ] - ) - - # verify data in buffers... 
- assert my_buff2.getbuffer().nbytes == 20 - - - """ - - @classmethod - async def create_mrd( - cls, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> AsyncMultiRangeDownloader: - """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC - object for reading. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: The asynchronous client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the bucket containing the object. - - :type object_name: str - :param object_name: The name of the object to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific - revision of this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing handle for reading the object. - If provided, opening the bidi-gRPC connection will be faster. - - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the ``open`` operation. - - :type metadata: List[Tuple[str, str]] - :param metadata: (Optional) The metadata to be sent with the ``open`` request. - - :rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` - :returns: An initialized AsyncMultiRangeDownloader instance for reading. - """ - mrd = cls(client, bucket_name, object_name, generation_number, read_handle) - await mrd.open(retry_policy=retry_policy, metadata=metadata) - return mrd - - def __init__( - self, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - ) -> None: - """Constructor for AsyncMultiRangeDownloader, clients are not adviced to - use it directly. Instead it's adviced to use the classmethod `create_mrd`. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: The asynchronous client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the bucket containing the object. - - :type object_name: str - :param object_name: The name of the object to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing read handle. 
- """ - - raise_if_no_fast_crc32c() - - self.client = client - self.bucket_name = bucket_name - self.object_name = object_name - self.generation_number = generation_number - self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle - self.read_obj_str: Optional[_AsyncReadObjectStream] = None - self._is_stream_open: bool = False - self._routing_token: Optional[str] = None - self._read_id_to_writable_buffer_dict = {} - self._read_id_to_download_ranges_id = {} - self._download_ranges_id_to_pending_read_ids = {} - self.persisted_size: Optional[int] = None # updated after opening the stream - - async def __aenter__(self): - """Opens the underlying bidi-gRPC connection to read from the object.""" - await self.open() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Closes the underlying bidi-gRPC connection.""" - if self.is_stream_open: - await self.close() - - def _on_open_error(self, exc): - """Extracts routing token and read handle on redirect error during open.""" - routing_token, read_handle = _handle_redirect(exc) - if routing_token: - self._routing_token = routing_token - if read_handle: - self.read_handle = read_handle - - async def open( - self, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Opens the bidi-gRPC connection to read from the object.""" - if self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is already open") - - if retry_policy is None: - retry_policy = AsyncRetry( - predicate=_is_read_retryable, on_error=self._on_open_error - ) - else: - original_on_error = retry_policy._on_error - - def combined_on_error(exc): - self._on_open_error(exc) - if original_on_error: - original_on_error(exc) - - retry_policy = AsyncRetry( - predicate=_is_read_retryable, - initial=retry_policy._initial, - maximum=retry_policy._maximum, - multiplier=retry_policy._multiplier, - deadline=retry_policy._deadline, - on_error=combined_on_error, - ) - - async def _do_open(): - current_metadata = list(metadata) if metadata else [] - - # Cleanup stream from previous failed attempt, if any. 
- if self.read_obj_str: - if self.read_obj_str.is_stream_open: - try: - await self.read_obj_str.close() - except exceptions.GoogleAPICallError as e: - logger.warning( - f"Failed to close existing stream during resumption: {e}" - ) - self.read_obj_str = None - self._is_stream_open = False - - self.read_obj_str = _AsyncReadObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation_number, - read_handle=self.read_handle, - ) - - if self._routing_token: - current_metadata.append( - ("x-goog-request-params", f"routing_token={self._routing_token}") - ) - self._routing_token = None - - await self.read_obj_str.open( - metadata=current_metadata if current_metadata else None - ) - - if self.read_obj_str.generation_number: - self.generation_number = self.read_obj_str.generation_number - if self.read_obj_str.read_handle: - self.read_handle = self.read_obj_str.read_handle - if self.read_obj_str.persisted_size is not None: - self.persisted_size = self.read_obj_str.persisted_size - - self._is_stream_open = True - - await retry_policy(_do_open)() - - async def download_ranges( - self, - read_ranges: List[Tuple[int, int, BytesIO]], - lock: asyncio.Lock = None, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Downloads multiple byte ranges from the object into the buffers - provided by user with automatic retries. - - :type read_ranges: List[Tuple[int, int, "BytesIO"]] - :param read_ranges: A list of tuples, where each tuple represents a - combination of byte_range and writeable buffer in format - - (`start_byte`, `bytes_to_read`, `writeable_buffer`). Buffer has - to be provided by the user, and user has to make sure appropriate - memory is available in the application to avoid out-of-memory crash. - - Special cases: - if the value of `bytes_to_read` is 0, it'll be interpreted as - download all contents until the end of the file from `start_byte`. - Examples: - * (0, 0, buffer) : downloads 0 to end , i.e. entire object. - * (100, 0, buffer) : downloads from 100 to end. - - - :type lock: asyncio.Lock - :param lock: (Optional) An asyncio lock to synchronize sends and recvs - on the underlying bidi-GRPC stream. This is required when multiple - coroutines are calling this method concurrently. - - i.e. Example usage with multiple coroutines: - - ``` - lock = asyncio.Lock() - task1 = asyncio.create_task(mrd.download_ranges(ranges1, lock)) - task2 = asyncio.create_task(mrd.download_ranges(ranges2, lock)) - await asyncio.gather(task1, task2) - - ``` - - If user want to call this method serially from multiple coroutines, - then providing a lock is not necessary. - - ``` - await mrd.download_ranges(ranges1) - await mrd.download_ranges(ranges2) - - # ... some other code code... - - ``` - - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the operation. - - :raises ValueError: if the underlying bidi-GRPC stream is not open. - :raises ValueError: if the length of read_ranges is more than 1000. - :raises DataCorruption: if a checksum mismatch is detected while reading data. 
- - """ - - if len(read_ranges) > 1000: - raise ValueError( - "Invalid input - length of read_ranges cannot be more than 1000" - ) - - if not self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is not open") - - if lock is None: - lock = asyncio.Lock() - - if retry_policy is None: - retry_policy = AsyncRetry(predicate=_is_read_retryable) - - # Initialize Global State for Retry Strategy - download_states = {} - for read_range in read_ranges: - read_id = generate_random_56_bit_integer() - download_states[read_id] = _DownloadState( - initial_offset=read_range[0], - initial_length=read_range[1], - user_buffer=read_range[2], - ) - - initial_state = { - "download_states": download_states, - "read_handle": self.read_handle, - "routing_token": None, - } - - # Track attempts to manage stream reuse - attempt_count = 0 - - def send_ranges_and_get_bytes( - requests: List[_storage_v2.ReadRange], - state: Dict[str, Any], - metadata: Optional[List[Tuple[str, str]]] = None, - ): - async def generator(): - nonlocal attempt_count - attempt_count += 1 - - if attempt_count > 1: - logger.info( - f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges." - ) - - async with lock: - current_handle = state.get("read_handle") - current_token = state.get("routing_token") - - # We reopen if it's a redirect (token exists) OR if this is a retry - # (not first attempt). This prevents trying to send data on a dead - # stream from a previous failed attempt. - should_reopen = ( - (attempt_count > 1) - or (current_token is not None) - or (metadata is not None) - ) - - if should_reopen: - if current_token: - logger.info( - f"Re-opening stream with routing token: {current_token}" - ) - # Close existing stream if any - if self.read_obj_str and self.read_obj_str.is_stream_open: - await self.read_obj_str.close() - - # Re-initialize stream - self.read_obj_str = _AsyncReadObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation_number, - read_handle=current_handle, - ) - - # Inject routing_token into metadata if present - current_metadata = list(metadata) if metadata else [] - if current_token: - current_metadata.append( - ( - "x-goog-request-params", - f"routing_token={current_token}", - ) - ) - - await self.read_obj_str.open( - metadata=current_metadata if current_metadata else None - ) - self._is_stream_open = True - - pending_read_ids = {r.read_id for r in requests} - - # Send Requests - for i in range( - 0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST - ): - batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST] - await self.read_obj_str.send( - _storage_v2.BidiReadObjectRequest(read_ranges=batch) - ) - - while pending_read_ids: - response = await self.read_obj_str.recv() - if response is None: - break - if response.object_data_ranges: - for data_range in response.object_data_ranges: - if data_range.range_end: - pending_read_ids.discard( - data_range.read_range.read_id - ) - yield response - - return generator() - - strategy = _ReadResumptionStrategy() - retry_manager = _BidiStreamRetryManager( - strategy, lambda r, s: send_ranges_and_get_bytes(r, s, metadata=metadata) - ) - - await retry_manager.execute(initial_state, retry_policy) - - if initial_state.get("read_handle"): - self.read_handle = initial_state["read_handle"] - - async def close(self): - """ - Closes the underlying bidi-gRPC connection. 
- """ - if not self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is not open") - - if self.read_obj_str: - await self.read_obj_str.close() - self.read_obj_str = None - self._is_stream_open = False - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index b59c7d162..cb39386f2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -1,198 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these APIs. +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_read_object_stream import * # noqa -""" - -from typing import List, Optional, Tuple -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_read_object_stream has been moved to google.cloud.storage.asyncio.async_read_object_stream. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -from google.api_core.bidi_async import AsyncBidiRpc - - -class _AsyncReadObjectStream(_AsyncAbstractObjectStream): - """Class representing a gRPC bidi-stream for reading data from a GCS ``Object``. - - This class provides a unix socket-like interface to a GCS ``Object``, with - methods like ``open``, ``close``, ``send``, and ``recv``. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS ``bucket`` containing the object. - - :type object_name: str - :param object_name: The name of the GCS ``object`` to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing handle for reading the object. - If provided, opening the bidi-gRPC connection will be faster. 
- """ - - def __init__( - self, - client: AsyncGrpcClient.grpc_client, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - ) -> None: - if client is None: - raise ValueError("client must be provided") - if bucket_name is None: - raise ValueError("bucket_name must be provided") - if object_name is None: - raise ValueError("object_name must be provided") - - super().__init__( - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation_number, - ) - self.client: AsyncGrpcClient.grpc_client = client - self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle - - self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" - - self.rpc = self.client._client._transport._wrapped_methods[ - self.client._client._transport.bidi_read_object - ] - self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) - self.socket_like_rpc: Optional[AsyncBidiRpc] = None - self._is_stream_open: bool = False - self.persisted_size: Optional[int] = None - - async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: - """Opens the bidi-gRPC connection to read from the object. - - This method sends an initial request to start the stream and receives - the first response containing metadata and a read handle. - - Args: - metadata (Optional[List[Tuple[str, str]]]): Additional metadata - to send with the initial stream request, e.g., for routing tokens. - """ - if self._is_stream_open: - raise ValueError("Stream is already open") - - read_handle = self.read_handle if self.read_handle else None - - read_object_spec = _storage_v2.BidiReadObjectSpec( - bucket=self._full_bucket_name, - object=self.object_name, - generation=self.generation_number if self.generation_number else None, - read_handle=read_handle, - ) - self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest( - read_object_spec=read_object_spec - ) - - # Build the x-goog-request-params header - request_params = [f"bucket={self._full_bucket_name}"] - other_metadata = [] - if metadata: - for key, value in metadata: - if key == "x-goog-request-params": - request_params.append(value) - else: - other_metadata.append((key, value)) - - current_metadata = other_metadata - current_metadata.append(("x-goog-request-params", ",".join(request_params))) - - self.socket_like_rpc = AsyncBidiRpc( - self.rpc, - initial_request=self.first_bidi_read_req, - metadata=current_metadata, - ) - await self.socket_like_rpc.open() # this is actually 1 send - response = await self.socket_like_rpc.recv() - # populated only in the first response of bidi-stream and when opened - # without using `read_handle` - if hasattr(response, "metadata") and response.metadata: - if self.generation_number is None: - self.generation_number = response.metadata.generation - # update persisted size - self.persisted_size = response.metadata.size - - if response and response.read_handle: - self.read_handle = response.read_handle - - self._is_stream_open = True - - async def close(self) -> None: - """Closes the bidi-gRPC connection.""" - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.requests_done() - await self.socket_like_rpc.close() - self._is_stream_open = False - - async def requests_done(self): - """Signals that all requests have been sent.""" - - await self.socket_like_rpc.send(None) - await self.socket_like_rpc.recv() - - async def send( - self, bidi_read_object_request: 
_storage_v2.BidiReadObjectRequest - ) -> None: - """Sends a request message on the stream. - - Args: - bidi_read_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): - The request message to send. This is typically used to specify - the read offset and limit. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.socket_like_rpc.send(bidi_read_object_request) - - async def recv(self) -> _storage_v2.BidiReadObjectResponse: - """Receives a response from the stream. - - This method waits for the next message from the server, which could - contain object data or metadata. - - Returns: - :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: - The response message from the server. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - response = await self.socket_like_rpc.recv() - # Update read_handle if present in response - if response and response.read_handle: - self.read_handle = response.read_handle - return response - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index b73a43d1b..132e2c9d0 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -1,236 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these Rapid Storage APIs. +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_write_object_stream import * # noqa -""" -from typing import List, Optional, Tuple -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio import _utils -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_write_object_stream has been moved to google.cloud.storage.asyncio.async_write_object_stream. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.api_core.bidi_async import AsyncBidiRpc - - -class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): - """Class representing a gRPC bidi-stream for writing data from a GCS - ``Appendable Object``. 
- - This class provides a unix socket-like interface to a GCS ``Object``, with - methods like ``open``, ``close``, ``send``, and ``recv``. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS ``bucket`` containing the object. - - :type object_name: str - :param object_name: The name of the GCS ``Appendable Object`` to be write. - - :type generation_number: int - :param generation_number: (Optional) If present, creates writer for that - specific revision of that object. Use this to append data to an - existing Appendable Object. - - Setting to ``0`` makes the `writer.open()` succeed only if - object doesn't exist in the bucket (useful for not accidentally - overwriting existing objects). - - Warning: If `None`, a new object is created. If an object with the - same name already exists, it will be overwritten the moment - `writer.open()` is called. - - :type write_handle: _storage_v2.BidiWriteHandle - :param write_handle: (Optional) An existing handle for writing the object. - If provided, opening the bidi-gRPC connection will be faster. - """ - - def __init__( - self, - client: AsyncGrpcClient.grpc_client, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, # None means new object - write_handle: Optional[_storage_v2.BidiWriteHandle] = None, - routing_token: Optional[str] = None, - ) -> None: - if client is None: - raise ValueError("client must be provided") - if bucket_name is None: - raise ValueError("bucket_name must be provided") - if object_name is None: - raise ValueError("object_name must be provided") - - super().__init__( - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation_number, - ) - self.client: AsyncGrpcClient.grpc_client = client - self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle - self.routing_token: Optional[str] = routing_token - - self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" - - self.rpc = self.client._client._transport._wrapped_methods[ - self.client._client._transport.bidi_write_object - ] - - self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) - self.socket_like_rpc: Optional[AsyncBidiRpc] = None - self._is_stream_open: bool = False - self.first_bidi_write_req = None - self.persisted_size = 0 - self.object_resource: Optional[_storage_v2.Object] = None - - async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: - """ - Opens the bidi-gRPC connection to write to the object. - - This method sends an initial request to start the stream and receives - the first response containing metadata and a write handle. - - :rtype: None - :raises ValueError: If the stream is already open. - :raises google.api_core.exceptions.FailedPrecondition: - if `generation_number` is 0 and object already exists. - """ - if self._is_stream_open: - raise ValueError("Stream is already open") - - # Create a new object or overwrite existing one if generation_number - # is None. This makes it consistent with GCS JSON API behavior. - # Created object type would be Appendable Object. - # if `generation_number` == 0 new object will be created only if there - # isn't any existing object. 
- if self.generation_number is None or self.generation_number == 0: - self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( - write_object_spec=_storage_v2.WriteObjectSpec( - resource=_storage_v2.Object( - name=self.object_name, bucket=self._full_bucket_name - ), - appendable=True, - if_generation_match=self.generation_number, - ), - ) - else: - self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( - append_object_spec=_storage_v2.AppendObjectSpec( - bucket=self._full_bucket_name, - object=self.object_name, - generation=self.generation_number, - write_handle=self.write_handle if self.write_handle else None, - routing_token=self.routing_token if self.routing_token else None, - ), - ) - - request_param_values = [f"bucket={self._full_bucket_name}"] - final_metadata = [] - if metadata: - for key, value in metadata: - if key == "x-goog-request-params": - request_param_values.append(value) - else: - final_metadata.append((key, value)) - - final_metadata.append(("x-goog-request-params", ",".join(request_param_values))) - - self.socket_like_rpc = AsyncBidiRpc( - self.rpc, - initial_request=self.first_bidi_write_req, - metadata=final_metadata, - ) - - await self.socket_like_rpc.open() # this is actually 1 send - response = await self.socket_like_rpc.recv() - self._is_stream_open = True - - if response.persisted_size: - self.persisted_size = response.persisted_size - - if response.resource: - if not response.resource.size: - # Appending to a 0 byte appendable object. - self.persisted_size = 0 - else: - self.persisted_size = response.resource.size - - self.generation_number = response.resource.generation - - if response.write_handle: - self.write_handle = response.write_handle - - async def close(self) -> None: - """Closes the bidi-gRPC connection.""" - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.requests_done() - await self.socket_like_rpc.close() - self._is_stream_open = False - - async def requests_done(self): - """Signals that all requests have been sent.""" - - await self.socket_like_rpc.send(None) - _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) - - async def send( - self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest - ) -> None: - """Sends a request message on the stream. - - Args: - bidi_write_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): - The request message to send. This is typically used to specify - the read offset and limit. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.socket_like_rpc.send(bidi_write_object_request) - - async def recv(self) -> _storage_v2.BidiWriteObjectResponse: - """Receives a response from the stream. - - This method waits for the next message from the server, which could - contain object data or metadata. - - Returns: - :class:`~google.cloud._storage_v2.types.BidiWriteObjectResponse`: - The response message from the server. 
- """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - response = await self.socket_like_rpc.recv() - # Update write_handle if present in response - if response: - if response.write_handle: - self.write_handle = response.write_handle - if response.persisted_size is not None: - self.persisted_size = response.persisted_size - if response.resource and response.resource.size: - self.persisted_size = response.resource.size - return response - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py index d9ad2462e..092986f58 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py +++ b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py @@ -1,125 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -from __future__ import annotations +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry._helpers import * # noqa -import logging -from typing import Tuple, Optional - -from google.api_core import exceptions -from google.cloud._storage_v2.types import ( - BidiReadObjectRedirectedError, - BidiWriteObjectRedirectedError, -) -from google.rpc import status_pb2 - -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry._helpers has been moved to google.cloud.storage.asyncio.retry._helpers. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -_BIDI_WRITE_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -def _handle_redirect( - exc: Exception, -) -> Tuple[Optional[str], Optional[bytes]]: - """ - Extracts routing token and read handle from a gRPC error. - - :type exc: Exception - :param exc: The exception to parse. - - :rtype: Tuple[Optional[str], Optional[bytes]] - :returns: A tuple of (routing_token, read_handle). 
- """ - routing_token = None - read_handle = None - - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - - if grpc_error: - if isinstance(grpc_error, BidiReadObjectRedirectedError): - routing_token = grpc_error.routing_token - if grpc_error.read_handle: - read_handle = grpc_error.read_handle - return routing_token, read_handle - - if hasattr(grpc_error, "trailing_metadata"): - trailers = grpc_error.trailing_metadata() - if not trailers: - return None, None - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL: - redirect_proto = BidiReadObjectRedirectedError.deserialize( - detail.value - ) - if redirect_proto.routing_token: - routing_token = redirect_proto.routing_token - if redirect_proto.read_handle: - read_handle = redirect_proto.read_handle - break - except Exception as e: - logger.error(f"Error unpacking redirect: {e}") - - return routing_token, read_handle - - -def _extract_bidi_writes_redirect_proto(exc: Exception): - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - - if grpc_error: - if isinstance(grpc_error, BidiWriteObjectRedirectedError): - return grpc_error - - if hasattr(grpc_error, "trailing_metadata"): - trailers = grpc_error.trailing_metadata() - if not trailers: - return - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: - redirect_proto = BidiWriteObjectRedirectedError.deserialize( - detail.value - ) - return redirect_proto - except Exception: - logger.error("Error unpacking redirect details from gRPC error.") - pass diff --git a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py index ff193f109..58c58136c 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py @@ -1,83 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import abc -from typing import Any, Iterable +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.base_strategy import * # noqa - -class _BaseResumptionStrategy(abc.ABC): - """Abstract base class defining the interface for a bidi stream resumption strategy. 
- - This class defines the skeleton for a pluggable strategy that contains - all the service-specific logic for a given bidi operation (e.g., reads - or writes). This allows a generic retry manager to handle the common - retry loop while sending the state management and request generation - to a concrete implementation of this class. - """ - - @abc.abstractmethod - def generate_requests(self, state: Any) -> Iterable[Any]: - """Generates the next batch of requests based on the current state. - - This method is called at the beginning of each retry attempt. It should - inspect the provided state object and generate the appropriate list of - request protos to send to the server. For example, a read strategy - would use this to implement "Smarter Resumption" by creating smaller - `ReadRange` requests for partially downloaded ranges. For bidi-writes, - it will set the `write_offset` field to the persisted size received - from the server in the next request. - - :type state: Any - :param state: An object containing all the state needed for the - operation (e.g., requested ranges, user buffers, - bytes written). - """ - pass - - @abc.abstractmethod - def update_state_from_response(self, response: Any, state: Any) -> None: - """Updates the state based on a successful server response. - - This method is called for every message received from the server. It is - responsible for processing the response and updating the shared state - object. - - :type response: Any - :param response: The response message received from the server. - - :type state: Any - :param state: The shared state object for the operation, which will be - mutated by this method. - """ - pass - - @abc.abstractmethod - async def recover_state_on_failure(self, error: Exception, state: Any) -> None: - """Prepares the state for the next retry attempt after a failure. - - This method is called when a retriable gRPC error occurs. It is - responsible for performing any necessary actions to ensure the next - retry attempt can succeed. For bidi reads, its primary role is to - handle the `BidiReadObjectRedirectError` by extracting the - `routing_token` and updating the state. For bidi writes, it will update - the state to reflect any bytes that were successfully persisted before - the failure. - - :type error: :class:`Exception` - :param error: The exception that was caught by the retry engine. - - :type state: Any - :param state: The shared state object for the operation. - """ - pass +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.base_strategy has been moved to google.cloud.storage.asyncio.retry.base_strategy. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py index a8caae4eb..331ee1326 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py +++ b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py @@ -1,69 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import logging -from typing import Any, AsyncIterator, Callable +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.bidi_stream_retry_manager import * # noqa -from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( - _BaseResumptionStrategy, +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager has been moved to google.cloud.storage.asyncio.retry.bidi_stream_retry_manager. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -logger = logging.getLogger(__name__) - - -class _BidiStreamRetryManager: - """Manages the generic retry loop for a bidi streaming operation.""" - - def __init__( - self, - strategy: _BaseResumptionStrategy, - send_and_recv: Callable[..., AsyncIterator[Any]], - ): - """Initializes the retry manager. - Args: - strategy: The strategy for managing the state of a specific - bidi operation (e.g., reads or writes). - send_and_recv: An async callable that opens a new gRPC stream. - """ - self._strategy = strategy - self._send_and_recv = send_and_recv - - async def execute(self, initial_state: Any, retry_policy): - """ - Executes the bidi operation with the configured retry policy. - Args: - initial_state: An object containing all state for the operation. - retry_policy: The `google.api_core.retry.AsyncRetry` object to - govern the retry behavior for this specific operation. - """ - state = initial_state - - async def attempt(): - requests = self._strategy.generate_requests(state) - stream = self._send_and_recv(requests, state) - try: - async for response in stream: - self._strategy.update_state_from_response(response, state) - return - except Exception as e: - if retry_policy._predicate(e): - logger.info( - f"Bidi stream operation failed: {e}. Attempting state recovery and retry." - ) - await self._strategy.recover_state_on_failure(e, state) - raise e - - wrapped_attempt = retry_policy(attempt) - - await wrapped_attempt() diff --git a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py index 916b82e6e..8f7051b6a 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py @@ -1,157 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
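The `_BidiStreamRetryManager` shown above pairs a pluggable resumption strategy with a `google.api_core` `AsyncRetry` policy: `generate_requests` is called at the start of each attempt, `update_state_from_response` runs per message, and `recover_state_on_failure` prepares state before a retry. A minimal sketch of that wiring follows; `_ReplayStrategy`, `fake_send_and_recv`, and the state-dict keys are invented for illustration and are not part of this patch.

```python
# Hypothetical illustration of the retry-manager pattern; only the manager,
# base-strategy, and AsyncRetry APIs are taken from this patch.
import asyncio

from google.api_core import exceptions
from google.api_core.retry_async import AsyncRetry

from google.cloud.storage.asyncio.retry.base_strategy import _BaseResumptionStrategy
from google.cloud.storage.asyncio.retry.bidi_stream_retry_manager import (
    _BidiStreamRetryManager,
)


class _ReplayStrategy(_BaseResumptionStrategy):
    """Toy strategy: resend one request and collect every response."""

    def generate_requests(self, state):
        # Called at the start of each attempt; a real strategy would trim this
        # down to whatever work is still outstanding.
        return [state["request"]]

    def update_state_from_response(self, response, state):
        state["responses"].append(response)

    async def recover_state_on_failure(self, error, state):
        # A real strategy would rewind offsets or capture a routing token here.
        state["failures"] = state.get("failures", 0) + 1


async def fake_send_and_recv(requests, state):
    # Stands in for a callable that opens a bidi stream and yields responses.
    for request in requests:
        yield f"echo:{request}"


async def main():
    state = {"request": "ping", "responses": []}
    manager = _BidiStreamRetryManager(_ReplayStrategy(), fake_send_and_recv)
    policy = AsyncRetry(
        predicate=lambda exc: isinstance(exc, exceptions.ServiceUnavailable)
    )
    await manager.execute(state, policy)
    print(state["responses"])  # ['echo:ping']


asyncio.run(main())
```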
+import warnings -from typing import Any, Dict, List, IO -import logging +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.reads_resumption_strategy import * # noqa -from google_crc32c import Checksum -from google.cloud import _storage_v2 as storage_v2 -from google.cloud.storage.exceptions import DataCorruption -from google.cloud.storage._experimental.asyncio.retry._helpers import ( - _handle_redirect, +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy has been moved to google.cloud.storage.asyncio.retry.reads_resumption_strategy. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( - _BaseResumptionStrategy, -) - - -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -class _DownloadState: - """A helper class to track the state of a single range download.""" - - def __init__( - self, initial_offset: int, initial_length: int, user_buffer: IO[bytes] - ): - self.initial_offset = initial_offset - self.initial_length = initial_length - self.user_buffer = user_buffer - self.bytes_written = 0 - self.next_expected_offset = initial_offset - self.is_complete = False - - -class _ReadResumptionStrategy(_BaseResumptionStrategy): - """The concrete resumption strategy for bidi reads.""" - - def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]: - """Generates new ReadRange requests for all incomplete downloads. - - :type state: dict - :param state: A dictionary mapping a read_id to its corresponding - _DownloadState object. - """ - pending_requests = [] - download_states: Dict[int, _DownloadState] = state["download_states"] - - for read_id, read_state in download_states.items(): - if not read_state.is_complete: - new_offset = read_state.initial_offset + read_state.bytes_written - - # Calculate remaining length. If initial_length is 0 (read to end), - # it stays 0. Otherwise, subtract bytes_written. - new_length = 0 - if read_state.initial_length > 0: - new_length = read_state.initial_length - read_state.bytes_written - - new_request = storage_v2.ReadRange( - read_offset=new_offset, - read_length=new_length, - read_id=read_id, - ) - pending_requests.append(new_request) - return pending_requests - - def update_state_from_response( - self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any] - ) -> None: - """Processes a server response, performs integrity checks, and updates state.""" - - # Capture read_handle if provided. - if response.read_handle: - state["read_handle"] = response.read_handle - - download_states = state["download_states"] - - for object_data_range in response.object_data_ranges: - # Ignore empty ranges or ranges for IDs not in our state - # (e.g., from a previously cancelled request on the same stream). - if not object_data_range.read_range: - logger.warning( - "Received response with missing read_range field; ignoring." - ) - continue - - read_id = object_data_range.read_range.read_id - if read_id not in download_states: - logger.warning( - f"Received data for unknown or stale read_id {read_id}; ignoring." 
- ) - continue - - read_state = download_states[read_id] - - # Offset Verification - chunk_offset = object_data_range.read_range.read_offset - if chunk_offset != read_state.next_expected_offset: - raise DataCorruption( - response, - f"Offset mismatch for read_id {read_id}. " - f"Expected {read_state.next_expected_offset}, got {chunk_offset}", - ) - - # Checksum Verification - # We must validate data before updating state or writing to buffer. - data = object_data_range.checksummed_data.content - server_checksum = object_data_range.checksummed_data.crc32c - - if server_checksum is not None: - client_checksum = int.from_bytes(Checksum(data).digest(), "big") - if server_checksum != client_checksum: - raise DataCorruption( - response, - f"Checksum mismatch for read_id {read_id}. " - f"Server sent {server_checksum}, client calculated {client_checksum}.", - ) - - # Update State & Write Data - chunk_size = len(data) - read_state.user_buffer.write(data) - read_state.bytes_written += chunk_size - read_state.next_expected_offset += chunk_size - - # Final Byte Count Verification - if object_data_range.range_end: - read_state.is_complete = True - if ( - read_state.initial_length != 0 - and read_state.bytes_written > read_state.initial_length - ): - raise DataCorruption( - response, - f"Byte count mismatch for read_id {read_id}. " - f"Expected {read_state.initial_length}, got {read_state.bytes_written}", - ) - - async def recover_state_on_failure(self, error: Exception, state: Any) -> None: - """Handles BidiReadObjectRedirectedError for reads.""" - routing_token, read_handle = _handle_redirect(error) - if routing_token: - state["routing_token"] = routing_token - if read_handle: - state["read_handle"] = read_handle diff --git a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py index 09b22cb8e..7d2493841 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py @@ -1,147 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -from typing import Any, Dict, IO, List, Optional, Union +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.writes_resumption_strategy import * # noqa -import google_crc32c -from google.cloud._storage_v2.types import storage as storage_type -from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError -from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( - _BaseResumptionStrategy, +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy has been moved to google.cloud.storage.asyncio.retry.writes_resumption_strategy. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.cloud.storage._experimental.asyncio.retry._helpers import ( - _extract_bidi_writes_redirect_proto, -) - - -class _WriteState: - """A helper class to track the state of a single upload operation. - - :type chunk_size: int - :param chunk_size: The size of chunks to write to the server. - - :type user_buffer: IO[bytes] - :param user_buffer: The data source. - - :type flush_interval: int - :param flush_interval: The flush interval at which the data is flushed. - """ - - def __init__( - self, - chunk_size: int, - user_buffer: IO[bytes], - flush_interval: int, - ): - self.chunk_size = chunk_size - self.user_buffer = user_buffer - self.persisted_size: int = 0 - self.bytes_sent: int = 0 - self.bytes_since_last_flush: int = 0 - self.flush_interval: int = flush_interval - self.write_handle: Union[bytes, storage_type.BidiWriteHandle, None] = None - self.routing_token: Optional[str] = None - self.is_finalized: bool = False - - -class _WriteResumptionStrategy(_BaseResumptionStrategy): - """The concrete resumption strategy for bidi writes.""" - - def generate_requests( - self, state: Dict[str, Any] - ) -> List[storage_type.BidiWriteObjectRequest]: - """Generates BidiWriteObjectRequests to resume or continue the upload. - - This method is not applicable for `open` methods. - """ - write_state: _WriteState = state["write_state"] - - requests = [] - # The buffer should already be seeked to the correct position (persisted_size) - # by the `recover_state_on_failure` method before this is called. - while not write_state.is_finalized: - chunk = write_state.user_buffer.read(write_state.chunk_size) - - # End of File detection - if not chunk: - break - - checksummed_data = storage_type.ChecksummedData(content=chunk) - checksum = google_crc32c.Checksum(chunk) - checksummed_data.crc32c = int.from_bytes(checksum.digest(), "big") - - request = storage_type.BidiWriteObjectRequest( - write_offset=write_state.bytes_sent, - checksummed_data=checksummed_data, - ) - chunk_len = len(chunk) - write_state.bytes_sent += chunk_len - write_state.bytes_since_last_flush += chunk_len - - if write_state.bytes_since_last_flush >= write_state.flush_interval: - request.flush = True - # reset counter after marking flush - write_state.bytes_since_last_flush = 0 - - requests.append(request) - return requests - - def update_state_from_response( - self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any] - ) -> None: - """Processes a server response and updates the write state.""" - write_state: _WriteState = state["write_state"] - if response is None: - return - if response.persisted_size: - write_state.persisted_size = response.persisted_size - - if response.write_handle: - write_state.write_handle = response.write_handle - - if response.resource: - write_state.persisted_size = response.resource.size - if response.resource.finalize_time: - write_state.is_finalized = True - - async def recover_state_on_failure( - self, error: Exception, state: Dict[str, Any] - ) -> None: - """ - Handles errors, specifically BidiWriteObjectRedirectedError, and rewinds state. - - This method rewinds the user buffer and internal byte tracking to the - last confirmed 'persisted_size' from the server. 
- """ - write_state: _WriteState = state["write_state"] - - redirect_proto = None - - if isinstance(error, BidiWriteObjectRedirectedError): - redirect_proto = error - else: - redirect_proto = _extract_bidi_writes_redirect_proto(error) - - # Extract routing token and potentially a new write handle for redirection. - if redirect_proto: - if redirect_proto.routing_token: - write_state.routing_token = redirect_proto.routing_token - if redirect_proto.write_handle: - write_state.write_handle = redirect_proto.write_handle - - # We must assume any data sent beyond 'persisted_size' was lost. - # Reset the user buffer to the last known good byte confirmed by the server. - write_state.user_buffer.seek(write_state.persisted_size) - write_state.bytes_sent = write_state.persisted_size - write_state.bytes_since_last_flush = 0 diff --git a/google/cloud/storage/_experimental/grpc_client.py b/google/cloud/storage/_experimental/grpc_client.py index 7a739b7b7..99ecbe044 100644 --- a/google/cloud/storage/_experimental/grpc_client.py +++ b/google/cloud/storage/_experimental/grpc_client.py @@ -1,122 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -"""A client for interacting with Google Cloud Storage using the gRPC API.""" +# Import everything from the new stable module +from google.cloud.storage.grpc_client import * # noqa -from google.cloud.client import ClientWithProject -from google.cloud import _storage_v2 as storage_v2 - -_marker = object() - - -class GrpcClient(ClientWithProject): - """A client for interacting with Google Cloud Storage using the gRPC API. - - :type project: str or None - :param project: The project which the client acts on behalf of. If not - passed, falls back to the default inferred from the - environment. - - :type credentials: :class:`~google.auth.credentials.Credentials` - :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed, falls back to the default - inferred from the environment. - - :type client_info: :class:`~google.api_core.client_info.ClientInfo` - :param client_info: - The client info used to send a user-agent string along with API - requests. If ``None``, then default info will be used. Generally, - you only need to set this if you're developing your own library - or partner tool. - - :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` - :param client_options: (Optional) Client options used to set user options - on the client. A non-default universe domain or API endpoint should be - set through client_options. - - :type api_key: string - :param api_key: - (Optional) An API key. Mutually exclusive with any other credentials. - This parameter is an alias for setting `client_options.api_key` and - will supersede any API key set in the `client_options` parameter. - - :type attempt_direct_path: bool - :param attempt_direct_path: - (Optional) Whether to attempt to use DirectPath for gRPC connections. 
- This provides a direct, unproxied connection to GCS for lower latency - and higher throughput, and is highly recommended when running on Google - Cloud infrastructure. Defaults to ``True``. - """ - - def __init__( - self, - project=_marker, - credentials=None, - client_info=None, - client_options=None, - *, - api_key=None, - attempt_direct_path=True, - ): - super(GrpcClient, self).__init__(project=project, credentials=credentials) - - if isinstance(client_options, dict): - if api_key: - client_options["api_key"] = api_key - elif client_options is None: - client_options = {} if not api_key else {"api_key": api_key} - elif api_key: - client_options.api_key = api_key - - self._grpc_client = self._create_gapic_client( - credentials=credentials, - client_info=client_info, - client_options=client_options, - attempt_direct_path=attempt_direct_path, - ) - - def _create_gapic_client( - self, - credentials=None, - client_info=None, - client_options=None, - attempt_direct_path=True, - ): - """Creates and configures the low-level GAPIC `storage_v2` client.""" - transport_cls = storage_v2.StorageClient.get_transport_class("grpc") - - channel = transport_cls.create_channel(attempt_direct_path=attempt_direct_path) - - transport = transport_cls(credentials=credentials, channel=channel) - - return storage_v2.StorageClient( - credentials=credentials, - transport=transport, - client_info=client_info, - client_options=client_options, - ) - - @property - def grpc_client(self): - """The underlying gRPC client. - - This property gives users direct access to the `storage_v2.StorageClient` - instance. This can be useful for accessing - newly added or experimental RPCs that are not yet exposed through - the high-level GrpcClient. - - Returns: - google.cloud.storage_v2.StorageClient: The configured GAPIC client. - """ - return self._grpc_client +warnings.warn( + "google.cloud.storage._experimental.grpc_client has been moved to google.cloud.storage.grpc_client. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/asyncio/_utils.py b/google/cloud/storage/asyncio/_utils.py new file mode 100644 index 000000000..170a0cfae --- /dev/null +++ b/google/cloud/storage/asyncio/_utils.py @@ -0,0 +1,41 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import google_crc32c + +from google.api_core import exceptions + + +def raise_if_no_fast_crc32c(): + """Check if the C-accelerated version of google-crc32c is available. + + If not, raise an error to prevent silent performance degradation. + + raises google.api_core.exceptions.FailedPrecondition: If the C extension is not available. + returns: True if the C extension is available. + rtype: bool + + """ + if google_crc32c.implementation != "c": + raise exceptions.FailedPrecondition( + "The google-crc32c package is not installed with C support. " + "C extension is required for faster data integrity checks." + "For more information, see https://github.com/googleapis/python-crc32c." 
+ ) + + +def update_write_handle_if_exists(obj, response): + """Update the write_handle attribute of an object if it exists in the response.""" + if hasattr(response, "write_handle") and response.write_handle is not None: + obj.write_handle = response.write_handle diff --git a/google/cloud/storage/asyncio/async_abstract_object_stream.py b/google/cloud/storage/asyncio/async_abstract_object_stream.py new file mode 100644 index 000000000..26cbab7a0 --- /dev/null +++ b/google/cloud/storage/asyncio/async_abstract_object_stream.py @@ -0,0 +1,67 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +from typing import Any, Optional + + +class _AsyncAbstractObjectStream(abc.ABC): + """Abstract base class to represent gRPC bidi-stream for GCS ``Object``. + + Concrete implementation of this class could be ``_AsyncReadObjectStream`` + or ``_AsyncWriteObjectStream``. + + :type bucket_name: str + :param bucket_name: (Optional) The name of the bucket containing the object. + + :type object_name: str + :param object_name: (Optional) The name of the object. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type handle: Any + :param handle: (Optional) The handle for the object, could be read_handle or + write_handle, based on how the stream is used. + """ + + def __init__( + self, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + handle: Optional[Any] = None, + ) -> None: + super().__init__() + self.bucket_name: str = bucket_name + self.object_name: str = object_name + self.generation_number: Optional[int] = generation_number + self.handle: Optional[Any] = handle + + @abc.abstractmethod + async def open(self) -> None: + pass + + @abc.abstractmethod + async def close(self) -> None: + pass + + @abc.abstractmethod + async def send(self, protobuf: Any) -> None: + pass + + @abc.abstractmethod + async def recv(self) -> Any: + pass diff --git a/google/cloud/storage/asyncio/async_appendable_object_writer.py b/google/cloud/storage/asyncio/async_appendable_object_writer.py new file mode 100644 index 000000000..5505ce390 --- /dev/null +++ b/google/cloud/storage/asyncio/async_appendable_object_writer.py @@ -0,0 +1,595 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +NOTE: +This is _experimental module for upcoming support for Rapid Storage. 
+(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) + +APIs may not work as intended and are not stable yet. Feature is not +GA(Generally Available) yet, please contact your TAM (Technical Account Manager) +if you want to use these Rapid Storage APIs. + +""" +from io import BufferedReader +import io +import logging +from typing import List, Optional, Tuple, Union + +from google.api_core import exceptions +from google.api_core.retry_async import AsyncRetry +from google.rpc import status_pb2 +from google.cloud._storage_v2.types import BidiWriteObjectRedirectedError +from google.cloud._storage_v2.types.storage import BidiWriteObjectRequest + + +from . import _utils +from google.cloud import _storage_v2 +from google.cloud.storage.asyncio.async_grpc_client import ( + AsyncGrpcClient, +) +from google.cloud.storage.asyncio.async_write_object_stream import ( + _AsyncWriteObjectStream, +) +from google.cloud.storage.asyncio.retry.bidi_stream_retry_manager import ( + _BidiStreamRetryManager, +) +from google.cloud.storage.asyncio.retry.writes_resumption_strategy import ( + _WriteResumptionStrategy, + _WriteState, +) +from google.cloud.storage.asyncio.retry._helpers import ( + _extract_bidi_writes_redirect_proto, +) + + +_MAX_CHUNK_SIZE_BYTES = 2 * 1024 * 1024 # 2 MiB +_DEFAULT_FLUSH_INTERVAL_BYTES = 16 * 1024 * 1024 # 16 MiB +_BIDI_WRITE_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" +) +logger = logging.getLogger(__name__) + + +def _is_write_retryable(exc): + """Predicate to determine if a write operation should be retried.""" + + if isinstance( + exc, + ( + exceptions.InternalServerError, + exceptions.ServiceUnavailable, + exceptions.DeadlineExceeded, + exceptions.TooManyRequests, + BidiWriteObjectRedirectedError, + ), + ): + logger.warning(f"Retryable write exception encountered: {exc}") + return True + + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + if isinstance(grpc_error, BidiWriteObjectRedirectedError): + return True + + trailers = grpc_error.trailing_metadata() + if not trailers: + return False + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: + return True + except Exception: + logger.error( + "Error unpacking redirect details from gRPC error. Exception: ", + {exc}, + ) + return False + return False + + +class AsyncAppendableObjectWriter: + """Class for appending data to a GCS Appendable Object asynchronously.""" + + def __init__( + self, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation: Optional[int] = None, + write_handle: Optional[_storage_v2.BidiWriteHandle] = None, + writer_options: Optional[dict] = None, + ): + """ + Class for appending data to a GCS Appendable Object. 
+ + Example usage: + + ``` + + from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient + from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter + import asyncio + + client = AsyncGrpcClient().grpc_client + bucket_name = "my-bucket" + object_name = "my-appendable-object" + + # instantiate the writer + writer = AsyncAppendableObjectWriter(client, bucket_name, object_name) + # open the writer, (underlying gRPC bidi-stream will be opened) + await writer.open() + + # append data, it can be called multiple times. + await writer.append(b"hello world") + await writer.append(b"some more data") + + # optionally flush data to persist. + await writer.flush() + + # close the gRPC stream. + # Please note closing the program will also close the stream, + # however it's recommended to close the stream if no more data to append + # to clean up gRPC connection (which means CPU/memory/network resources) + await writer.close() + ``` + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient` + :param client: async grpc client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the GCS bucket containing the object. + + :type object_name: str + :param object_name: The name of the GCS Appendable Object to be written. + + :type generation: Optional[int] + :param generation: (Optional) If present, creates writer for that + specific revision of that object. Use this to append data to an + existing Appendable Object. + + Setting to ``0`` makes the `writer.open()` succeed only if + object doesn't exist in the bucket (useful for not accidentally + overwriting existing objects). + + Warning: If `None`, a new object is created. If an object with the + same name already exists, it will be overwritten the moment + `writer.open()` is called. + + :type write_handle: _storage_v2.BidiWriteHandle + :param write_handle: (Optional) An handle for writing the object. + If provided, opening the bidi-gRPC connection will be faster. + + :type writer_options: dict + :param writer_options: (Optional) A dictionary of writer options. + Supported options: + - "FLUSH_INTERVAL_BYTES": int + The number of bytes to append before "persisting" data in GCS + servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`. + Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`. + """ + _utils.raise_if_no_fast_crc32c() + self.client = client + self.bucket_name = bucket_name + self.object_name = object_name + self.write_handle = write_handle + self.generation = generation + + self.write_obj_stream: Optional[_AsyncWriteObjectStream] = None + self._is_stream_open: bool = False + # `offset` is the latest size of the object without staleless. + self.offset: Optional[int] = None + # `persisted_size` is the total_bytes persisted in the GCS server. + # Please note: `offset` and `persisted_size` are same when the stream is + # opened. 
+ self.persisted_size: Optional[int] = None + if writer_options is None: + writer_options = {} + self.flush_interval = writer_options.get( + "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES + ) + if self.flush_interval < _MAX_CHUNK_SIZE_BYTES: + raise exceptions.OutOfRange( + f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}" + ) + if self.flush_interval % _MAX_CHUNK_SIZE_BYTES != 0: + raise exceptions.OutOfRange( + f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}" + ) + self.bytes_appended_since_last_flush = 0 + self._routing_token: Optional[str] = None + self.object_resource: Optional[_storage_v2.Object] = None + + async def state_lookup(self) -> int: + """Returns the persisted_size + + :rtype: int + :returns: persisted size. + + :raises ValueError: If the stream is not open (i.e., `open()` has not + been called). + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. Call open() before state_lookup().") + + await self.write_obj_stream.send( + _storage_v2.BidiWriteObjectRequest( + state_lookup=True, + ) + ) + response = await self.write_obj_stream.recv() + self.persisted_size = response.persisted_size + return self.persisted_size + + def _on_open_error(self, exc): + """Extracts routing token and write handle on redirect error during open.""" + redirect_proto = _extract_bidi_writes_redirect_proto(exc) + if redirect_proto: + if redirect_proto.routing_token: + self._routing_token = redirect_proto.routing_token + if redirect_proto.write_handle: + self.write_handle = redirect_proto.write_handle + if redirect_proto.generation: + self.generation = redirect_proto.generation + + async def open( + self, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Opens the underlying bidi-gRPC stream. + + :raises ValueError: If the stream is already open. + + """ + if self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is already open") + + if retry_policy is None: + retry_policy = AsyncRetry( + predicate=_is_write_retryable, on_error=self._on_open_error + ) + else: + original_on_error = retry_policy._on_error + + def combined_on_error(exc): + self._on_open_error(exc) + if original_on_error: + original_on_error(exc) + + retry_policy = AsyncRetry( + predicate=_is_write_retryable, + initial=retry_policy._initial, + maximum=retry_policy._maximum, + multiplier=retry_policy._multiplier, + deadline=retry_policy._deadline, + on_error=combined_on_error, + ) + + async def _do_open(): + current_metadata = list(metadata) if metadata else [] + + # Cleanup stream from previous failed attempt, if any. + if self.write_obj_stream: + if self.write_obj_stream.is_stream_open: + try: + await self.write_obj_stream.close() + except Exception as e: + logger.warning( + "Error closing previous write stream during open retry. 
Got exception: ", + {e}, + ) + self.write_obj_stream = None + self._is_stream_open = False + + self.write_obj_stream = _AsyncWriteObjectStream( + client=self.client.grpc_client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation, + write_handle=self.write_handle, + routing_token=self._routing_token, + ) + + if self._routing_token: + current_metadata.append( + ("x-goog-request-params", f"routing_token={self._routing_token}") + ) + + await self.write_obj_stream.open( + metadata=current_metadata if metadata else None + ) + + if self.write_obj_stream.generation_number: + self.generation = self.write_obj_stream.generation_number + if self.write_obj_stream.write_handle: + self.write_handle = self.write_obj_stream.write_handle + if self.write_obj_stream.persisted_size is not None: + self.persisted_size = self.write_obj_stream.persisted_size + + self._is_stream_open = True + self._routing_token = None + + await retry_policy(_do_open)() + + async def append( + self, + data: bytes, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Appends data to the Appendable object with automatic retries. + + calling `self.append` will append bytes at the end of the current size + ie. `self.offset` bytes relative to the begining of the object. + + This method sends the provided `data` to the GCS server in chunks. + and persists data in GCS at every `_DEFAULT_FLUSH_INTERVAL_BYTES` bytes + or at the last chunk whichever is earlier. Persisting is done by setting + `flush=True` on request. + + :type data: bytes + :param data: The bytes to append to the object. + + :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` + :param retry_policy: (Optional) The retry policy to use for the operation. + + :type metadata: List[Tuple[str, str]] + :param metadata: (Optional) The metadata to be sent with the request. + + :raises ValueError: If the stream is not open. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. 
Call open() before append().") + if not data: + logger.debug("No data provided to append; returning without action.") + return + + if retry_policy is None: + retry_policy = AsyncRetry(predicate=_is_write_retryable) + + strategy = _WriteResumptionStrategy() + buffer = io.BytesIO(data) + attempt_count = 0 + + def send_and_recv_generator( + requests: List[BidiWriteObjectRequest], + state: dict[str, _WriteState], + metadata: Optional[List[Tuple[str, str]]] = None, + ): + async def generator(): + nonlocal attempt_count + nonlocal requests + attempt_count += 1 + resp = None + write_state = state["write_state"] + # If this is a retry or redirect, we must re-open the stream + if attempt_count > 1 or write_state.routing_token: + logger.info( + f"Re-opening the stream with attempt_count: {attempt_count}" + ) + if self.write_obj_stream and self.write_obj_stream.is_stream_open: + await self.write_obj_stream.close() + + current_metadata = list(metadata) if metadata else [] + if write_state.routing_token: + current_metadata.append( + ( + "x-goog-request-params", + f"routing_token={write_state.routing_token}", + ) + ) + self._routing_token = write_state.routing_token + + self._is_stream_open = False + await self.open(metadata=current_metadata) + + write_state.persisted_size = self.persisted_size + write_state.write_handle = self.write_handle + write_state.routing_token = None + + write_state.user_buffer.seek(write_state.persisted_size) + write_state.bytes_sent = write_state.persisted_size + write_state.bytes_since_last_flush = 0 + + requests = strategy.generate_requests(state) + + num_requests = len(requests) + for i, chunk_req in enumerate(requests): + if i == num_requests - 1: + chunk_req.state_lookup = True + chunk_req.flush = True + await self.write_obj_stream.send(chunk_req) + + resp = await self.write_obj_stream.recv() + if resp: + if resp.persisted_size is not None: + self.persisted_size = resp.persisted_size + state["write_state"].persisted_size = resp.persisted_size + self.offset = self.persisted_size + if resp.write_handle: + self.write_handle = resp.write_handle + state["write_state"].write_handle = resp.write_handle + self.bytes_appended_since_last_flush = 0 + + yield resp + + return generator() + + # State initialization + write_state = _WriteState(_MAX_CHUNK_SIZE_BYTES, buffer, self.flush_interval) + write_state.write_handle = self.write_handle + write_state.persisted_size = self.persisted_size + write_state.bytes_sent = self.persisted_size + write_state.bytes_since_last_flush = self.bytes_appended_since_last_flush + + retry_manager = _BidiStreamRetryManager( + _WriteResumptionStrategy(), + lambda r, s: send_and_recv_generator(r, s, metadata), + ) + await retry_manager.execute({"write_state": write_state}, retry_policy) + + # Sync local markers + self.write_obj_stream.persisted_size = write_state.persisted_size + self.write_obj_stream.write_handle = write_state.write_handle + self.bytes_appended_since_last_flush = write_state.bytes_since_last_flush + self.persisted_size = write_state.persisted_size + self.offset = write_state.persisted_size + + async def simple_flush(self) -> None: + """Flushes the data to the server. + Please note: Unlike `flush` it does not do `state_lookup` + + :rtype: None + + :raises ValueError: If the stream is not open (i.e., `open()` has not + been called). + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. 
Call open() before simple_flush().") + + await self.write_obj_stream.send( + _storage_v2.BidiWriteObjectRequest( + flush=True, + ) + ) + self.bytes_appended_since_last_flush = 0 + + async def flush(self) -> int: + """Flushes the data to the server. + + :rtype: int + :returns: The persisted size after flush. + + :raises ValueError: If the stream is not open (i.e., `open()` has not + been called). + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. Call open() before flush().") + + await self.write_obj_stream.send( + _storage_v2.BidiWriteObjectRequest( + flush=True, + state_lookup=True, + ) + ) + response = await self.write_obj_stream.recv() + self.persisted_size = response.persisted_size + self.offset = self.persisted_size + self.bytes_appended_since_last_flush = 0 + return self.persisted_size + + async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]: + """Closes the underlying bidi-gRPC stream. + + :type finalize_on_close: bool + :param finalize_on_close: Finalizes the Appendable Object. No more data + can be appended. + + rtype: Union[int, _storage_v2.Object] + returns: Updated `self.persisted_size` by default after closing the + bidi-gRPC stream. However, if `finalize_on_close=True` is passed, + returns the finalized object resource. + + :raises ValueError: If the stream is not open (i.e., `open()` has not + been called). + + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. Call open() before close().") + + if finalize_on_close: + return await self.finalize() + + await self.write_obj_stream.close() + + self._is_stream_open = False + self.offset = None + return self.persisted_size + + async def finalize(self) -> _storage_v2.Object: + """Finalizes the Appendable Object. + + Note: Once finalized no more data can be appended. + This method is different from `close`. if `.close()` is called data may + still be appended to object at a later point in time by opening with + generation number. + (i.e. `open(..., generation=)`. + However if `.finalize()` is called no more data can be appended to the + object. + + rtype: google.cloud.storage_v2.types.Object + returns: The finalized object resource. + + :raises ValueError: If the stream is not open (i.e., `open()` has not + been called). + """ + if not self._is_stream_open: + raise ValueError("Stream is not open. Call open() before finalize().") + + await self.write_obj_stream.send( + _storage_v2.BidiWriteObjectRequest(finish_write=True) + ) + response = await self.write_obj_stream.recv() + self.object_resource = response.resource + self.persisted_size = self.object_resource.size + await self.write_obj_stream.close() + + self._is_stream_open = False + self.offset = None + return self.object_resource + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open + + # helper methods. 
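Taken together, the methods above make up a short append session: open the stream, append, flush to persist, then either close (object stays appendable) or finalize (object becomes immutable). The snippet below is a sketch of that flow, not part of the patch itself; the bucket and object names are placeholders, it assumes default credentials and a C-accelerated google-crc32c install, and the custom retry policy is optional.

```python
# Sketch of an end-to-end append session with the writer defined above.
# "my-bucket" / "my-appendable-object" are placeholder names.
import asyncio

from google.api_core.retry_async import AsyncRetry
from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient
from google.cloud.storage.asyncio.async_appendable_object_writer import (
    AsyncAppendableObjectWriter,
)


async def main():
    client = AsyncGrpcClient()
    writer = AsyncAppendableObjectWriter(
        client,
        "my-bucket",
        "my-appendable-object",
        # generation=0 would make open() succeed only if the object is absent.
        writer_options={"FLUSH_INTERVAL_BYTES": 4 * 1024 * 1024},  # multiple of 2 MiB
    )

    # Optional: open()/append() otherwise build a default AsyncRetry around
    # the module's _is_write_retryable predicate.
    policy = AsyncRetry(initial=1.0, maximum=16.0, multiplier=2.0, deadline=120.0)

    await writer.open(retry_policy=policy)
    await writer.append(b"hello world", retry_policy=policy)
    persisted = await writer.flush()  # flush + state_lookup, returns persisted size
    print(f"persisted bytes: {persisted}")

    # close() keeps the object appendable; close(finalize_on_close=True), like
    # finalize(), makes it immutable and returns the object resource.
    resource = await writer.close(finalize_on_close=True)
    print(f"finalized at {resource.size} bytes")


asyncio.run(main())
```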
+ async def append_from_string(self, data: str): + """ + str data will be encoded to bytes using utf-8 encoding calling + + self.append(data.encode("utf-8")) + """ + raise NotImplementedError("append_from_string is not implemented yet.") + + async def append_from_stream(self, stream_obj): + """ + At a time read a chunk of data (16MiB) from `stream_obj` + and call self.append(chunk) + """ + raise NotImplementedError("append_from_stream is not implemented yet.") + + async def append_from_file( + self, file_obj: BufferedReader, block_size: int = _DEFAULT_FLUSH_INTERVAL_BYTES + ): + """ + Appends data to an Appendable Object using file_handle which is opened + for reading in binary mode. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + """ + while block := file_obj.read(block_size): + await self.append(block) diff --git a/google/cloud/storage/asyncio/async_grpc_client.py b/google/cloud/storage/asyncio/async_grpc_client.py new file mode 100644 index 000000000..e985f2252 --- /dev/null +++ b/google/cloud/storage/asyncio/async_grpc_client.py @@ -0,0 +1,100 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An async client for interacting with Google Cloud Storage using the gRPC API.""" + +from google.cloud import _storage_v2 as storage_v2 +from google.cloud._storage_v2.services.storage.transports.base import ( + DEFAULT_CLIENT_INFO, +) + + +class AsyncGrpcClient: + """An asynchronous client for interacting with Google Cloud Storage using the gRPC API. + + :type credentials: :class:`~google.auth.credentials.Credentials` + :param credentials: (Optional) The OAuth2 Credentials to use for this + client. If not passed, falls back to the default + inferred from the environment. + + :type client_info: :class:`~google.api_core.client_info.ClientInfo` + :param client_info: + The client info used to send a user-agent string along with API + requests. If ``None``, then default info will be used. + + :type client_options: :class:`~google.api_core.client_options.ClientOptions` + :param client_options: (Optional) Client options used to set user options + on the client. + + :type attempt_direct_path: bool + :param attempt_direct_path: + (Optional) Whether to attempt to use DirectPath for gRPC connections. + Defaults to ``True``. 
+ """ + + def __init__( + self, + credentials=None, + client_info=None, + client_options=None, + *, + attempt_direct_path=True, + ): + self._grpc_client = self._create_async_grpc_client( + credentials=credentials, + client_info=client_info, + client_options=client_options, + attempt_direct_path=attempt_direct_path, + ) + + def _create_async_grpc_client( + self, + credentials=None, + client_info=None, + client_options=None, + attempt_direct_path=True, + ): + transport_cls = storage_v2.StorageAsyncClient.get_transport_class( + "grpc_asyncio" + ) + + if client_info is None: + client_info = DEFAULT_CLIENT_INFO + primary_user_agent = client_info.to_user_agent() + + channel = transport_cls.create_channel( + attempt_direct_path=attempt_direct_path, + credentials=credentials, + options=(("grpc.primary_user_agent", primary_user_agent),), + ) + transport = transport_cls(channel=channel) + + return storage_v2.StorageAsyncClient( + transport=transport, + client_info=client_info, + client_options=client_options, + ) + + @property + def grpc_client(self): + """The underlying gRPC client. + + This property gives users direct access to the `_storage_v2.StorageAsyncClient` + instance. This can be useful for accessing + newly added or experimental RPCs that are not yet exposed through + the high-level GrpcClient. + Returns: + google.cloud._storage_v2.StorageAsyncClient: The configured GAPIC client. + """ + return self._grpc_client diff --git a/google/cloud/storage/asyncio/async_multi_range_downloader.py b/google/cloud/storage/asyncio/async_multi_range_downloader.py new file mode 100644 index 000000000..6925ddc77 --- /dev/null +++ b/google/cloud/storage/asyncio/async_multi_range_downloader.py @@ -0,0 +1,506 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations +import asyncio +import logging +from google.api_core import exceptions +from google.api_core.retry_async import AsyncRetry +from google.cloud.storage.asyncio.retry._helpers import _handle_redirect +from google.rpc import status_pb2 + +from typing import List, Optional, Tuple, Any, Dict + +from ._utils import raise_if_no_fast_crc32c +from google.cloud.storage.asyncio.async_read_object_stream import ( + _AsyncReadObjectStream, +) +from google.cloud.storage.asyncio.async_grpc_client import ( + AsyncGrpcClient, +) +from google.cloud.storage.asyncio.retry.bidi_stream_retry_manager import ( + _BidiStreamRetryManager, +) +from google.cloud.storage.asyncio.retry.reads_resumption_strategy import ( + _ReadResumptionStrategy, + _DownloadState, +) + +from io import BytesIO +from google.cloud import _storage_v2 +from google.cloud.storage._helpers import generate_random_56_bit_integer + + +_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 +_BIDI_READ_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +) + +logger = logging.getLogger(__name__) + + +def _is_read_retryable(exc): + """Predicate to determine if a read operation should be retried.""" + if isinstance( + exc, + ( + exceptions.InternalServerError, + exceptions.ServiceUnavailable, + exceptions.DeadlineExceeded, + exceptions.TooManyRequests, + ), + ): + return True + + if not isinstance(exc, exceptions.Aborted) or not exc.errors: + return False + + try: + grpc_error = exc.errors[0] + trailers = grpc_error.trailing_metadata() + if not trailers: + return False + + status_details_bin = next( + (v for k, v in trailers if k == "grpc-status-details-bin"), None + ) + + if not status_details_bin: + return False + + status_proto = status_pb2.Status() + status_proto.ParseFromString(status_details_bin) + return any( + detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL + for detail in status_proto.details + ) + except Exception as e: + logger.error(f"Error parsing status_details_bin: {e}") + return False + + +class AsyncMultiRangeDownloader: + """Provides an interface for downloading multiple ranges of a GCS ``Object`` + concurrently. + + Example usage: + + .. code-block:: python + + client = AsyncGrpcClient().grpc_client + mrd = await AsyncMultiRangeDownloader.create_mrd( + client, bucket_name="chandrasiri-rs", object_name="test_open9" + ) + my_buff1 = open('my_fav_file.txt', 'wb') + my_buff2 = BytesIO() + my_buff3 = BytesIO() + my_buff4 = any_object_which_provides_BytesIO_like_interface() + await mrd.download_ranges( + [ + # (start_byte, bytes_to_read, writeable_buffer) + (0, 100, my_buff1), + (100, 20, my_buff2), + (200, 123, my_buff3), + (300, 789, my_buff4), + ] + ) + + # verify data in buffers... + assert my_buff2.getbuffer().nbytes == 20 + + + """ + + @classmethod + async def create_mrd( + cls, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> AsyncMultiRangeDownloader: + """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC + object for reading. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. 
+ + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific + revision of this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. + + :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` + :param retry_policy: (Optional) The retry policy to use for the ``open`` operation. + + :type metadata: List[Tuple[str, str]] + :param metadata: (Optional) The metadata to be sent with the ``open`` request. + + :rtype: :class:`~google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` + :returns: An initialized AsyncMultiRangeDownloader instance for reading. + """ + mrd = cls(client, bucket_name, object_name, generation_number, read_handle) + await mrd.open(retry_policy=retry_policy, metadata=metadata) + return mrd + + def __init__( + self, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + ) -> None: + """Constructor for AsyncMultiRangeDownloader, clients are not adviced to + use it directly. Instead it's adviced to use the classmethod `create_mrd`. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. + + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing read handle. 
+ """ + + raise_if_no_fast_crc32c() + + self.client = client + self.bucket_name = bucket_name + self.object_name = object_name + self.generation_number = generation_number + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle + self.read_obj_str: Optional[_AsyncReadObjectStream] = None + self._is_stream_open: bool = False + self._routing_token: Optional[str] = None + self._read_id_to_writable_buffer_dict = {} + self._read_id_to_download_ranges_id = {} + self._download_ranges_id_to_pending_read_ids = {} + self.persisted_size: Optional[int] = None # updated after opening the stream + + async def __aenter__(self): + """Opens the underlying bidi-gRPC connection to read from the object.""" + await self.open() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Closes the underlying bidi-gRPC connection.""" + if self.is_stream_open: + await self.close() + + def _on_open_error(self, exc): + """Extracts routing token and read handle on redirect error during open.""" + routing_token, read_handle = _handle_redirect(exc) + if routing_token: + self._routing_token = routing_token + if read_handle: + self.read_handle = read_handle + + async def open( + self, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Opens the bidi-gRPC connection to read from the object.""" + if self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is already open") + + if retry_policy is None: + retry_policy = AsyncRetry( + predicate=_is_read_retryable, on_error=self._on_open_error + ) + else: + original_on_error = retry_policy._on_error + + def combined_on_error(exc): + self._on_open_error(exc) + if original_on_error: + original_on_error(exc) + + retry_policy = AsyncRetry( + predicate=_is_read_retryable, + initial=retry_policy._initial, + maximum=retry_policy._maximum, + multiplier=retry_policy._multiplier, + deadline=retry_policy._deadline, + on_error=combined_on_error, + ) + + async def _do_open(): + current_metadata = list(metadata) if metadata else [] + + # Cleanup stream from previous failed attempt, if any. 
+ if self.read_obj_str: + if self.read_obj_str.is_stream_open: + try: + await self.read_obj_str.close() + except exceptions.GoogleAPICallError as e: + logger.warning( + f"Failed to close existing stream during resumption: {e}" + ) + self.read_obj_str = None + self._is_stream_open = False + + self.read_obj_str = _AsyncReadObjectStream( + client=self.client.grpc_client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation_number, + read_handle=self.read_handle, + ) + + if self._routing_token: + current_metadata.append( + ("x-goog-request-params", f"routing_token={self._routing_token}") + ) + self._routing_token = None + + await self.read_obj_str.open( + metadata=current_metadata if current_metadata else None + ) + + if self.read_obj_str.generation_number: + self.generation_number = self.read_obj_str.generation_number + if self.read_obj_str.read_handle: + self.read_handle = self.read_obj_str.read_handle + if self.read_obj_str.persisted_size is not None: + self.persisted_size = self.read_obj_str.persisted_size + + self._is_stream_open = True + + await retry_policy(_do_open)() + + async def download_ranges( + self, + read_ranges: List[Tuple[int, int, BytesIO]], + lock: asyncio.Lock = None, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Downloads multiple byte ranges from the object into the buffers + provided by user with automatic retries. + + :type read_ranges: List[Tuple[int, int, "BytesIO"]] + :param read_ranges: A list of tuples, where each tuple represents a + combination of byte_range and writeable buffer in format - + (`start_byte`, `bytes_to_read`, `writeable_buffer`). Buffer has + to be provided by the user, and user has to make sure appropriate + memory is available in the application to avoid out-of-memory crash. + + Special cases: + if the value of `bytes_to_read` is 0, it'll be interpreted as + download all contents until the end of the file from `start_byte`. + Examples: + * (0, 0, buffer) : downloads 0 to end , i.e. entire object. + * (100, 0, buffer) : downloads from 100 to end. + + + :type lock: asyncio.Lock + :param lock: (Optional) An asyncio lock to synchronize sends and recvs + on the underlying bidi-GRPC stream. This is required when multiple + coroutines are calling this method concurrently. + + i.e. Example usage with multiple coroutines: + + ``` + lock = asyncio.Lock() + task1 = asyncio.create_task(mrd.download_ranges(ranges1, lock)) + task2 = asyncio.create_task(mrd.download_ranges(ranges2, lock)) + await asyncio.gather(task1, task2) + + ``` + + If user want to call this method serially from multiple coroutines, + then providing a lock is not necessary. + + ``` + await mrd.download_ranges(ranges1) + await mrd.download_ranges(ranges2) + + # ... some other code code... + + ``` + + :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` + :param retry_policy: (Optional) The retry policy to use for the operation. + + :raises ValueError: if the underlying bidi-GRPC stream is not open. + :raises ValueError: if the length of read_ranges is more than 1000. + :raises DataCorruption: if a checksum mismatch is detected while reading data. 
+ + """ + + if len(read_ranges) > 1000: + raise ValueError( + "Invalid input - length of read_ranges cannot be more than 1000" + ) + + if not self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is not open") + + if lock is None: + lock = asyncio.Lock() + + if retry_policy is None: + retry_policy = AsyncRetry(predicate=_is_read_retryable) + + # Initialize Global State for Retry Strategy + download_states = {} + for read_range in read_ranges: + read_id = generate_random_56_bit_integer() + download_states[read_id] = _DownloadState( + initial_offset=read_range[0], + initial_length=read_range[1], + user_buffer=read_range[2], + ) + + initial_state = { + "download_states": download_states, + "read_handle": self.read_handle, + "routing_token": None, + } + + # Track attempts to manage stream reuse + attempt_count = 0 + + def send_ranges_and_get_bytes( + requests: List[_storage_v2.ReadRange], + state: Dict[str, Any], + metadata: Optional[List[Tuple[str, str]]] = None, + ): + async def generator(): + nonlocal attempt_count + attempt_count += 1 + + if attempt_count > 1: + logger.info( + f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges." + ) + + async with lock: + current_handle = state.get("read_handle") + current_token = state.get("routing_token") + + # We reopen if it's a redirect (token exists) OR if this is a retry + # (not first attempt). This prevents trying to send data on a dead + # stream from a previous failed attempt. + should_reopen = ( + (attempt_count > 1) + or (current_token is not None) + or (metadata is not None) + ) + + if should_reopen: + if current_token: + logger.info( + f"Re-opening stream with routing token: {current_token}" + ) + # Close existing stream if any + if self.read_obj_str and self.read_obj_str.is_stream_open: + await self.read_obj_str.close() + + # Re-initialize stream + self.read_obj_str = _AsyncReadObjectStream( + client=self.client.grpc_client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation_number, + read_handle=current_handle, + ) + + # Inject routing_token into metadata if present + current_metadata = list(metadata) if metadata else [] + if current_token: + current_metadata.append( + ( + "x-goog-request-params", + f"routing_token={current_token}", + ) + ) + + await self.read_obj_str.open( + metadata=current_metadata if current_metadata else None + ) + self._is_stream_open = True + + pending_read_ids = {r.read_id for r in requests} + + # Send Requests + for i in range( + 0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST + ): + batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST] + await self.read_obj_str.send( + _storage_v2.BidiReadObjectRequest(read_ranges=batch) + ) + + while pending_read_ids: + response = await self.read_obj_str.recv() + if response is None: + break + if response.object_data_ranges: + for data_range in response.object_data_ranges: + if data_range.range_end: + pending_read_ids.discard( + data_range.read_range.read_id + ) + yield response + + return generator() + + strategy = _ReadResumptionStrategy() + retry_manager = _BidiStreamRetryManager( + strategy, lambda r, s: send_ranges_and_get_bytes(r, s, metadata=metadata) + ) + + await retry_manager.execute(initial_state, retry_policy) + + if initial_state.get("read_handle"): + self.read_handle = initial_state["read_handle"] + + async def close(self): + """ + Closes the underlying bidi-gRPC connection. 
+ """ + if not self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is not open") + + if self.read_obj_str: + await self.read_obj_str.close() + self.read_obj_str = None + self._is_stream_open = False + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/async_read_object_stream.py b/google/cloud/storage/asyncio/async_read_object_stream.py new file mode 100644 index 000000000..b53fc1224 --- /dev/null +++ b/google/cloud/storage/asyncio/async_read_object_stream.py @@ -0,0 +1,198 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +NOTE: +This is _experimental module for upcoming support for Rapid Storage. +(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) + +APIs may not work as intended and are not stable yet. Feature is not +GA(Generally Available) yet, please contact your TAM(Technical Account Manager) +if you want to use these APIs. + +""" + +from typing import List, Optional, Tuple +from google.cloud import _storage_v2 +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) + +from google.api_core.bidi_async import AsyncBidiRpc + + +class _AsyncReadObjectStream(_AsyncAbstractObjectStream): + """Class representing a gRPC bidi-stream for reading data from a GCS ``Object``. + + This class provides a unix socket-like interface to a GCS ``Object``, with + methods like ``open``, ``close``, ``send``, and ``recv``. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: async grpc client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the GCS ``bucket`` containing the object. + + :type object_name: str + :param object_name: The name of the GCS ``object`` to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. 
+ """ + + def __init__( + self, + client: AsyncGrpcClient.grpc_client, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + ) -> None: + if client is None: + raise ValueError("client must be provided") + if bucket_name is None: + raise ValueError("bucket_name must be provided") + if object_name is None: + raise ValueError("object_name must be provided") + + super().__init__( + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation_number, + ) + self.client: AsyncGrpcClient.grpc_client = client + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle + + self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" + + self.rpc = self.client._client._transport._wrapped_methods[ + self.client._client._transport.bidi_read_object + ] + self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) + self.socket_like_rpc: Optional[AsyncBidiRpc] = None + self._is_stream_open: bool = False + self.persisted_size: Optional[int] = None + + async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: + """Opens the bidi-gRPC connection to read from the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a read handle. + + Args: + metadata (Optional[List[Tuple[str, str]]]): Additional metadata + to send with the initial stream request, e.g., for routing tokens. + """ + if self._is_stream_open: + raise ValueError("Stream is already open") + + read_handle = self.read_handle if self.read_handle else None + + read_object_spec = _storage_v2.BidiReadObjectSpec( + bucket=self._full_bucket_name, + object=self.object_name, + generation=self.generation_number if self.generation_number else None, + read_handle=read_handle, + ) + self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest( + read_object_spec=read_object_spec + ) + + # Build the x-goog-request-params header + request_params = [f"bucket={self._full_bucket_name}"] + other_metadata = [] + if metadata: + for key, value in metadata: + if key == "x-goog-request-params": + request_params.append(value) + else: + other_metadata.append((key, value)) + + current_metadata = other_metadata + current_metadata.append(("x-goog-request-params", ",".join(request_params))) + + self.socket_like_rpc = AsyncBidiRpc( + self.rpc, + initial_request=self.first_bidi_read_req, + metadata=current_metadata, + ) + await self.socket_like_rpc.open() # this is actually 1 send + response = await self.socket_like_rpc.recv() + # populated only in the first response of bidi-stream and when opened + # without using `read_handle` + if hasattr(response, "metadata") and response.metadata: + if self.generation_number is None: + self.generation_number = response.metadata.generation + # update persisted size + self.persisted_size = response.metadata.size + + if response and response.read_handle: + self.read_handle = response.read_handle + + self._is_stream_open = True + + async def close(self) -> None: + """Closes the bidi-gRPC connection.""" + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.requests_done() + await self.socket_like_rpc.close() + self._is_stream_open = False + + async def requests_done(self): + """Signals that all requests have been sent.""" + + await self.socket_like_rpc.send(None) + await self.socket_like_rpc.recv() + + async def send( + self, bidi_read_object_request: 
_storage_v2.BidiReadObjectRequest + ) -> None: + """Sends a request message on the stream. + + Args: + bidi_read_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): + The request message to send. This is typically used to specify + the read offset and limit. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.socket_like_rpc.send(bidi_read_object_request) + + async def recv(self) -> _storage_v2.BidiReadObjectResponse: + """Receives a response from the stream. + + This method waits for the next message from the server, which could + contain object data or metadata. + + Returns: + :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: + The response message from the server. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + response = await self.socket_like_rpc.recv() + # Update read_handle if present in response + if response and response.read_handle: + self.read_handle = response.read_handle + return response + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/async_write_object_stream.py b/google/cloud/storage/asyncio/async_write_object_stream.py new file mode 100644 index 000000000..233825865 --- /dev/null +++ b/google/cloud/storage/asyncio/async_write_object_stream.py @@ -0,0 +1,236 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +NOTE: +This is _experimental module for upcoming support for Rapid Storage. +(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) + +APIs may not work as intended and are not stable yet. Feature is not +GA(Generally Available) yet, please contact your TAM(Technical Account Manager) +if you want to use these Rapid Storage APIs. + +""" +from typing import List, Optional, Tuple +from google.cloud import _storage_v2 +from google.cloud.storage.asyncio import _utils +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) +from google.api_core.bidi_async import AsyncBidiRpc + + +class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): + """Class representing a gRPC bidi-stream for writing data from a GCS + ``Appendable Object``. + + This class provides a unix socket-like interface to a GCS ``Object``, with + methods like ``open``, ``close``, ``send``, and ``recv``. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: async grpc client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the GCS ``bucket`` containing the object. + + :type object_name: str + :param object_name: The name of the GCS ``Appendable Object`` to be write. 
+ + :type generation_number: int + :param generation_number: (Optional) If present, creates writer for that + specific revision of that object. Use this to append data to an + existing Appendable Object. + + Setting to ``0`` makes the `writer.open()` succeed only if + object doesn't exist in the bucket (useful for not accidentally + overwriting existing objects). + + Warning: If `None`, a new object is created. If an object with the + same name already exists, it will be overwritten the moment + `writer.open()` is called. + + :type write_handle: _storage_v2.BidiWriteHandle + :param write_handle: (Optional) An existing handle for writing the object. + If provided, opening the bidi-gRPC connection will be faster. + """ + + def __init__( + self, + client: AsyncGrpcClient.grpc_client, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, # None means new object + write_handle: Optional[_storage_v2.BidiWriteHandle] = None, + routing_token: Optional[str] = None, + ) -> None: + if client is None: + raise ValueError("client must be provided") + if bucket_name is None: + raise ValueError("bucket_name must be provided") + if object_name is None: + raise ValueError("object_name must be provided") + + super().__init__( + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation_number, + ) + self.client: AsyncGrpcClient.grpc_client = client + self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle + self.routing_token: Optional[str] = routing_token + + self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" + + self.rpc = self.client._client._transport._wrapped_methods[ + self.client._client._transport.bidi_write_object + ] + + self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) + self.socket_like_rpc: Optional[AsyncBidiRpc] = None + self._is_stream_open: bool = False + self.first_bidi_write_req = None + self.persisted_size = 0 + self.object_resource: Optional[_storage_v2.Object] = None + + async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: + """ + Opens the bidi-gRPC connection to write to the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a write handle. + + :rtype: None + :raises ValueError: If the stream is already open. + :raises google.api_core.exceptions.FailedPrecondition: + if `generation_number` is 0 and object already exists. + """ + if self._is_stream_open: + raise ValueError("Stream is already open") + + # Create a new object or overwrite existing one if generation_number + # is None. This makes it consistent with GCS JSON API behavior. + # Created object type would be Appendable Object. + # if `generation_number` == 0 new object will be created only if there + # isn't any existing object. 
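+ # Illustration of the three cases handled below (N is an example value):
+ #   generation_number=None -> WriteObjectSpec, no precondition: creates the
+ #                             object, overwriting an existing one if present.
+ #   generation_number=0    -> WriteObjectSpec with if_generation_match=0:
+ #                             creates the object only if it does not exist.
+ #   generation_number=N>0  -> AppendObjectSpec: appends to generation N of an
+ #                             existing Appendable Object.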
+ if self.generation_number is None or self.generation_number == 0: + self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( + write_object_spec=_storage_v2.WriteObjectSpec( + resource=_storage_v2.Object( + name=self.object_name, bucket=self._full_bucket_name + ), + appendable=True, + if_generation_match=self.generation_number, + ), + ) + else: + self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( + append_object_spec=_storage_v2.AppendObjectSpec( + bucket=self._full_bucket_name, + object=self.object_name, + generation=self.generation_number, + write_handle=self.write_handle if self.write_handle else None, + routing_token=self.routing_token if self.routing_token else None, + ), + ) + + request_param_values = [f"bucket={self._full_bucket_name}"] + final_metadata = [] + if metadata: + for key, value in metadata: + if key == "x-goog-request-params": + request_param_values.append(value) + else: + final_metadata.append((key, value)) + + final_metadata.append(("x-goog-request-params", ",".join(request_param_values))) + + self.socket_like_rpc = AsyncBidiRpc( + self.rpc, + initial_request=self.first_bidi_write_req, + metadata=final_metadata, + ) + + await self.socket_like_rpc.open() # this is actually 1 send + response = await self.socket_like_rpc.recv() + self._is_stream_open = True + + if response.persisted_size: + self.persisted_size = response.persisted_size + + if response.resource: + if not response.resource.size: + # Appending to a 0 byte appendable object. + self.persisted_size = 0 + else: + self.persisted_size = response.resource.size + + self.generation_number = response.resource.generation + + if response.write_handle: + self.write_handle = response.write_handle + + async def close(self) -> None: + """Closes the bidi-gRPC connection.""" + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.requests_done() + await self.socket_like_rpc.close() + self._is_stream_open = False + + async def requests_done(self): + """Signals that all requests have been sent.""" + + await self.socket_like_rpc.send(None) + _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) + + async def send( + self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest + ) -> None: + """Sends a request message on the stream. + + Args: + bidi_write_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): + The request message to send. This is typically used to specify + the read offset and limit. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.socket_like_rpc.send(bidi_write_object_request) + + async def recv(self) -> _storage_v2.BidiWriteObjectResponse: + """Receives a response from the stream. + + This method waits for the next message from the server, which could + contain object data or metadata. + + Returns: + :class:`~google.cloud._storage_v2.types.BidiWriteObjectResponse`: + The response message from the server. 
+ """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + response = await self.socket_like_rpc.recv() + # Update write_handle if present in response + if response: + if response.write_handle: + self.write_handle = response.write_handle + if response.persisted_size is not None: + self.persisted_size = response.persisted_size + if response.resource and response.resource.size: + self.persisted_size = response.resource.size + return response + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/retry/_helpers.py b/google/cloud/storage/asyncio/retry/_helpers.py new file mode 100644 index 000000000..d9ad2462e --- /dev/null +++ b/google/cloud/storage/asyncio/retry/_helpers.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import logging +from typing import Tuple, Optional + +from google.api_core import exceptions +from google.cloud._storage_v2.types import ( + BidiReadObjectRedirectedError, + BidiWriteObjectRedirectedError, +) +from google.rpc import status_pb2 + +_BIDI_READ_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +) +_BIDI_WRITE_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" +) +logger = logging.getLogger(__name__) + + +def _handle_redirect( + exc: Exception, +) -> Tuple[Optional[str], Optional[bytes]]: + """ + Extracts routing token and read handle from a gRPC error. + + :type exc: Exception + :param exc: The exception to parse. + + :rtype: Tuple[Optional[str], Optional[bytes]] + :returns: A tuple of (routing_token, read_handle). 
+ """ + routing_token = None + read_handle = None + + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + + if grpc_error: + if isinstance(grpc_error, BidiReadObjectRedirectedError): + routing_token = grpc_error.routing_token + if grpc_error.read_handle: + read_handle = grpc_error.read_handle + return routing_token, read_handle + + if hasattr(grpc_error, "trailing_metadata"): + trailers = grpc_error.trailing_metadata() + if not trailers: + return None, None + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL: + redirect_proto = BidiReadObjectRedirectedError.deserialize( + detail.value + ) + if redirect_proto.routing_token: + routing_token = redirect_proto.routing_token + if redirect_proto.read_handle: + read_handle = redirect_proto.read_handle + break + except Exception as e: + logger.error(f"Error unpacking redirect: {e}") + + return routing_token, read_handle + + +def _extract_bidi_writes_redirect_proto(exc: Exception): + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + + if grpc_error: + if isinstance(grpc_error, BidiWriteObjectRedirectedError): + return grpc_error + + if hasattr(grpc_error, "trailing_metadata"): + trailers = grpc_error.trailing_metadata() + if not trailers: + return + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: + redirect_proto = BidiWriteObjectRedirectedError.deserialize( + detail.value + ) + return redirect_proto + except Exception: + logger.error("Error unpacking redirect details from gRPC error.") + pass diff --git a/google/cloud/storage/asyncio/retry/base_strategy.py b/google/cloud/storage/asyncio/retry/base_strategy.py new file mode 100644 index 000000000..ff193f109 --- /dev/null +++ b/google/cloud/storage/asyncio/retry/base_strategy.py @@ -0,0 +1,83 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +from typing import Any, Iterable + + +class _BaseResumptionStrategy(abc.ABC): + """Abstract base class defining the interface for a bidi stream resumption strategy. + + This class defines the skeleton for a pluggable strategy that contains + all the service-specific logic for a given bidi operation (e.g., reads + or writes). This allows a generic retry manager to handle the common + retry loop while sending the state management and request generation + to a concrete implementation of this class. 
+ """ + + @abc.abstractmethod + def generate_requests(self, state: Any) -> Iterable[Any]: + """Generates the next batch of requests based on the current state. + + This method is called at the beginning of each retry attempt. It should + inspect the provided state object and generate the appropriate list of + request protos to send to the server. For example, a read strategy + would use this to implement "Smarter Resumption" by creating smaller + `ReadRange` requests for partially downloaded ranges. For bidi-writes, + it will set the `write_offset` field to the persisted size received + from the server in the next request. + + :type state: Any + :param state: An object containing all the state needed for the + operation (e.g., requested ranges, user buffers, + bytes written). + """ + pass + + @abc.abstractmethod + def update_state_from_response(self, response: Any, state: Any) -> None: + """Updates the state based on a successful server response. + + This method is called for every message received from the server. It is + responsible for processing the response and updating the shared state + object. + + :type response: Any + :param response: The response message received from the server. + + :type state: Any + :param state: The shared state object for the operation, which will be + mutated by this method. + """ + pass + + @abc.abstractmethod + async def recover_state_on_failure(self, error: Exception, state: Any) -> None: + """Prepares the state for the next retry attempt after a failure. + + This method is called when a retriable gRPC error occurs. It is + responsible for performing any necessary actions to ensure the next + retry attempt can succeed. For bidi reads, its primary role is to + handle the `BidiReadObjectRedirectError` by extracting the + `routing_token` and updating the state. For bidi writes, it will update + the state to reflect any bytes that were successfully persisted before + the failure. + + :type error: :class:`Exception` + :param error: The exception that was caught by the retry engine. + + :type state: Any + :param state: The shared state object for the operation. + """ + pass diff --git a/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py new file mode 100644 index 000000000..23bffb63d --- /dev/null +++ b/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py @@ -0,0 +1,69 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Any, AsyncIterator, Callable + +from google.cloud.storage.asyncio.retry.base_strategy import ( + _BaseResumptionStrategy, +) + +logger = logging.getLogger(__name__) + + +class _BidiStreamRetryManager: + """Manages the generic retry loop for a bidi streaming operation.""" + + def __init__( + self, + strategy: _BaseResumptionStrategy, + send_and_recv: Callable[..., AsyncIterator[Any]], + ): + """Initializes the retry manager. 
+ Args: + strategy: The strategy for managing the state of a specific + bidi operation (e.g., reads or writes). + send_and_recv: An async callable that opens a new gRPC stream. + """ + self._strategy = strategy + self._send_and_recv = send_and_recv + + async def execute(self, initial_state: Any, retry_policy): + """ + Executes the bidi operation with the configured retry policy. + Args: + initial_state: An object containing all state for the operation. + retry_policy: The `google.api_core.retry.AsyncRetry` object to + govern the retry behavior for this specific operation. + """ + state = initial_state + + async def attempt(): + requests = self._strategy.generate_requests(state) + stream = self._send_and_recv(requests, state) + try: + async for response in stream: + self._strategy.update_state_from_response(response, state) + return + except Exception as e: + if retry_policy._predicate(e): + logger.info( + f"Bidi stream operation failed: {e}. Attempting state recovery and retry." + ) + await self._strategy.recover_state_on_failure(e, state) + raise e + + wrapped_attempt = retry_policy(attempt) + + await wrapped_attempt() diff --git a/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py new file mode 100644 index 000000000..468954332 --- /dev/null +++ b/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py @@ -0,0 +1,157 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, List, IO +import logging + +from google_crc32c import Checksum +from google.cloud import _storage_v2 as storage_v2 +from google.cloud.storage.exceptions import DataCorruption +from google.cloud.storage.asyncio.retry._helpers import ( + _handle_redirect, +) +from google.cloud.storage.asyncio.retry.base_strategy import ( + _BaseResumptionStrategy, +) + + +_BIDI_READ_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +) +logger = logging.getLogger(__name__) + + +class _DownloadState: + """A helper class to track the state of a single range download.""" + + def __init__( + self, initial_offset: int, initial_length: int, user_buffer: IO[bytes] + ): + self.initial_offset = initial_offset + self.initial_length = initial_length + self.user_buffer = user_buffer + self.bytes_written = 0 + self.next_expected_offset = initial_offset + self.is_complete = False + + +class _ReadResumptionStrategy(_BaseResumptionStrategy): + """The concrete resumption strategy for bidi reads.""" + + def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]: + """Generates new ReadRange requests for all incomplete downloads. + + :type state: dict + :param state: A dictionary mapping a read_id to its corresponding + _DownloadState object. 
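+
+        For example, a range originally requested as (offset=100, length=50)
+        that has already received 20 bytes is re-requested as (offset=120,
+        length=30); a read-to-end range (length=0) is re-requested from its new
+        offset with length still 0.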
+ """ + pending_requests = [] + download_states: Dict[int, _DownloadState] = state["download_states"] + + for read_id, read_state in download_states.items(): + if not read_state.is_complete: + new_offset = read_state.initial_offset + read_state.bytes_written + + # Calculate remaining length. If initial_length is 0 (read to end), + # it stays 0. Otherwise, subtract bytes_written. + new_length = 0 + if read_state.initial_length > 0: + new_length = read_state.initial_length - read_state.bytes_written + + new_request = storage_v2.ReadRange( + read_offset=new_offset, + read_length=new_length, + read_id=read_id, + ) + pending_requests.append(new_request) + return pending_requests + + def update_state_from_response( + self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any] + ) -> None: + """Processes a server response, performs integrity checks, and updates state.""" + + # Capture read_handle if provided. + if response.read_handle: + state["read_handle"] = response.read_handle + + download_states = state["download_states"] + + for object_data_range in response.object_data_ranges: + # Ignore empty ranges or ranges for IDs not in our state + # (e.g., from a previously cancelled request on the same stream). + if not object_data_range.read_range: + logger.warning( + "Received response with missing read_range field; ignoring." + ) + continue + + read_id = object_data_range.read_range.read_id + if read_id not in download_states: + logger.warning( + f"Received data for unknown or stale read_id {read_id}; ignoring." + ) + continue + + read_state = download_states[read_id] + + # Offset Verification + chunk_offset = object_data_range.read_range.read_offset + if chunk_offset != read_state.next_expected_offset: + raise DataCorruption( + response, + f"Offset mismatch for read_id {read_id}. " + f"Expected {read_state.next_expected_offset}, got {chunk_offset}", + ) + + # Checksum Verification + # We must validate data before updating state or writing to buffer. + data = object_data_range.checksummed_data.content + server_checksum = object_data_range.checksummed_data.crc32c + + if server_checksum is not None: + client_checksum = int.from_bytes(Checksum(data).digest(), "big") + if server_checksum != client_checksum: + raise DataCorruption( + response, + f"Checksum mismatch for read_id {read_id}. " + f"Server sent {server_checksum}, client calculated {client_checksum}.", + ) + + # Update State & Write Data + chunk_size = len(data) + read_state.user_buffer.write(data) + read_state.bytes_written += chunk_size + read_state.next_expected_offset += chunk_size + + # Final Byte Count Verification + if object_data_range.range_end: + read_state.is_complete = True + if ( + read_state.initial_length != 0 + and read_state.bytes_written > read_state.initial_length + ): + raise DataCorruption( + response, + f"Byte count mismatch for read_id {read_id}. 
" + f"Expected {read_state.initial_length}, got {read_state.bytes_written}", + ) + + async def recover_state_on_failure(self, error: Exception, state: Any) -> None: + """Handles BidiReadObjectRedirectedError for reads.""" + routing_token, read_handle = _handle_redirect(error) + if routing_token: + state["routing_token"] = routing_token + if read_handle: + state["read_handle"] = read_handle diff --git a/google/cloud/storage/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/asyncio/retry/writes_resumption_strategy.py new file mode 100644 index 000000000..b98b9b2e7 --- /dev/null +++ b/google/cloud/storage/asyncio/retry/writes_resumption_strategy.py @@ -0,0 +1,147 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, IO, List, Optional, Union + +import google_crc32c +from google.cloud._storage_v2.types import storage as storage_type +from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError +from google.cloud.storage.asyncio.retry.base_strategy import ( + _BaseResumptionStrategy, +) +from google.cloud.storage.asyncio.retry._helpers import ( + _extract_bidi_writes_redirect_proto, +) + + +class _WriteState: + """A helper class to track the state of a single upload operation. + + :type chunk_size: int + :param chunk_size: The size of chunks to write to the server. + + :type user_buffer: IO[bytes] + :param user_buffer: The data source. + + :type flush_interval: int + :param flush_interval: The flush interval at which the data is flushed. + """ + + def __init__( + self, + chunk_size: int, + user_buffer: IO[bytes], + flush_interval: int, + ): + self.chunk_size = chunk_size + self.user_buffer = user_buffer + self.persisted_size: int = 0 + self.bytes_sent: int = 0 + self.bytes_since_last_flush: int = 0 + self.flush_interval: int = flush_interval + self.write_handle: Union[bytes, storage_type.BidiWriteHandle, None] = None + self.routing_token: Optional[str] = None + self.is_finalized: bool = False + + +class _WriteResumptionStrategy(_BaseResumptionStrategy): + """The concrete resumption strategy for bidi writes.""" + + def generate_requests( + self, state: Dict[str, Any] + ) -> List[storage_type.BidiWriteObjectRequest]: + """Generates BidiWriteObjectRequests to resume or continue the upload. + + This method is not applicable for `open` methods. + """ + write_state: _WriteState = state["write_state"] + + requests = [] + # The buffer should already be seeked to the correct position (persisted_size) + # by the `recover_state_on_failure` method before this is called. 
+ while not write_state.is_finalized: + chunk = write_state.user_buffer.read(write_state.chunk_size) + + # End of File detection + if not chunk: + break + + checksummed_data = storage_type.ChecksummedData(content=chunk) + checksum = google_crc32c.Checksum(chunk) + checksummed_data.crc32c = int.from_bytes(checksum.digest(), "big") + + request = storage_type.BidiWriteObjectRequest( + write_offset=write_state.bytes_sent, + checksummed_data=checksummed_data, + ) + chunk_len = len(chunk) + write_state.bytes_sent += chunk_len + write_state.bytes_since_last_flush += chunk_len + + if write_state.bytes_since_last_flush >= write_state.flush_interval: + request.flush = True + # reset counter after marking flush + write_state.bytes_since_last_flush = 0 + + requests.append(request) + return requests + + def update_state_from_response( + self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any] + ) -> None: + """Processes a server response and updates the write state.""" + write_state: _WriteState = state["write_state"] + if response is None: + return + if response.persisted_size: + write_state.persisted_size = response.persisted_size + + if response.write_handle: + write_state.write_handle = response.write_handle + + if response.resource: + write_state.persisted_size = response.resource.size + if response.resource.finalize_time: + write_state.is_finalized = True + + async def recover_state_on_failure( + self, error: Exception, state: Dict[str, Any] + ) -> None: + """ + Handles errors, specifically BidiWriteObjectRedirectedError, and rewinds state. + + This method rewinds the user buffer and internal byte tracking to the + last confirmed 'persisted_size' from the server. + """ + write_state: _WriteState = state["write_state"] + + redirect_proto = None + + if isinstance(error, BidiWriteObjectRedirectedError): + redirect_proto = error + else: + redirect_proto = _extract_bidi_writes_redirect_proto(error) + + # Extract routing token and potentially a new write handle for redirection. + if redirect_proto: + if redirect_proto.routing_token: + write_state.routing_token = redirect_proto.routing_token + if redirect_proto.write_handle: + write_state.write_handle = redirect_proto.write_handle + + # We must assume any data sent beyond 'persisted_size' was lost. + # Reset the user buffer to the last known good byte confirmed by the server. 
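+        # Example: if 8 MiB were sent but the server confirmed only 5 MiB, the buffer is
+        # rewound to offset 5 MiB and bytes_sent is reset, so the next attempt re-sends
+        # only the unconfirmed 3 MiB (plus anything not yet read from the buffer).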
+ write_state.user_buffer.seek(write_state.persisted_size) + write_state.bytes_sent = write_state.persisted_size + write_state.bytes_since_last_flush = 0 diff --git a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py index f00a6ba80..725eeb2bd 100644 --- a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py +++ b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_create_and_write_appendable_object] diff --git a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py index 971658997..807fe40a5 100644 --- a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py +++ b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_finalize_appendable_object_upload] diff --git a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py index ce2ba678b..54c9621de 100644 --- a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py @@ -14,15 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Downloads a range of bytes from multiple objects concurrently.""" +"""Downloads a range of bytes from multiple objects concurrently. 
+Example usage: + ```python samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py --bucket_name chandrasiri-benchmarks-zb --object_names test_md_11 test_md_10 test_md_9 test_md_8``` +""" import argparse import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( +from google.cloud.storage.asyncio.async_grpc_client import ( AsyncGrpcClient, ) -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py index 02c3bace5..b0f64c486 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py index b4cad6718..2e18caabe 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py +++ b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py index b013cc938..74bec43f6 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py index 3fb17ceae..c758dc641 100644 --- a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py +++ b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from 
google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_pause_and_resume_appendable_upload] diff --git a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py index 1134f28d6..9e4dcd738 100644 --- a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py +++ b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py @@ -20,11 +20,11 @@ from datetime import datetime from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/zonal_snippets_test.py b/samples/snippets/zonal_buckets/zonal_snippets_test.py index 736576eb5..6852efe22 100644 --- a/samples/snippets/zonal_buckets/zonal_snippets_test.py +++ b/samples/snippets/zonal_buckets/zonal_snippets_test.py @@ -20,8 +20,8 @@ from google.cloud.storage import Client import contextlib -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 4d46353e2..ed59f9a4a 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -11,12 +11,12 @@ import gc # current library imports -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, _DEFAULT_FLUSH_INTERVAL_BYTES, ) -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) from google.api_core.exceptions import FailedPrecondition diff --git a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py index 6c837ec5c..e0eba9030 100644 --- a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py +++ b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py @@ -18,10 +18,10 @@ from google.api_core import exceptions from google.api_core.retry_async import AsyncRetry -from google.cloud.storage._experimental.asyncio.retry import ( +from google.cloud.storage.asyncio.retry import ( bidi_stream_retry_manager as manager, ) -from google.cloud.storage._experimental.asyncio.retry import base_strategy +from google.cloud.storage.asyncio.retry import base_strategy def _is_retriable(exc): diff --git a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py 
b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py index 62a05f19a..1e31961b6 100644 --- a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py @@ -20,7 +20,7 @@ from google.api_core import exceptions from google.cloud import _storage_v2 as storage_v2 -from google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy import ( +from google.cloud.storage.asyncio.retry.reads_resumption_strategy import ( _DownloadState, _ReadResumptionStrategy, ) @@ -28,7 +28,7 @@ _READ_ID = 1 LOGGER_NAME = ( - "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy" + "google.cloud.storage.asyncio.retry.reads_resumption_strategy" ) diff --git a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py index ce48e21c7..ca354e84a 100644 --- a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py @@ -22,7 +22,7 @@ from google.api_core import exceptions from google.cloud._storage_v2.types import storage as storage_type -from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( +from google.cloud.storage.asyncio.retry.writes_resumption_strategy import ( _WriteState, _WriteResumptionStrategy, ) @@ -352,7 +352,7 @@ async def test_recover_state_on_failure_trailer_metadata_redirect(self, strategy ] with mock.patch( - "google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy._extract_bidi_writes_redirect_proto", + "google.cloud.storage.asyncio.retry.writes_resumption_strategy._extract_bidi_writes_redirect_proto", return_value=redirect_proto, ): await strategy.recover_state_on_failure( diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index 88f4864de..02fb3238d 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -21,7 +21,7 @@ from google.rpc import status_pb2 from google.cloud._storage_v2.types import storage as storage_type from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, _is_write_retryable, _MAX_CHUNK_SIZE_BYTES, @@ -91,7 +91,7 @@ def mock_appendable_writer(): mock_client.grpc_client = mock.AsyncMock() # Internal stream class patch stream_patcher = mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" + "google.cloud.storage.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" ) mock_stream_cls = stream_patcher.start() mock_stream = mock_stream_cls.return_value @@ -156,7 +156,7 @@ def test_init_validation_multiple_raises(self, mock_appendable_writer): def test_init_raises_if_crc32c_missing(self, mock_appendable_writer): with mock.patch( - "google.cloud.storage._experimental.asyncio._utils.google_crc32c" + "google.cloud.storage.asyncio._utils.google_crc32c" ) as mock_crc: mock_crc.implementation = "python" with pytest.raises(exceptions.FailedPrecondition): @@ -206,7 +206,7 @@ def test_on_open_error_redirection(self, mock_appendable_writer): ) with mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_appendable_object_writer._extract_bidi_writes_redirect_proto", + "google.cloud.storage.asyncio.async_appendable_object_writer._extract_bidi_writes_redirect_proto", return_value=redirect, ): writer._on_open_error(exceptions.Aborted("redirect")) @@ -230,7 +230,7 @@ async def test_append_basic_success(self, mock_appendable_writer): data = b"test-data" with mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + "google.cloud.storage.asyncio.async_appendable_object_writer._BidiStreamRetryManager" ) as MockManager: async def mock_execute(state, policy): @@ -277,7 +277,7 @@ async def mock_open(metadata=None): writer, "open", side_effect=mock_open ) as mock_writer_open: with mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + "google.cloud.storage.asyncio.async_appendable_object_writer._BidiStreamRetryManager" ) as MockManager: async def mock_execute(state, policy): diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index f94729516..cb0ab2466 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -16,8 +16,8 @@ from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials from google.api_core import client_info as client_info_lib -from google.cloud.storage._experimental.asyncio import async_grpc_client -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( +from google.cloud.storage.asyncio import async_grpc_client +from google.cloud.storage.asyncio.async_grpc_client import ( DEFAULT_CLIENT_INFO, ) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 09cf0fa09..8d4d6d31d 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -20,10 +20,10 @@ from google.api_core import exceptions from google_crc32c import Checksum -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) -from google.cloud.storage._experimental.asyncio import async_read_object_stream +from google.cloud.storage.asyncio import async_read_object_stream from io import BytesIO from google.cloud.storage.exceptions import DataCorruption @@ -68,7 +68,7 @@ async def _make_mock_mrd( return mrd, mock_client @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_create_mrd(self, mock_cls_async_read_object_stream): @@ -95,10 +95,10 @@ async def test_create_mrd(self, mock_cls_async_read_object_stream): assert mrd.is_stream_open @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_download_ranges_via_async_gather( @@ -166,10 +166,10 @@ async 
def test_download_ranges_via_async_gather( assert second_buffer.getvalue() == data[10:16] @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_download_ranges( @@ -236,7 +236,7 @@ async def test_downloading_ranges_with_more_than_1000_should_throw_error(self): ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_opening_mrd_more_than_once_should_throw_error( @@ -255,7 +255,7 @@ async def test_opening_mrd_more_than_once_should_throw_error( assert str(exc.value) == "Underlying bidi-gRPC stream is already open" @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_close_mrd(self, mock_cls_async_read_object_stream): @@ -302,7 +302,7 @@ async def test_downloading_without_opening_should_throw_error(self): assert str(exc.value) == "Underlying bidi-gRPC stream is not open" assert not mrd.is_stream_open - @mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c") + @mock.patch("google.cloud.storage.asyncio._utils.google_crc32c") def test_init_raises_if_crc32c_c_extension_is_missing( self, mock_google_crc32c ): @@ -318,12 +318,12 @@ def test_init_raises_if_crc32c_c_extension_is_missing( @pytest.mark.asyncio @mock.patch( - "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy.Checksum" + "google.cloud.storage.asyncio.retry.reads_resumption_strategy.Checksum" ) async def test_download_ranges_raises_on_checksum_mismatch( self, mock_checksum_class ): - from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) @@ -359,7 +359,7 @@ async def test_download_ranges_raises_on_checksum_mismatch( with pytest.raises(DataCorruption) as exc_info: with mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer", + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer", return_value=0, ): await mrd.download_ranges([(0, len(test_data), BytesIO())]) @@ -368,11 +368,11 @@ async def test_download_ranges_raises_on_checksum_mismatch( mock_checksum_class.assert_called_once_with(test_data) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", + "google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", new_callable=AsyncMock, ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", + "google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", new_callable=AsyncMock, ) @pytest.mark.asyncio diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 
18e0c464d..4e8a494b5 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -17,8 +17,8 @@ from unittest.mock import AsyncMock from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio import async_read_object_stream -from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( +from google.cloud.storage.asyncio import async_read_object_stream +from google.cloud.storage.asyncio.async_read_object_stream import ( _AsyncReadObjectStream, ) @@ -80,10 +80,10 @@ async def instantiate_read_obj_stream_with_read_handle( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): # Arrange @@ -111,10 +111,10 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open(mock_client, mock_cls_async_bidi_rpc): @@ -137,10 +137,10 @@ async def test_open(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc): @@ -163,10 +163,10 @@ async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open_when_already_open_should_raise_error( @@ -186,10 +186,10 @@ async def test_open_when_already_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_close(mock_client, mock_cls_async_bidi_rpc): @@ -209,10 +209,10 @@ async def test_close(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_requests_done(mock_client, mock_cls_async_bidi_rpc): @@ -233,10 +233,10 @@ async def test_requests_done(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_close_without_open_should_raise_error( @@ -256,10 +256,10 @@ async def test_close_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_send(mock_client, mock_cls_async_bidi_rpc): @@ -279,10 +279,10 @@ async def test_send(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_send_without_open_should_raise_error( @@ -302,10 +302,10 @@ async def test_send_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv(mock_client, mock_cls_async_bidi_rpc): @@ -327,10 +327,10 @@ async def test_recv(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv_without_open_should_raise_error( @@ -350,10 +350,10 @@ async def test_recv_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv_updates_read_handle_on_refresh( diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 7bfa2cea0..f4a7862d6 100644 --- 
a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -16,7 +16,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( +from google.cloud.storage.asyncio.async_write_object_stream import ( _AsyncWriteObjectStream, ) from google.cloud import _storage_v2 @@ -72,7 +72,7 @@ def test_init_raises_value_error(self, mock_client): # ------------------------------------------------------------------------- @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_new_object(self, mock_rpc_cls, mock_client): @@ -102,7 +102,7 @@ async def test_open_new_object(self, mock_rpc_cls, mock_client): assert stream.generation_number == GENERATION @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): @@ -133,7 +133,7 @@ async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): assert stream.persisted_size == 1024 @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_metadata_merging(self, mock_rpc_cls, mock_client): @@ -167,7 +167,7 @@ async def test_open_already_open_raises(self, mock_client): # ------------------------------------------------------------------------- @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_send_and_recv_logic(self, mock_rpc_cls, mock_client): From 58df96ff62cb100aa1b35b8aab7a085b08a0352b Mon Sep 17 00:00:00 2001 From: Chandra Date: Wed, 28 Jan 2026 15:40:55 +0000 Subject: [PATCH 2/5] update import paths in micro benchmarking module --- .../storage_open_multiple_objects_ranged_read.py | 6 ++++-- tests/perf/microbenchmarks/conftest.py | 4 ++-- tests/perf/microbenchmarks/reads/test_reads.py | 4 ++-- tests/perf/microbenchmarks/writes/test_writes.py | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py index 54c9621de..bed580d36 100644 --- a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py @@ -15,8 +15,10 @@ # limitations under the License. """Downloads a range of bytes from multiple objects concurrently. 
-Example usage: - ```python samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py --bucket_name chandrasiri-benchmarks-zb --object_names test_md_11 test_md_10 test_md_9 test_md_8``` +Example usage: + ```python samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py \ + --bucket_name \ + --object_names ``` """ import argparse import asyncio diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py index c09e9ce93..e748c6e43 100644 --- a/tests/perf/microbenchmarks/conftest.py +++ b/tests/perf/microbenchmarks/conftest.py @@ -22,10 +22,10 @@ import os import uuid from google.cloud import storage -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient from tests.perf.microbenchmarks.writes.parameters import WriteParameters _OBJECT_NAME_PREFIX = "micro-benchmark" diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py index 2b5f80d42..d51102cea 100644 --- a/tests/perf/microbenchmarks/reads/test_reads.py +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -30,8 +30,8 @@ import pytest -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index 5648ada0d..02a0f5e4f 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -30,8 +30,8 @@ import logging import pytest -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) From f577903e87903201d57824d33658a655f38836b2 Mon Sep 17 00:00:00 2001 From: Chandra Date: Wed, 28 Jan 2026 15:42:55 +0000 Subject: [PATCH 3/5] fix client initialization in doc strings --- google/cloud/storage/asyncio/async_appendable_object_writer.py | 2 +- google/cloud/storage/asyncio/async_multi_range_downloader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/storage/asyncio/async_appendable_object_writer.py b/google/cloud/storage/asyncio/async_appendable_object_writer.py index 5505ce390..a61f4c4d4 100644 --- a/google/cloud/storage/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/asyncio/async_appendable_object_writer.py @@ -132,7 +132,7 @@ def __init__( from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter import asyncio - client = AsyncGrpcClient().grpc_client + client = AsyncGrpcClient() bucket_name = "my-bucket" object_name = "my-appendable-object" diff --git 
a/google/cloud/storage/asyncio/async_multi_range_downloader.py b/google/cloud/storage/asyncio/async_multi_range_downloader.py index 6925ddc77..993fc9522 100644 --- a/google/cloud/storage/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/asyncio/async_multi_range_downloader.py @@ -98,7 +98,7 @@ class AsyncMultiRangeDownloader: .. code-block:: python - client = AsyncGrpcClient().grpc_client + client = AsyncGrpcClient() mrd = await AsyncMultiRangeDownloader.create_mrd( client, bucket_name="chandrasiri-rs", object_name="test_open9" ) From d5cafc2226639e45b06b329a355c7f2b6e534014 Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 29 Jan 2026 07:20:47 +0000 Subject: [PATCH 4/5] remove experimental warning note --- .../storage/asyncio/async_appendable_object_writer.py | 9 --------- .../cloud/storage/asyncio/async_read_object_stream.py | 10 ---------- .../cloud/storage/asyncio/async_write_object_stream.py | 9 --------- 3 files changed, 28 deletions(-) diff --git a/google/cloud/storage/asyncio/async_appendable_object_writer.py b/google/cloud/storage/asyncio/async_appendable_object_writer.py index a61f4c4d4..3ab06f8ba 100644 --- a/google/cloud/storage/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/asyncio/async_appendable_object_writer.py @@ -11,16 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM (Technical Account Manager) -if you want to use these Rapid Storage APIs. - -""" from io import BufferedReader import io import logging diff --git a/google/cloud/storage/asyncio/async_read_object_stream.py b/google/cloud/storage/asyncio/async_read_object_stream.py index b53fc1224..d456f16cc 100644 --- a/google/cloud/storage/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/asyncio/async_read_object_stream.py @@ -11,16 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) - -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these APIs. - -""" from typing import List, Optional, Tuple from google.cloud import _storage_v2 diff --git a/google/cloud/storage/asyncio/async_write_object_stream.py b/google/cloud/storage/asyncio/async_write_object_stream.py index 233825865..721183962 100644 --- a/google/cloud/storage/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/asyncio/async_write_object_stream.py @@ -11,16 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
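PATCH 3/5 above updates the usage docstrings to construct the `AsyncGrpcClient` wrapper directly rather than passing its `.grpc_client`. A short end-to-end sketch under those updated docstrings; the bucket and object names are placeholders, and it assumes default credentials and a zonal bucket are available.

```python
import asyncio
from io import BytesIO

from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient
from google.cloud.storage.asyncio.async_multi_range_downloader import (
    AsyncMultiRangeDownloader,
)


async def read_first_kilobyte(bucket_name: str, object_name: str) -> bytes:
    # Pass the wrapper itself; per the fixed docstrings, the downloader
    # uses the wrapped GAPIC client internally.
    client = AsyncGrpcClient()
    mrd = await AsyncMultiRangeDownloader.create_mrd(
        client, bucket_name=bucket_name, object_name=object_name
    )
    buffer = BytesIO()
    # Each requested range is an (offset, length, writable buffer) tuple.
    await mrd.download_ranges([(0, 1024, buffer)])
    await mrd.close()
    return buffer.getvalue()


# asyncio.run(read_first_kilobyte("my-zonal-bucket", "my-object"))
```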
-""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these Rapid Storage APIs. - -""" from typing import List, Optional, Tuple from google.cloud import _storage_v2 from google.cloud.storage.asyncio import _utils From 9ed8f9ad0bc8084788c85cb53ccc36d6f3fe991c Mon Sep 17 00:00:00 2001 From: Chandra Date: Thu, 29 Jan 2026 07:30:38 +0000 Subject: [PATCH 5/5] move grpc_client.py (sync client) out of experimental dir --- google/cloud/storage/grpc_client.py | 122 ++++++++++++++++++++++++++++ tests/unit/test_grpc_client.py | 17 ++-- 2 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 google/cloud/storage/grpc_client.py diff --git a/google/cloud/storage/grpc_client.py b/google/cloud/storage/grpc_client.py new file mode 100644 index 000000000..7a739b7b7 --- /dev/null +++ b/google/cloud/storage/grpc_client.py @@ -0,0 +1,122 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A client for interacting with Google Cloud Storage using the gRPC API.""" + +from google.cloud.client import ClientWithProject +from google.cloud import _storage_v2 as storage_v2 + +_marker = object() + + +class GrpcClient(ClientWithProject): + """A client for interacting with Google Cloud Storage using the gRPC API. + + :type project: str or None + :param project: The project which the client acts on behalf of. If not + passed, falls back to the default inferred from the + environment. + + :type credentials: :class:`~google.auth.credentials.Credentials` + :param credentials: (Optional) The OAuth2 Credentials to use for this + client. If not passed, falls back to the default + inferred from the environment. + + :type client_info: :class:`~google.api_core.client_info.ClientInfo` + :param client_info: + The client info used to send a user-agent string along with API + requests. If ``None``, then default info will be used. Generally, + you only need to set this if you're developing your own library + or partner tool. + + :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` + :param client_options: (Optional) Client options used to set user options + on the client. A non-default universe domain or API endpoint should be + set through client_options. + + :type api_key: string + :param api_key: + (Optional) An API key. Mutually exclusive with any other credentials. + This parameter is an alias for setting `client_options.api_key` and + will supersede any API key set in the `client_options` parameter. + + :type attempt_direct_path: bool + :param attempt_direct_path: + (Optional) Whether to attempt to use DirectPath for gRPC connections. 
+ This provides a direct, unproxied connection to GCS for lower latency + and higher throughput, and is highly recommended when running on Google + Cloud infrastructure. Defaults to ``True``. + """ + + def __init__( + self, + project=_marker, + credentials=None, + client_info=None, + client_options=None, + *, + api_key=None, + attempt_direct_path=True, + ): + super(GrpcClient, self).__init__(project=project, credentials=credentials) + + if isinstance(client_options, dict): + if api_key: + client_options["api_key"] = api_key + elif client_options is None: + client_options = {} if not api_key else {"api_key": api_key} + elif api_key: + client_options.api_key = api_key + + self._grpc_client = self._create_gapic_client( + credentials=credentials, + client_info=client_info, + client_options=client_options, + attempt_direct_path=attempt_direct_path, + ) + + def _create_gapic_client( + self, + credentials=None, + client_info=None, + client_options=None, + attempt_direct_path=True, + ): + """Creates and configures the low-level GAPIC `storage_v2` client.""" + transport_cls = storage_v2.StorageClient.get_transport_class("grpc") + + channel = transport_cls.create_channel(attempt_direct_path=attempt_direct_path) + + transport = transport_cls(credentials=credentials, channel=channel) + + return storage_v2.StorageClient( + credentials=credentials, + transport=transport, + client_info=client_info, + client_options=client_options, + ) + + @property + def grpc_client(self): + """The underlying gRPC client. + + This property gives users direct access to the `storage_v2.StorageClient` + instance. This can be useful for accessing + newly added or experimental RPCs that are not yet exposed through + the high-level GrpcClient. + + Returns: + google.cloud.storage_v2.StorageClient: The configured GAPIC client. + """ + return self._grpc_client diff --git a/tests/unit/test_grpc_client.py b/tests/unit/test_grpc_client.py index 9eca1b280..eb048ff42 100644 --- a/tests/unit/test_grpc_client.py +++ b/tests/unit/test_grpc_client.py @@ -16,6 +16,7 @@ from unittest import mock from google.auth import credentials as auth_credentials from google.api_core import client_options as client_options_lib +from google.cloud.storage import grpc_client def _make_credentials(spec=None): @@ -30,7 +31,6 @@ class TestGrpcClient(unittest.TestCase): def test_constructor_defaults_and_options( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -71,12 +71,11 @@ def test_constructor_defaults_and_options( # 4. Assert the client instance holds the mocked GAPIC client. 
self.assertIs(client.grpc_client, mock_storage_client.return_value) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_disables_direct_path( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -94,12 +93,11 @@ def test_constructor_disables_direct_path( attempt_direct_path=False ) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_initialize_with_api_key( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -124,10 +122,9 @@ def test_constructor_initialize_with_api_key( client_options={"api_key": "test-api-key"}, ) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_grpc_client_property(self, mock_storage_client, mock_base_client): - from google.cloud.storage._experimental import grpc_client mock_creds = _make_credentials() mock_base_client.return_value._credentials = mock_creds @@ -138,12 +135,11 @@ def test_grpc_client_property(self, mock_storage_client, mock_base_client): self.assertIs(retrieved_client, mock_storage_client.return_value) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_with_api_key_and_client_options( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -173,12 +169,11 @@ def test_constructor_with_api_key_and_client_options( ) self.assertEqual(client_options_obj.api_key, "new-test-key") - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_with_api_key_and_dict_options( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_creds = _make_credentials() mock_base_instance = mock_base_client.return_value