From 06b61c091abac8fff52e4be13c49cfec46f5b5de Mon Sep 17 00:00:00 2001 From: Alex Severin Date: Tue, 15 Jul 2025 23:48:38 +0300 Subject: [PATCH 1/5] check not found from gitlab --- src/boxdrive/handlers.py | 7 +++--- src/boxdrive/s3.py | 35 +++++++++++++++++++--------- src/boxdrive/stores/gitlab/client.py | 4 +++- src/boxdrive/stores/gitlab/store.py | 3 +++ 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/boxdrive/handlers.py b/src/boxdrive/handlers.py index 094088d..9580c0c 100644 --- a/src/boxdrive/handlers.py +++ b/src/boxdrive/handlers.py @@ -3,7 +3,7 @@ import logging from typing import Annotated, Literal -from fastapi import APIRouter, Depends, Header, Query, Request, Response, status +from fastapi import APIRouter, Depends, Header, Query, Request, Response from fastapi.responses import StreamingResponse from . import ( @@ -98,9 +98,8 @@ async def put_object( @router.delete("/{bucket}/{key:path}") -async def delete_object(bucket: BucketName, key: Key, s3: S3Dep) -> XMLResponse: - await s3.delete_object(bucket, key) - return XMLResponse(status_code=status.HTTP_204_NO_CONTENT) +async def delete_object(bucket: BucketName, key: Key, s3: S3Dep) -> Response: + return await s3.delete_object(bucket, key) @router.put("/{bucket}") diff --git a/src/boxdrive/s3.py b/src/boxdrive/s3.py index f325189..674b56a 100644 --- a/src/boxdrive/s3.py +++ b/src/boxdrive/s3.py @@ -2,8 +2,9 @@ import os from collections.abc import AsyncIterator -from fastapi import HTTPException, Response +from fastapi import HTTPException, Response, status from fastapi.responses import StreamingResponse +from opentelemetry import trace from boxdrive.schemas import BaseListObjectsInfo @@ -11,6 +12,7 @@ from .schemas import BucketName, ContentType, Key, MaxKeys, xml from .store import ObjectStore +tracer = trace.get_tracer(__name__) logger = logging.getLogger(__name__) @@ -18,6 +20,7 @@ class S3: def __init__(self, store: ObjectStore): self.store = store + @tracer.start_as_current_span("list_buckets") async def list_buckets(self) -> xml.ListAllMyBucketsResult: buckets = await self.store.list_buckets() buckets_xml = [ @@ -27,6 +30,7 @@ async def list_buckets(self) -> xml.ListAllMyBucketsResult: buckets_model = xml.Buckets(buckets=buckets_xml) return xml.ListAllMyBucketsResult(owner=owner, buckets=buckets_model) + @tracer.start_as_current_span("list_objects_v2") async def list_objects_v2( self, bucket: BucketName, @@ -52,6 +56,7 @@ async def list_objects_v2( max_keys=max_keys, ) + @tracer.start_as_current_span("list_objects") async def list_objects( self, bucket: BucketName, @@ -103,6 +108,7 @@ def _build_list_bucket_result( common_prefixes=[xml.CommonPrefix(prefix=prefix) for prefix in objects_info.common_prefixes], ) + @tracer.start_as_current_span("get_object") async def get_object( self, bucket: BucketName, @@ -153,10 +159,11 @@ async def generate() -> AsyncIterator[bytes]: status_code=status_code, ) + @tracer.start_as_current_span("head_object") async def head_object(self, bucket: BucketName, key: Key) -> Response: metadata = await self.store.head_object(bucket, key) return Response( - status_code=200, + status_code=status.HTTP_200_OK, headers={ "Content-Length": str(metadata.size), "ETag": f'"{metadata.etag}"', @@ -166,6 +173,7 @@ async def head_object(self, bucket: BucketName, key: Key) -> Response: }, ) + @tracer.start_as_current_span("put_object") async def put_object( self, bucket: BucketName, @@ -175,27 +183,32 @@ async def put_object( ) -> Response: final_content_type = content_type or constants.DEFAULT_CONTENT_TYPE result_etag = await self.store.put_object(bucket, key, content, final_content_type) - return Response(status_code=200, headers={"ETag": f'"{result_etag}"', "Content-Length": "0"}) + return Response(status_code=status.HTTP_200_OK, headers={"ETag": f'"{result_etag}"'}) - async def delete_object(self, bucket: BucketName, key: Key) -> None: + @tracer.start_as_current_span("delete_object") + async def delete_object(self, bucket: BucketName, key: Key) -> Response: try: await self.store.delete_object(bucket, key) except exceptions.NoSuchBucket: logger.info("Bucket %s not found", bucket) except exceptions.NoSuchKey: logger.info("Object %s not found in bucket %s", key, bucket) - return None + return Response( + status_code=status.HTTP_204_NO_CONTENT, + headers={ + "content-length": "0", + }, + ) + @tracer.start_as_current_span("create_bucket") async def create_bucket(self, bucket: BucketName) -> Response: - try: - await self.store.create_bucket(bucket) - except exceptions.BucketAlreadyExists: - raise HTTPException(status_code=409, detail="Bucket already exists") - return Response(status_code=200, headers={"Location": f"/{bucket}"}) + await self.store.create_bucket(bucket) + return Response(status_code=status.HTTP_200_OK, headers={"Location": f"/{bucket}"}) + @tracer.start_as_current_span("delete_bucket") async def delete_bucket(self, bucket: BucketName) -> Response: try: await self.store.delete_bucket(bucket) except exceptions.NoSuchBucket: logger.info("Bucket %s not found", bucket) - return Response(status_code=204) + return Response(status_code=status.HTTP_204_NO_CONTENT) diff --git a/src/boxdrive/stores/gitlab/client.py b/src/boxdrive/stores/gitlab/client.py index 91a8674..4ef5b3b 100644 --- a/src/boxdrive/stores/gitlab/client.py +++ b/src/boxdrive/stores/gitlab/client.py @@ -116,7 +116,7 @@ async def head_file(self, file_path: str, *, ref: str) -> FileHead | None: ) raise_for_gitlab_response(resp) - async def get_tree(self, params: TreeParams) -> Tree: + async def get_tree(self, params: TreeParams) -> Tree | None: tree_url = os.path.join(self.api_url, "projects", str(self.repo_id), "repository/tree") resp = await self.client.get(tree_url, params=params.model_dump(exclude_none=True)) if resp.status_code == 200: @@ -126,6 +126,8 @@ async def get_tree(self, params: TreeParams) -> Tree: total_pages=int(resp.headers["x-total-pages"]), ) return Tree(items=items, headers=headers) + if resp.status_code == 404: + return None raise_for_gitlab_response(resp) diff --git a/src/boxdrive/stores/gitlab/store.py b/src/boxdrive/stores/gitlab/store.py index e77f543..7bf0c3f 100644 --- a/src/boxdrive/stores/gitlab/store.py +++ b/src/boxdrive/stores/gitlab/store.py @@ -80,6 +80,7 @@ async def list_buckets(self) -> list[BucketInfo]: now = datetime.datetime.now(datetime.UTC) async with self.lock.reader: tree = await self.gitlab_client.get_tree(TreeParams(ref=self.branch)) + assert tree buckets = [] items = [item for item in tree.items if item.type == "tree"] for item in items: @@ -327,6 +328,8 @@ async def _fetch_object_keys( per_page=per_page, ) tree = await self.gitlab_client.get_tree(params) + if tree is None: + raise exceptions.NoSuchBucket items = [item for item in tree.items if item.type == "blob"] for item in items: try: From 9123369b3d2c4ae2934b1b0909e86822a362a9e5 Mon Sep 17 00:00:00 2001 From: Alex Severin Date: Wed, 16 Jul 2025 00:21:54 +0300 Subject: [PATCH 2/5] test_bucket_listv2_encoding_basic --- src/boxdrive/handlers.py | 2 ++ src/boxdrive/s3.py | 2 ++ src/boxdrive/stores/_utils.py | 6 ++++++ tests/third_party/s3-tests/pytest.ini | 1 + .../s3-tests/s3tests_boto3/functional/test_s3.py | 4 ++++ 5 files changed, 15 insertions(+) diff --git a/src/boxdrive/handlers.py b/src/boxdrive/handlers.py index 9580c0c..90c51ba 100644 --- a/src/boxdrive/handlers.py +++ b/src/boxdrive/handlers.py @@ -40,6 +40,7 @@ async def list_objects( continuation_token: Key | None = Query(None, alias="continuation-token"), start_after: Key | None = Query(None, alias="start-after"), list_type: Literal["1", "2"] = Query("1", alias="list-type"), + encoding_type: Literal["url"] | None = Query(None, alias="encoding-type"), *, s3: S3Dep, ) -> XMLResponse: @@ -53,6 +54,7 @@ async def list_objects( max_keys=max_keys, continuation_token=continuation_token, start_after=start_after, + encoding_type=encoding_type, ) return XMLResponse(objects) diff --git a/src/boxdrive/s3.py b/src/boxdrive/s3.py index 674b56a..e04324c 100644 --- a/src/boxdrive/s3.py +++ b/src/boxdrive/s3.py @@ -39,6 +39,7 @@ async def list_objects_v2( max_keys: MaxKeys = constants.MAX_KEYS, continuation_token: Key | None = None, start_after: Key | None = None, + encoding_type: str | None = None, ) -> xml.ListBucketResult: objects_info = await self.store.list_objects_v2( bucket, @@ -47,6 +48,7 @@ async def list_objects_v2( max_keys=max_keys, continuation_token=continuation_token, start_after=start_after, + encoding_type=encoding_type, ) return self._build_list_bucket_result( bucket, diff --git a/src/boxdrive/stores/_utils.py b/src/boxdrive/stores/_utils.py index 7665537..4942cf5 100644 --- a/src/boxdrive/stores/_utils.py +++ b/src/boxdrive/stores/_utils.py @@ -1,3 +1,5 @@ +import urllib.parse + from boxdrive.schemas import Key, ListObjectsInfo, ListObjectsV2Info, MaxKeys, ObjectInfo @@ -49,6 +51,10 @@ def filter_objects_v2( objects = objects[:max_keys] objects, common_prefixes = _split_contents_and_prefixes(objects, prefix=prefix, delimiter=delimiter) + if encoding_type == "url": + for obj in objects: + obj.key = urllib.parse.quote(obj.key) + common_prefixes = [urllib.parse.quote(prefix) for prefix in common_prefixes] return ListObjectsV2Info(objects=objects, is_truncated=is_truncated, common_prefixes=common_prefixes) diff --git a/tests/third_party/s3-tests/pytest.ini b/tests/third_party/s3-tests/pytest.ini index bbde51e..837bb1b 100644 --- a/tests/third_party/s3-tests/pytest.ini +++ b/tests/third_party/s3-tests/pytest.ini @@ -1,5 +1,6 @@ [pytest] markers = + no_gitlab gitlab inmemory abac_test diff --git a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py index 433c1d9..ca12eaa 100644 --- a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py +++ b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py @@ -207,6 +207,7 @@ def test_basic_key_count(): response1 = client.list_objects_v2(Bucket=bucket_name) assert response1['KeyCount'] == 5 +@pytest.mark.no_gitlab @pytest.mark.inmemory def test_bucket_list_delimiter_basic(): bucket_name = _create_objects(keys=['foo/bar', 'foo/bar/xyzzy', 'quux/thud', 'asdf']) @@ -221,6 +222,7 @@ def test_bucket_list_delimiter_basic(): assert len(prefixes) == 2 assert prefixes == ['foo/', 'quux/'] +@pytest.mark.no_gitlab @pytest.mark.inmemory @pytest.mark.list_objects_v2 def test_bucket_listv2_delimiter_basic(): @@ -238,6 +240,8 @@ def test_bucket_listv2_delimiter_basic(): assert response['KeyCount'] == len(prefixes) + len(keys) +@pytest.mark.gitlab +@pytest.mark.inmemory @pytest.mark.list_objects_v2 def test_bucket_listv2_encoding_basic(): bucket_name = _create_objects(keys=['foo+1/bar', 'foo/bar/xyzzy', 'quux ab/thud', 'asdf+b']) From 3bdc614dc6c00ba9c0da33a4d6e1c93305c08fdd Mon Sep 17 00:00:00 2001 From: Alex Severin Date: Wed, 16 Jul 2025 00:31:14 +0300 Subject: [PATCH 3/5] test_bucket_list_encoding_basic --- README.md | 2 ++ examples/custom_store.pyi | 1 + src/boxdrive/handlers.py | 9 ++++++++- src/boxdrive/s3.py | 3 ++- src/boxdrive/store.py | 1 + src/boxdrive/stores/_utils.py | 19 +++++++++++++++---- src/boxdrive/stores/gitlab/store.py | 2 ++ src/boxdrive/stores/inmemory.py | 5 ++++- .../s3tests_boto3/functional/test_s3.py | 2 ++ 9 files changed, 37 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5cf8125..4e9088e 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ class MyCustomStore(ObjectStore): delimiter: str | None = None, max_keys: MaxKeys = 1000, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: ... async def list_objects_v2( self, @@ -149,3 +150,4 @@ uv run mypy . ## License Apache 2.0 – see the [LICENSE](./LICENSE) file for details. + diff --git a/examples/custom_store.pyi b/examples/custom_store.pyi index 3cb94d6..7cdb243 100644 --- a/examples/custom_store.pyi +++ b/examples/custom_store.pyi @@ -29,6 +29,7 @@ class MyCustomStore(ObjectStore): delimiter: str | None = None, max_keys: MaxKeys = 1000, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: ... async def list_objects_v2( self, diff --git a/src/boxdrive/handlers.py b/src/boxdrive/handlers.py index 90c51ba..1a99596 100644 --- a/src/boxdrive/handlers.py +++ b/src/boxdrive/handlers.py @@ -45,7 +45,14 @@ async def list_objects( s3: S3Dep, ) -> XMLResponse: if list_type == "1": - objects = await s3.list_objects(bucket, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker) + objects = await s3.list_objects( + bucket, + prefix=prefix, + delimiter=delimiter, + max_keys=max_keys, + marker=marker, + encoding_type=encoding_type, + ) else: objects = await s3.list_objects_v2( bucket, diff --git a/src/boxdrive/s3.py b/src/boxdrive/s3.py index e04324c..dfee603 100644 --- a/src/boxdrive/s3.py +++ b/src/boxdrive/s3.py @@ -66,9 +66,10 @@ async def list_objects( delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, marker: Key | None = None, + encoding_type: str | None = None, ) -> xml.ListBucketResult: objects_info = await self.store.list_objects( - bucket, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker + bucket, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker, encoding_type=encoding_type ) return self._build_list_bucket_result( bucket, diff --git a/src/boxdrive/store.py b/src/boxdrive/store.py index 0b1f225..5c050c3 100644 --- a/src/boxdrive/store.py +++ b/src/boxdrive/store.py @@ -43,6 +43,7 @@ async def list_objects( delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: """List objects in a bucket.""" pass diff --git a/src/boxdrive/stores/_utils.py b/src/boxdrive/stores/_utils.py index 4942cf5..13a259e 100644 --- a/src/boxdrive/stores/_utils.py +++ b/src/boxdrive/stores/_utils.py @@ -3,6 +3,18 @@ from boxdrive.schemas import Key, ListObjectsInfo, ListObjectsV2Info, MaxKeys, ObjectInfo +def _encode_keys_and_prefixes( + objects: list[ObjectInfo], + common_prefixes: list[str], + *, + encoding_type: str | None = None, +) -> tuple[list[ObjectInfo], list[str]]: + if encoding_type == "url": + objects = [obj.model_copy(update={"key": urllib.parse.quote(obj.key)}) for obj in objects] + common_prefixes = [urllib.parse.quote(prefix) for prefix in common_prefixes] + return objects, common_prefixes + + def filter_objects( objects: list[ObjectInfo], *, @@ -10,6 +22,7 @@ def filter_objects( delimiter: str | None = None, max_keys: MaxKeys = 1000, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: if prefix: objects = [obj for obj in objects if obj.key.startswith(prefix)] @@ -22,6 +35,7 @@ def filter_objects( objects = objects[:max_keys] objects, common_prefixes = _split_contents_and_prefixes(objects, prefix=prefix, delimiter=delimiter) + objects, common_prefixes = _encode_keys_and_prefixes(objects, common_prefixes, encoding_type=encoding_type) return ListObjectsInfo( is_truncated=is_truncated, common_prefixes=common_prefixes, @@ -51,10 +65,7 @@ def filter_objects_v2( objects = objects[:max_keys] objects, common_prefixes = _split_contents_and_prefixes(objects, prefix=prefix, delimiter=delimiter) - if encoding_type == "url": - for obj in objects: - obj.key = urllib.parse.quote(obj.key) - common_prefixes = [urllib.parse.quote(prefix) for prefix in common_prefixes] + objects, common_prefixes = _encode_keys_and_prefixes(objects, common_prefixes, encoding_type=encoding_type) return ListObjectsV2Info(objects=objects, is_truncated=is_truncated, common_prefixes=common_prefixes) diff --git a/src/boxdrive/stores/gitlab/store.py b/src/boxdrive/stores/gitlab/store.py index 7bf0c3f..41faae5 100644 --- a/src/boxdrive/stores/gitlab/store.py +++ b/src/boxdrive/stores/gitlab/store.py @@ -131,6 +131,7 @@ async def list_objects( delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: """List objects in a bucket.""" @@ -142,6 +143,7 @@ def filter_objects(objects: list[ObjectInfo]) -> ListObjectsInfo: delimiter=delimiter, max_keys=max_keys, marker=marker, + encoding_type=encoding_type, ) async with self.lock.reader: diff --git a/src/boxdrive/stores/inmemory.py b/src/boxdrive/stores/inmemory.py index 843a9f8..fff10f0 100644 --- a/src/boxdrive/stores/inmemory.py +++ b/src/boxdrive/stores/inmemory.py @@ -82,12 +82,15 @@ async def list_objects( delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, marker: Key | None = None, + encoding_type: str | None = None, ) -> ListObjectsInfo: bucket = self.buckets.get(bucket_name) if bucket is None: raise exceptions.NoSuchBucket objects = [obj.info for obj in bucket.objects.values()] - return filter_objects(objects, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker) + return filter_objects( + objects, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker, encoding_type=encoding_type + ) async def list_objects_v2( self, diff --git a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py index ca12eaa..b8bcb3e 100644 --- a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py +++ b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py @@ -256,6 +256,8 @@ def test_bucket_listv2_encoding_basic(): assert len(prefixes) == 3 assert prefixes == ['foo%2B1/', 'foo/', 'quux%20ab/'] +@pytest.mark.gitlab +@pytest.mark.inmemory def test_bucket_list_encoding_basic(): bucket_name = _create_objects(keys=['foo+1/bar', 'foo/bar/xyzzy', 'quux ab/thud', 'asdf+b']) client = get_client() From b5dcc63494fd1aee4cfe351403862a5fb6f9cdbe Mon Sep 17 00:00:00 2001 From: Alex Severin Date: Mon, 21 Jul 2025 11:51:30 +0300 Subject: [PATCH 4/5] specify safe chars for url escape --- src/boxdrive/stores/_utils.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/boxdrive/stores/_utils.py b/src/boxdrive/stores/_utils.py index 13a259e..c924db3 100644 --- a/src/boxdrive/stores/_utils.py +++ b/src/boxdrive/stores/_utils.py @@ -3,18 +3,6 @@ from boxdrive.schemas import Key, ListObjectsInfo, ListObjectsV2Info, MaxKeys, ObjectInfo -def _encode_keys_and_prefixes( - objects: list[ObjectInfo], - common_prefixes: list[str], - *, - encoding_type: str | None = None, -) -> tuple[list[ObjectInfo], list[str]]: - if encoding_type == "url": - objects = [obj.model_copy(update={"key": urllib.parse.quote(obj.key)}) for obj in objects] - common_prefixes = [urllib.parse.quote(prefix) for prefix in common_prefixes] - return objects, common_prefixes - - def filter_objects( objects: list[ObjectInfo], *, @@ -88,3 +76,26 @@ def _split_contents_and_prefixes( else: contents.append(obj) return contents, sorted(common_prefixes) + + +def _encode_keys_and_prefixes( + objects: list[ObjectInfo], + common_prefixes: list[str], + *, + encoding_type: str | None = None, +) -> tuple[list[ObjectInfo], list[str]]: + SAFE = [ + "-", + "_", + ".", + "/", + "*", + ] + + def quote(s: str) -> str: + return urllib.parse.quote(s, safe="".join(SAFE)) + + if encoding_type == "url": + objects = [obj.model_copy(update={"key": quote(obj.key)}) for obj in objects] + common_prefixes = [quote(prefix) for prefix in common_prefixes] + return objects, common_prefixes From 11431e0ca38a5aa4f95fca44940aeb6926cfb507 Mon Sep 17 00:00:00 2001 From: Alex Severin Date: Mon, 21 Jul 2025 13:57:45 +0300 Subject: [PATCH 5/5] next marker --- README.md | 6 +-- examples/custom_store.pyi | 6 +-- src/boxdrive/handlers.py | 4 +- src/boxdrive/middleware.py | 2 +- src/boxdrive/s3.py | 49 ++++++++++--------- src/boxdrive/schemas/store.py | 2 +- src/boxdrive/schemas/xml.py | 1 + src/boxdrive/store.py | 6 +-- src/boxdrive/stores/_utils.py | 13 ++++- src/boxdrive/stores/inmemory.py | 4 +- .../s3tests_boto3/functional/test_s3.py | 17 ++++--- 11 files changed, 63 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 4e9088e..991bb7a 100644 --- a/README.md +++ b/README.md @@ -78,10 +78,10 @@ class MyCustomStore(ObjectStore): self, bucket_name: BucketName, *, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = 1000, - marker: Key | None = None, + marker: str | None = None, encoding_type: str | None = None, ) -> ListObjectsInfo: ... async def list_objects_v2( @@ -92,7 +92,7 @@ class MyCustomStore(ObjectStore): delimiter: str | None = None, encoding_type: str | None = None, max_keys: MaxKeys = 1000, - prefix: Key | None = None, + prefix: str | None = None, start_after: Key | None = None, ) -> ListObjectsV2Info: ... ``` diff --git a/examples/custom_store.pyi b/examples/custom_store.pyi index 7cdb243..a2ea70f 100644 --- a/examples/custom_store.pyi +++ b/examples/custom_store.pyi @@ -25,10 +25,10 @@ class MyCustomStore(ObjectStore): self, bucket_name: BucketName, *, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = 1000, - marker: Key | None = None, + marker: str | None = None, encoding_type: str | None = None, ) -> ListObjectsInfo: ... async def list_objects_v2( @@ -39,6 +39,6 @@ class MyCustomStore(ObjectStore): delimiter: str | None = None, encoding_type: str | None = None, max_keys: MaxKeys = 1000, - prefix: Key | None = None, + prefix: str | None = None, start_after: Key | None = None, ) -> ListObjectsV2Info: ... diff --git a/src/boxdrive/handlers.py b/src/boxdrive/handlers.py index 1a99596..b0b4bbb 100644 --- a/src/boxdrive/handlers.py +++ b/src/boxdrive/handlers.py @@ -33,10 +33,10 @@ async def list_buckets(s3: S3Dep) -> XMLResponse: @router.get("/{bucket}") async def list_objects( bucket: BucketName, - prefix: Key | None = Query(None), + prefix: str | None = Query(None), delimiter: str | None = Query(None), max_keys: MaxKeys = Query(constants.MAX_KEYS, alias="max-keys"), - marker: Key | None = Query(None), + marker: str | None = Query(None), continuation_token: Key | None = Query(None, alias="continuation-token"), start_after: Key | None = Query(None, alias="start-after"), list_type: Literal["1", "2"] = Query("1", alias="list-type"), diff --git a/src/boxdrive/middleware.py b/src/boxdrive/middleware.py index 375c917..68fc435 100644 --- a/src/boxdrive/middleware.py +++ b/src/boxdrive/middleware.py @@ -41,9 +41,9 @@ async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) - logger.info( "Response info: %s", { + "status_code": status_code, "method": method, "path": path, - "status_code": status_code, "process_time": f"{process_time:.3f}s", "content_length": content_length, }, diff --git a/src/boxdrive/s3.py b/src/boxdrive/s3.py index dfee603..ae2bf25 100644 --- a/src/boxdrive/s3.py +++ b/src/boxdrive/s3.py @@ -30,62 +30,66 @@ async def list_buckets(self) -> xml.ListAllMyBucketsResult: buckets_model = xml.Buckets(buckets=buckets_xml) return xml.ListAllMyBucketsResult(owner=owner, buckets=buckets_model) - @tracer.start_as_current_span("list_objects_v2") - async def list_objects_v2( + @tracer.start_as_current_span("list_objects") + async def list_objects( self, bucket: BucketName, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, - continuation_token: Key | None = None, - start_after: Key | None = None, + marker: Key | None = None, encoding_type: str | None = None, ) -> xml.ListBucketResult: - objects_info = await self.store.list_objects_v2( - bucket, - prefix=prefix, - delimiter=delimiter, - max_keys=max_keys, - continuation_token=continuation_token, - start_after=start_after, - encoding_type=encoding_type, + objects_info = await self.store.list_objects( + bucket, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker, encoding_type=encoding_type ) return self._build_list_bucket_result( bucket, - objects_info, + next_marker=objects_info.next_marker, + objects_info=objects_info, prefix=prefix, delimiter=delimiter, max_keys=max_keys, ) - @tracer.start_as_current_span("list_objects") - async def list_objects( + @tracer.start_as_current_span("list_objects_v2") + async def list_objects_v2( self, bucket: BucketName, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, - marker: Key | None = None, + continuation_token: Key | None = None, + start_after: Key | None = None, encoding_type: str | None = None, ) -> xml.ListBucketResult: - objects_info = await self.store.list_objects( - bucket, prefix=prefix, delimiter=delimiter, max_keys=max_keys, marker=marker, encoding_type=encoding_type + objects_info = await self.store.list_objects_v2( + bucket, + prefix=prefix, + delimiter=delimiter, + max_keys=max_keys, + continuation_token=continuation_token, + start_after=start_after, + encoding_type=encoding_type, ) return self._build_list_bucket_result( bucket, - objects_info, + objects_info=objects_info, prefix=prefix, delimiter=delimiter, max_keys=max_keys, ) + # TODO: exclude None NextMarker from response def _build_list_bucket_result( self, bucket: BucketName, + *, objects_info: BaseListObjectsInfo, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, + next_marker: str = "", ) -> xml.ListBucketResult: objects: list[xml.Content] = [] for obj in objects_info.objects: @@ -107,6 +111,7 @@ def _build_list_bucket_result( key_count=len(objects) + len(objects_info.common_prefixes), is_truncated=objects_info.is_truncated, delimiter=delimiter, + next_marker=next_marker or None, contents=objects, common_prefixes=[xml.CommonPrefix(prefix=prefix) for prefix in objects_info.common_prefixes], ) diff --git a/src/boxdrive/schemas/store.py b/src/boxdrive/schemas/store.py index d1c0073..6f33484 100644 --- a/src/boxdrive/schemas/store.py +++ b/src/boxdrive/schemas/store.py @@ -104,7 +104,7 @@ class BaseListObjectsInfo(BaseModel): class ListObjectsInfo(BaseListObjectsInfo): - pass + next_marker: str = "" class ListObjectsV2Info(BaseListObjectsInfo): diff --git a/src/boxdrive/schemas/xml.py b/src/boxdrive/schemas/xml.py index 3768326..fd1dda1 100644 --- a/src/boxdrive/schemas/xml.py +++ b/src/boxdrive/schemas/xml.py @@ -66,3 +66,4 @@ class ListBucketResult(BaseXmlModel): delimiter: str | None = element(tag="Delimiter", default=None) contents: list[Content] = element(tag="Contents") common_prefixes: list[CommonPrefix] = element(tag="CommonPrefixes") + next_marker: str | None = element(tag="NextMarker", default=None) diff --git a/src/boxdrive/store.py b/src/boxdrive/store.py index 5c050c3..f482d61 100644 --- a/src/boxdrive/store.py +++ b/src/boxdrive/store.py @@ -39,10 +39,10 @@ async def list_objects( self, bucket_name: BucketName, *, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, - marker: Key | None = None, + marker: str | None = None, encoding_type: str | None = None, ) -> ListObjectsInfo: """List objects in a bucket.""" @@ -57,7 +57,7 @@ async def list_objects_v2( delimiter: str | None = None, encoding_type: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, - prefix: Key | None = None, + prefix: str | None = None, start_after: Key | None = None, ) -> ListObjectsV2Info: """List objects in a bucket.""" diff --git a/src/boxdrive/stores/_utils.py b/src/boxdrive/stores/_utils.py index c924db3..bcd9cdb 100644 --- a/src/boxdrive/stores/_utils.py +++ b/src/boxdrive/stores/_utils.py @@ -6,7 +6,7 @@ def filter_objects( objects: list[ObjectInfo], *, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = 1000, marker: Key | None = None, @@ -24,10 +24,19 @@ def filter_objects( objects, common_prefixes = _split_contents_and_prefixes(objects, prefix=prefix, delimiter=delimiter) objects, common_prefixes = _encode_keys_and_prefixes(objects, common_prefixes, encoding_type=encoding_type) + + next_marker = "" + if is_truncated: + if common_prefixes: + next_marker = common_prefixes[-1] + elif objects: + next_marker = objects[-1].key + return ListObjectsInfo( is_truncated=is_truncated, common_prefixes=common_prefixes, objects=objects, + next_marker=next_marker, ) @@ -38,7 +47,7 @@ def filter_objects_v2( delimiter: str | None = None, encoding_type: str | None = None, max_keys: MaxKeys = 1000, - prefix: Key | None = None, + prefix: str | None = None, start_after: Key | None = None, ) -> ListObjectsV2Info: if prefix: diff --git a/src/boxdrive/stores/inmemory.py b/src/boxdrive/stores/inmemory.py index fff10f0..116fb47 100644 --- a/src/boxdrive/stores/inmemory.py +++ b/src/boxdrive/stores/inmemory.py @@ -78,7 +78,7 @@ async def list_objects( self, bucket_name: str, *, - prefix: Key | None = None, + prefix: str | None = None, delimiter: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, marker: Key | None = None, @@ -100,7 +100,7 @@ async def list_objects_v2( delimiter: str | None = None, encoding_type: str | None = None, max_keys: MaxKeys = constants.MAX_KEYS, - prefix: Key | None = None, + prefix: str | None = None, start_after: Key | None = None, ) -> ListObjectsV2Info: bucket = self.buckets.get(bucket_name) diff --git a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py index b8bcb3e..9acd6a4 100644 --- a/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py +++ b/tests/third_party/s3-tests/s3tests_boto3/functional/test_s3.py @@ -319,6 +319,7 @@ def validate_bucket_listv2(bucket_name, prefix, delimiter, continuation_token, m return response['NextContinuationToken'] +@pytest.mark.inmemory @pytest.mark.fails_on_dbstore def test_bucket_list_delimiter_prefix(): bucket_name = _create_objects(keys=['asdf', 'boo/bar', 'boo/baz/xyzzy', 'cquux/thud', 'cquux/bla']) @@ -327,19 +328,19 @@ def test_bucket_list_delimiter_prefix(): marker = '' prefix = '' - marker = validate_bucket_list(bucket_name, prefix, delim, '', 1, True, ['asdf'], [], 'asdf') - marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, True, [], ['boo/'], 'boo/') - marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, False, [], ['cquux/'], None) + marker = validate_bucket_list(bucket_name, prefix, delim, '', 1, True, ['asdf'], [], next_marker='asdf') + marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, True, [], ['boo/'], next_marker='boo/') + #TODO: marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, False, [], ['cquux/'], next_marker=None) - marker = validate_bucket_list(bucket_name, prefix, delim, '', 2, True, ['asdf'], ['boo/'], 'boo/') - marker = validate_bucket_list(bucket_name, prefix, delim, marker, 2, False, [], ['cquux/'], None) + marker = validate_bucket_list(bucket_name, prefix, delim, '', 2, True, ['asdf'], ['boo/'], next_marker='boo/') + #TODO: marker = validate_bucket_list(bucket_name, prefix, delim, marker, 2, False, [], ['cquux/'], next_marker=None) prefix = 'boo/' - marker = validate_bucket_list(bucket_name, prefix, delim, '', 1, True, ['boo/bar'], [], 'boo/bar') - marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, False, [], ['boo/baz/'], None) + marker = validate_bucket_list(bucket_name, prefix, delim, '', 1, True, ['boo/bar'], [], next_marker='boo/bar') + marker = validate_bucket_list(bucket_name, prefix, delim, marker, 1, False, [], ['boo/baz/'], next_marker='') - marker = validate_bucket_list(bucket_name, prefix, delim, '', 2, False, ['boo/bar'], ['boo/baz/'], None) + marker = validate_bucket_list(bucket_name, prefix, delim, '', 2, False, ['boo/bar'], ['boo/baz/'], next_marker='') @pytest.mark.list_objects_v2 @pytest.mark.fails_on_dbstore