Skip to content

Commit caeeb50

Browse files
authored
Fix globbing top-level buckets and raise an error when globbing above the bucket level (#312)
* Add regression tests for #311
* Form paths properly in glob and error above bucket
* Changelog and version
1 parent 2556df0 commit caeeb50

File tree

4 files changed

+32
-3
lines changed

4 files changed

+32
-3
lines changed

HISTORY.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# cloudpathlib Changelog
22

3+
## v0.12.1 (2023-01-04)
4+
5+
- Fix glob logic for buckets; add regression test; add error on globbing all buckets ([Issue #311](https://github.com/drivendataorg/cloudpathlib/issues/311), [PR #312](https://github.com/drivendataorg/cloudpathlib/pull/312))
6+
37
## v0.12.0 (2022-12-30)
48

59
- API Change: `S3Client` supports an `extra_args` kwarg now to pass extra args down to `boto3` functions; this enables Requester Pays bucket access and bucket encryption. (Issues [#254](https://github.com/drivendataorg/cloudpathlib/issues/254), [#180](https://github.com/drivendataorg/cloudpathlib/issues/180); [PR #307](https://github.com/drivendataorg/cloudpathlib/pull/307))

cloudpathlib/cloudpath.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,11 @@ def _glob_checks(self, pattern: str) -> None:
357357
if pattern.startswith(self.cloud_prefix) or pattern.startswith("/"):
358358
raise CloudPathNotImplementedError("Non-relative patterns are unsupported")
359359

360+
if self.drive == "":
361+
raise CloudPathNotImplementedError(
362+
".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()"
363+
)
364+
360365
def _glob(
361366
self: DerivedCloudPath, selector, recursive: bool
362367
) -> Generator[DerivedCloudPath, None, None]:
@@ -390,12 +395,13 @@ def _build_tree(trunk, branch, nodes, is_dir):
390395

391396
root = _CloudPathSelectable(
392397
self.name,
393-
[p.name for p in self.parents[:-1]], # all parents except bucket/container
398+
[], # nothing above self will be returned, so initial parents is empty
394399
file_tree,
395400
)
396401

397402
for p in selector.select_from(root):
398-
yield self.client.CloudPath(f"{self.cloud_prefix}{self.drive}/{p}")
403+
# select_from returns self.name/... so strip before joining
404+
yield (self / str(p)[len(self.name) + 1 :])
399405

400406
def glob(self: DerivedCloudPath, pattern: str) -> Generator[DerivedCloudPath, None, None]:
401407
self._glob_checks(pattern)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,5 @@ def load_requirements(path: Path):
6161
"Source Code": "https://github.com/drivendataorg/cloudpathlib",
6262
},
6363
url="https://github.com/drivendataorg/cloudpathlib",
64-
version="0.12.0",
64+
version="0.12.1",
6565
)

tests/test_cloudpath_file_io.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,25 @@ def _check_glob(pattern, glob_method):
191191
)
192192

193193

194+
def test_glob_buckets(rig):
195+
# CloudPath("s3://").glob("*") results in error
196+
drive_level = rig.path_class(rig.path_class.cloud_prefix)
197+
198+
with pytest.raises(CloudPathNotImplementedError):
199+
list(drive_level.glob("*"))
200+
201+
# CloudPath("s3://bucket").glob("*") should work
202+
# bucket level glob returns correct results
203+
# regression test for #311
204+
bucket = rig.path_class(f"{rig.path_class.cloud_prefix}{rig.drive}")
205+
206+
first_result = next(bucket.glob("*"))
207+
208+
# assert all parts are unique
209+
assert first_result.drive == rig.drive
210+
assert len(first_result.parts) == len(set(first_result.parts))
211+
212+
194213
def test_glob_many_open_files(rig):
195214
# test_glob_many_open_files
196215
# Adapted from: https://github.com/python/cpython/blob/7ffe7ba30fc051014977c6f393c51e57e71a6648/Lib/test/test_pathlib.py#L1697-L1712

0 commit comments

Comments
 (0)