diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index e656fbed64..b4b06e3a46 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -619,15 +619,28 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name) namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name) if namespace: - namespace_str = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) - table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_str)) - namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str)) + namespace_like = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) + "%" + table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_like)) + namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_like)) stmt = union( table_stmt, namespace_stmt, ) with Session(self.engine) as session: - return [Catalog.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] + namespace_tuple = Catalog.identifier_to_tuple(namespace) + sub_namespaces_level_length = len(namespace_tuple) + 1 + + namespaces = list( + { # only get distinct namespaces + ns[:sub_namespaces_level_length] # truncate to the required level + for ns in {Catalog.identifier_to_tuple(ns) for ns in session.execute(stmt).scalars()} + if len(ns) >= sub_namespaces_level_length # only get sub namespaces/children + and ns[: sub_namespaces_level_length - 1] == namespace_tuple + # exclude fuzzy matches when `namespace` contains `%` or `_` + } + ) + + return namespaces def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: """Get properties for a namespace. diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index c00f4fde95..6526022a49 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -314,7 +314,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None: assert table._identifier == Catalog.identifier_to_tuple(new_table) # And - assert ("new", "namespace") in catalog.list_namespaces() + assert catalog._namespace_exists(table._identifier[:-1]) # And with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR): @@ -338,7 +338,7 @@ def test_rename_table_from_self_identifier(catalog: InMemoryCatalog) -> None: assert new_table._identifier == Catalog.identifier_to_tuple(new_table_name) # And - assert ("new", "namespace") in catalog.list_namespaces() + assert catalog._namespace_exists(new_table._identifier[:-1]) # And with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR): @@ -352,7 +352,7 @@ def test_create_namespace(catalog: InMemoryCatalog) -> None: catalog.create_namespace(TEST_TABLE_NAMESPACE, TEST_TABLE_PROPERTIES) # Then - assert TEST_TABLE_NAMESPACE in catalog.list_namespaces() + assert catalog._namespace_exists(TEST_TABLE_NAMESPACE) assert TEST_TABLE_PROPERTIES == catalog.load_namespace_properties(TEST_TABLE_NAMESPACE) @@ -375,7 +375,12 @@ def test_list_namespaces(catalog: InMemoryCatalog) -> None: # When namespaces = catalog.list_namespaces() # Then - assert TEST_TABLE_NAMESPACE in namespaces + assert TEST_TABLE_NAMESPACE[:1] in namespaces + + # When + namespaces = catalog.list_namespaces(TEST_TABLE_NAMESPACE) + # Then + assert not namespaces def test_drop_namespace(catalog: InMemoryCatalog) -> None: @@ -384,7 +389,7 @@ def test_drop_namespace(catalog: InMemoryCatalog) -> None: # When catalog.drop_namespace(TEST_TABLE_NAMESPACE) # Then - assert TEST_TABLE_NAMESPACE not in catalog.list_namespaces() + assert not catalog._namespace_exists(TEST_TABLE_NAMESPACE) def test_drop_namespace_raises_error_when_namespace_does_not_exist(catalog: InMemoryCatalog) -> None: @@ -433,7 +438,7 @@ def test_update_namespace_metadata(catalog: InMemoryCatalog) -> None: summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, updates=new_metadata) # Then - assert TEST_TABLE_NAMESPACE in catalog.list_namespaces() + assert catalog._namespace_exists(TEST_TABLE_NAMESPACE) assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items() assert summary.removed == [] assert sorted(summary.updated) == ["key3", "key4"] @@ -450,7 +455,7 @@ def test_update_namespace_metadata_removals(catalog: InMemoryCatalog) -> None: summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, remove_metadata, new_metadata) # Then - assert TEST_TABLE_NAMESPACE in catalog.list_namespaces() + assert catalog._namespace_exists(TEST_TABLE_NAMESPACE) assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items() assert remove_metadata.isdisjoint(catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).keys()) assert summary.removed == ["key1"] diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 33a76f7308..8c3047b2ca 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -17,7 +17,7 @@ import os from pathlib import Path -from typing import Any, Generator, List, cast +from typing import Any, Generator, cast import pyarrow as pa import pytest @@ -1027,7 +1027,7 @@ def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None: catalog.create_namespace(namespace) - assert (Catalog.identifier_to_tuple(namespace)) in catalog.list_namespaces() + assert (Catalog.identifier_to_tuple(namespace)[:1]) in catalog.list_namespaces() @pytest.mark.parametrize( @@ -1074,7 +1074,7 @@ def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, namespa } catalog.create_namespace(namespace=namespace, properties=test_properties) loaded_database_list = catalog.list_namespaces() - assert Catalog.identifier_to_tuple(namespace) in loaded_database_list + assert Catalog.identifier_to_tuple(namespace)[:1] in loaded_database_list properties = catalog.load_namespace_properties(namespace) assert properties["comment"] == "this is a test description" assert properties["location"] == test_location @@ -1135,17 +1135,42 @@ def test_namespace_exists(catalog: SqlCatalog) -> None: lazy_fixture("catalog_sqlite"), ], ) -@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), lazy_fixture("hierarchical_namespace_list")]) -def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None: +def test_list_namespaces(catalog: SqlCatalog) -> None: + namespace_list = ["db", "db.ns1", "db.ns1.ns2", "db.ns2", "db2", "db2.ns1", "db%"] for namespace in namespace_list: - catalog.create_namespace(namespace) - # Test global list + if not catalog._namespace_exists(namespace): + catalog.create_namespace(namespace) + ns_list = catalog.list_namespaces() + for ns in [("db",), ("db%",), ("db2",)]: + assert ns in ns_list + + ns_list = catalog.list_namespaces("db") + assert sorted(ns_list) == [("db", "ns1"), ("db", "ns2")] + + ns_list = catalog.list_namespaces("db.ns1") + assert sorted(ns_list) == [("db", "ns1", "ns2")] + + ns_list = catalog.list_namespaces("db.ns1.ns2") + assert len(ns_list) == 0 + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +def test_list_namespaces_fuzzy_match(catalog: SqlCatalog) -> None: + namespace_list = ["db.ns1", "db.ns1.ns2", "db.ns2", "db.ns1X.ns3", "db_.ns1.ns2", "db2.ns1.ns2"] for namespace in namespace_list: - assert Catalog.identifier_to_tuple(namespace) in ns_list - # Test individual namespace list - assert len(one_namespace := catalog.list_namespaces(namespace)) == 1 - assert Catalog.identifier_to_tuple(namespace) == one_namespace[0] + if not catalog._namespace_exists(namespace): + catalog.create_namespace(namespace) + + assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")] + + assert catalog.list_namespaces("db_.ns1") == [("db_", "ns1", "ns2")] @pytest.mark.parametrize( @@ -1177,13 +1202,13 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: namespace = Catalog.namespace_from(table_identifier) catalog.create_namespace(namespace) - assert namespace in catalog.list_namespaces() + assert catalog._namespace_exists(namespace) catalog.create_table(table_identifier, table_schema_nested) with pytest.raises(NamespaceNotEmptyError): catalog.drop_namespace(namespace) catalog.drop_table(table_identifier) catalog.drop_namespace(namespace) - assert namespace not in catalog.list_namespaces() + assert not catalog._namespace_exists(namespace) @pytest.mark.parametrize(