Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions pyiceberg/catalog/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,15 +619,28 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi
table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name)
namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name)
if namespace:
namespace_str = Catalog.namespace_to_string(namespace, NoSuchNamespaceError)
table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
namespace_like = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) + "%"
table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_like))
namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_like))
stmt = union(
table_stmt,
namespace_stmt,
)
with Session(self.engine) as session:
return [Catalog.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()]
namespace_tuple = Catalog.identifier_to_tuple(namespace)
sub_namespaces_level_length = len(namespace_tuple) + 1

namespaces = list(
{ # only get distinct namespaces
ns[:sub_namespaces_level_length] # truncate to the required level
for ns in {Catalog.identifier_to_tuple(ns) for ns in session.execute(stmt).scalars()}
if len(ns) >= sub_namespaces_level_length # only get sub namespaces/children
and ns[: sub_namespaces_level_length - 1] == namespace_tuple
# the exact-prefix comparison above excludes fuzzy LIKE matches when `namespace` contains the wildcards `%` or `_`
}
)

return namespaces

def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties:
"""Get properties for a namespace.
Expand Down
19 changes: 12 additions & 7 deletions tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
assert table._identifier == Catalog.identifier_to_tuple(new_table)

# And
assert ("new", "namespace") in catalog.list_namespaces()
assert catalog._namespace_exists(table._identifier[:-1])

# And
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
Expand All @@ -338,7 +338,7 @@ def test_rename_table_from_self_identifier(catalog: InMemoryCatalog) -> None:
assert new_table._identifier == Catalog.identifier_to_tuple(new_table_name)

# And
assert ("new", "namespace") in catalog.list_namespaces()
assert catalog._namespace_exists(new_table._identifier[:-1])

# And
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
Expand All @@ -352,7 +352,7 @@ def test_create_namespace(catalog: InMemoryCatalog) -> None:
catalog.create_namespace(TEST_TABLE_NAMESPACE, TEST_TABLE_PROPERTIES)

# Then
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
assert TEST_TABLE_PROPERTIES == catalog.load_namespace_properties(TEST_TABLE_NAMESPACE)


Expand All @@ -375,7 +375,12 @@ def test_list_namespaces(catalog: InMemoryCatalog) -> None:
# When
namespaces = catalog.list_namespaces()
# Then
assert TEST_TABLE_NAMESPACE in namespaces
assert TEST_TABLE_NAMESPACE[:1] in namespaces

# When
namespaces = catalog.list_namespaces(TEST_TABLE_NAMESPACE)
# Then
assert not namespaces


def test_drop_namespace(catalog: InMemoryCatalog) -> None:
Expand All @@ -384,7 +389,7 @@ def test_drop_namespace(catalog: InMemoryCatalog) -> None:
# When
catalog.drop_namespace(TEST_TABLE_NAMESPACE)
# Then
assert TEST_TABLE_NAMESPACE not in catalog.list_namespaces()
assert not catalog._namespace_exists(TEST_TABLE_NAMESPACE)


def test_drop_namespace_raises_error_when_namespace_does_not_exist(catalog: InMemoryCatalog) -> None:
Expand Down Expand Up @@ -433,7 +438,7 @@ def test_update_namespace_metadata(catalog: InMemoryCatalog) -> None:
summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, updates=new_metadata)

# Then
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items()
assert summary.removed == []
assert sorted(summary.updated) == ["key3", "key4"]
Expand All @@ -450,7 +455,7 @@ def test_update_namespace_metadata_removals(catalog: InMemoryCatalog) -> None:
summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, remove_metadata, new_metadata)

# Then
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items()
assert remove_metadata.isdisjoint(catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).keys())
assert summary.removed == ["key1"]
Expand Down
51 changes: 38 additions & 13 deletions tests/catalog/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import os
from pathlib import Path
from typing import Any, Generator, List, cast
from typing import Any, Generator, cast

import pyarrow as pa
import pytest
Expand Down Expand Up @@ -1027,7 +1027,7 @@ def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str)
@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")])
def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None:
catalog.create_namespace(namespace)
assert (Catalog.identifier_to_tuple(namespace)) in catalog.list_namespaces()
assert (Catalog.identifier_to_tuple(namespace)[:1]) in catalog.list_namespaces()


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1074,7 +1074,7 @@ def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, namespa
}
catalog.create_namespace(namespace=namespace, properties=test_properties)
loaded_database_list = catalog.list_namespaces()
assert Catalog.identifier_to_tuple(namespace) in loaded_database_list
assert Catalog.identifier_to_tuple(namespace)[:1] in loaded_database_list
properties = catalog.load_namespace_properties(namespace)
assert properties["comment"] == "this is a test description"
assert properties["location"] == test_location
Expand Down Expand Up @@ -1135,17 +1135,42 @@ def test_namespace_exists(catalog: SqlCatalog) -> None:
lazy_fixture("catalog_sqlite"),
],
)
@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), lazy_fixture("hierarchical_namespace_list")])
def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None:
def test_list_namespaces(catalog: SqlCatalog) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding this test!

namespace_list = ["db", "db.ns1", "db.ns1.ns2", "db.ns2", "db2", "db2.ns1", "db%"]
for namespace in namespace_list:
catalog.create_namespace(namespace)
# Test global list
if not catalog._namespace_exists(namespace):
catalog.create_namespace(namespace)

ns_list = catalog.list_namespaces()
for ns in [("db",), ("db%",), ("db2",)]:
assert ns in ns_list

ns_list = catalog.list_namespaces("db")
assert sorted(ns_list) == [("db", "ns1"), ("db", "ns2")]

ns_list = catalog.list_namespaces("db.ns1")
assert sorted(ns_list) == [("db", "ns1", "ns2")]

ns_list = catalog.list_namespaces("db.ns1.ns2")
assert len(ns_list) == 0


@pytest.mark.parametrize(
"catalog",
[
lazy_fixture("catalog_memory"),
lazy_fixture("catalog_sqlite"),
],
)
def test_list_namespaces_fuzzy_match(catalog: SqlCatalog) -> None:
namespace_list = ["db.ns1", "db.ns1.ns2", "db.ns2", "db.ns1X.ns3", "db_.ns1.ns2", "db2.ns1.ns2"]
for namespace in namespace_list:
assert Catalog.identifier_to_tuple(namespace) in ns_list
# Test individual namespace list
assert len(one_namespace := catalog.list_namespaces(namespace)) == 1
assert Catalog.identifier_to_tuple(namespace) == one_namespace[0]
if not catalog._namespace_exists(namespace):
catalog.create_namespace(namespace)

assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")]

assert catalog.list_namespaces("db_.ns1") == [("db_", "ns1", "ns2")]


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1177,13 +1202,13 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None:
def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None:
namespace = Catalog.namespace_from(table_identifier)
catalog.create_namespace(namespace)
assert namespace in catalog.list_namespaces()
assert catalog._namespace_exists(namespace)
catalog.create_table(table_identifier, table_schema_nested)
with pytest.raises(NamespaceNotEmptyError):
catalog.drop_namespace(namespace)
catalog.drop_table(table_identifier)
catalog.drop_namespace(namespace)
assert namespace not in catalog.list_namespaces()
assert not catalog._namespace_exists(namespace)


@pytest.mark.parametrize(
Expand Down