Skip to content

Commit c59637e

Browse files
authored
[FIX]: Fix issue with cloning of external tables (databricks#1095)
2 parents b2bef75 + f379790 commit c59637e

File tree

10 files changed

+104
-32
lines changed

10 files changed

+104
-32
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
## dbt-databricks 1.10.5 (TBD)
22

3+
### Features
4+
5+
- Add cloning support for the the external tables (thanks @samgans!) ([1079](https://github.com/databricks/dbt-databricks/pull/1079))
6+
37
### Fixes
48

59
- Fix inefficient query when getting column schema for MV/STs ([1074](https://github.com/databricks/dbt-databricks/issues/1074))
610
- Fix bug causing false positives in diffing constraints between existing relation and model config for incremental runs ([1081](https://github.com/databricks/dbt-databricks/issues/1081))
711
- Fix bug causing "main is not being called during running model" errors for some view updates ([1077](https://github.com/databricks/dbt-databricks/issues/1077))
12+
- Fix the bugs with external tabls cloning [1095](https://github.com/databricks/dbt-databricks/pull/1095) (thanks @samgans!)
813

914
### Under the Hood
1015

@@ -16,7 +21,6 @@
1621

1722
- Support column tags (with Materialization V2) ([649](https://github.com/databricks/dbt-databricks/issues/649))
1823
- Support column masking (with Materialization V2) ([670](https://github.com/databricks/dbt-databricks/issues/670))
19-
- Add cloning support for the the external tables (thanks @samgans!) ([1079](https://github.com/databricks/dbt-databricks/pull/1079))
2024

2125
### Fixes
2226

dbt/adapters/databricks/impl.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from dataclasses import dataclass
99
from importlib import metadata
1010
from multiprocessing.context import SpawnContext
11-
from typing import TYPE_CHECKING, Any, ClassVar, Generic, Optional, Union, cast
11+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, Optional, Union, cast
1212
from uuid import uuid4
1313

1414
from dbt_common.behavior_flags import BehaviorFlag
@@ -123,6 +123,14 @@
123123
) # type: ignore[typeddict-item]
124124

125125

126+
class DatabricksRelationInfo(NamedTuple):
127+
table_name: str
128+
table_type: str
129+
file_format: Optional[str]
130+
table_owner: Optional[str]
131+
databricks_table_type: Optional[str]
132+
133+
126134
@dataclass
127135
class DatabricksConfig(AdapterConfig):
128136
file_format: str = "delta"
@@ -326,23 +334,30 @@ def execute(
326334
def list_relations_without_caching( # type: ignore[override]
327335
self, schema_relation: DatabricksRelation
328336
) -> list[DatabricksRelation]:
329-
empty: list[tuple[Optional[str], Optional[str], Optional[str], Optional[str]]] = []
337+
empty: list[DatabricksRelationInfo] = []
330338
results = handle_missing_objects(
331339
lambda: self.get_relations_without_caching(schema_relation), empty
332340
)
333341

334342
relations = []
335343
for row in results:
336-
name, kind, file_format, owner = row
344+
name, kind, file_format, owner, table_type = row
337345
metadata = None
338346
if file_format:
339347
metadata = {KEY_TABLE_OWNER: owner, KEY_TABLE_PROVIDER: file_format}
348+
349+
if table_type:
350+
databricks_table_type = DatabricksRelation.get_databricks_table_type(table_type)
351+
else:
352+
databricks_table_type = None
353+
340354
relations.append(
341355
DatabricksRelation.create(
342356
database=schema_relation.database,
343357
schema=schema_relation.schema,
344358
identifier=name,
345359
type=DatabricksRelation.get_relation_type(kind),
360+
databricks_table_type=databricks_table_type,
346361
metadata=metadata,
347362
is_delta=file_format == "delta",
348363
)
@@ -352,24 +367,26 @@ def list_relations_without_caching( # type: ignore[override]
352367

353368
def get_relations_without_caching(
354369
self, relation: DatabricksRelation
355-
) -> list[tuple[Optional[str], Optional[str], Optional[str], Optional[str]]]:
370+
) -> list[DatabricksRelationInfo]:
356371
if relation.is_hive_metastore():
357372
return self._get_hive_relations(relation)
358373
return self._get_uc_relations(relation)
359374

360-
def _get_uc_relations(
361-
self, relation: DatabricksRelation
362-
) -> list[tuple[Optional[str], Optional[str], Optional[str], Optional[str]]]:
375+
def _get_uc_relations(self, relation: DatabricksRelation) -> list[DatabricksRelationInfo]:
363376
kwargs = {"relation": relation}
364377
results = self.execute_macro("get_uc_tables", kwargs=kwargs)
365378
return [
366-
(row["table_name"], row["table_type"], row["file_format"], row["table_owner"])
379+
DatabricksRelationInfo(
380+
row["table_name"],
381+
row["table_type"],
382+
row["file_format"],
383+
row["table_owner"],
384+
row["databricks_table_type"],
385+
)
367386
for row in results
368387
]
369388

370-
def _get_hive_relations(
371-
self, relation: DatabricksRelation
372-
) -> list[tuple[Optional[str], Optional[str], Optional[str], Optional[str]]]:
389+
def _get_hive_relations(self, relation: DatabricksRelation) -> list[DatabricksRelationInfo]:
373390
kwargs = {"relation": relation}
374391

375392
new_rows: list[tuple[str, Optional[str]]]
@@ -383,8 +400,8 @@ def _get_hive_relations(
383400
for row in tables:
384401
# list_tables returns TABLE_TYPE as view for both materialized views and for
385402
# streaming tables. Set type to "" in this case and it will be resolved below.
386-
type = row["TABLE_TYPE"].lower() if row["TABLE_TYPE"] else None
387-
row = (row["TABLE_NAME"], type)
403+
rel_type = row["TABLE_TYPE"].lower() if row["TABLE_TYPE"] else None
404+
row = (row["TABLE_NAME"], rel_type)
388405
new_rows.append(row)
389406

390407
else:
@@ -401,7 +418,16 @@ def _get_hive_relations(
401418
for row in new_rows
402419
]
403420

404-
return [(row[0], row[1], None, None) for row in new_rows]
421+
return [
422+
DatabricksRelationInfo(
423+
row[0],
424+
row[1], # type: ignore[arg-type]
425+
None,
426+
None,
427+
None,
428+
)
429+
for row in new_rows
430+
]
405431

406432
@available.parse(lambda *a, **k: [])
407433
def get_column_schema_from_query(self, sql: str) -> list[DatabricksColumn]:
@@ -495,6 +521,7 @@ def _get_updated_relation(
495521
schema=relation.schema,
496522
identifier=relation.identifier,
497523
type=relation.type, # type: ignore
524+
databricks_table_type=relation.databricks_table_type,
498525
metadata=metadata,
499526
is_delta=metadata.get(KEY_TABLE_PROVIDER) == "delta",
500527
),

dbt/adapters/databricks/relation.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,15 @@ class DatabricksRelationType(StrEnum):
4040
MaterializedView = "materialized_view"
4141
Foreign = "foreign"
4242
StreamingTable = "streaming_table"
43+
MetricView = "metric_view"
44+
Unknown = "unknown"
45+
46+
47+
class DatabricksTableType(StrEnum):
4348
External = "external"
49+
Managed = "managed"
4450
ManagedShallowClone = "managed_shallow_clone"
4551
ExternalShallowClone = "external_shallow_clone"
46-
MetricView = "metric_view"
47-
Unknown = "unknown"
4852

4953

5054
@dataclass(frozen=True, eq=False, repr=False)
@@ -69,6 +73,7 @@ class DatabricksRelation(BaseRelation):
6973
metadata: Optional[dict[str, Any]] = None
7074
renameable_relations = (DatabricksRelationType.Table, DatabricksRelationType.View)
7175
replaceable_relations = (DatabricksRelationType.Table, DatabricksRelationType.View)
76+
databricks_table_type: Optional[DatabricksTableType] = None
7277

7378
@classmethod
7479
def __pre_deserialize__(cls, data: dict[Any, Any]) -> dict[Any, Any]:
@@ -95,7 +100,7 @@ def is_streaming_table(self) -> bool:
95100

96101
@property
97102
def is_external_table(self) -> bool:
98-
return self.type == DatabricksRelationType.External
103+
return self.databricks_table_type == DatabricksTableType.External
99104

100105
@property
101106
def is_dlt(self) -> bool:
@@ -158,6 +163,10 @@ def matches(
158163
def get_relation_type(cls) -> Type[DatabricksRelationType]: # noqa
159164
return DatabricksRelationType
160165

166+
@classproperty
167+
def get_databricks_table_type(cls) -> Type[DatabricksTableType]: # noqa
168+
return DatabricksTableType
169+
161170
def information_schema(self, view_name: Optional[str] = None) -> InformationSchema:
162171
# some of our data comes from jinja, where things can be `Undefined`.
163172
if not isinstance(view_name, str):

dbt/include/databricks/macros/adapters/metadata.sql

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,21 @@ SELECT
9696
table_name,
9797
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), 'table', lower(table_type)) AS table_type,
9898
lower(data_source_format) AS file_format,
99-
table_owner
99+
table_owner,
100+
if(
101+
table_type IN (
102+
'EXTERNAL',
103+
'MANAGED',
104+
'MANAGED_SHALLOW_CLONE',
105+
'EXTERNAL_SHALLOW_CLONE'
106+
),
107+
lower(table_type),
108+
NULL
109+
) AS databricks_table_type
100110
FROM `system`.`information_schema`.`tables`
101111
WHERE table_catalog = '{{ relation.database|lower }}'
102112
AND table_schema = '{{ relation.schema|lower }}'
103113
{%- if relation.identifier %}
104114
AND table_name = '{{ relation.identifier|lower }}'
105115
{% endif %}
106-
{% endmacro %}
116+
{% endmacro %}

dbt/include/databricks/macros/materializations/clone/clone.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
{% endif %}
3939

4040
-- as a general rule, data platforms that can clone tables can also do atomic 'create or replace'
41-
{% if other_existing_relation.is_external %}
41+
{% if other_existing_relation.is_external_table %}
4242
{% call statement('main') %}
4343
{{ create_or_replace_clone_external(target_relation, defer_relation) }}
4444
{% endcall %}

dbt/include/databricks/macros/materializations/clone/strategies.sql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
shallow clone {{ defer_relation.render() }}
55
{% endmacro %}
66

7-
{% macro databricks__create_or_replace_clone_external(this_relation, defer_relation) %}
7+
{% macro create_or_replace_clone_external(this_relation, defer_relation) %}
8+
89
{%- set catalog_relation = adapter.build_catalog_relation(config.model) -%}
910

1011
create or replace
1112
table {{ this_relation.render() }}
1213
shallow clone {{ defer_relation.render() }}
1314
{{ location_clause(catalog_relation) }}
15+
1416
{% endmacro %}

tests/unit/macros/adapters/test_metadata_macros.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ def test_get_uc_tables_sql_with_identifier(self, template_bundle, relation):
137137
table_name,
138138
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), 'table', lower(table_type)) AS table_type,
139139
lower(data_source_format) AS file_format,
140-
table_owner
140+
table_owner,
141+
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), lower(table_type), null) AS databricks_table_type
141142
FROM `system`.`information_schema`.`tables`
142143
WHERE table_catalog = 'some_database'
143144
AND table_schema = 'some_schema'
@@ -155,9 +156,10 @@ def test_get_uc_tables_sql_without_identifier(self, template_bundle, mock_schema
155156
table_name,
156157
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), 'table', lower(table_type)) AS table_type,
157158
lower(data_source_format) AS file_format,
158-
table_owner
159+
table_owner,
160+
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), lower(table_type), null) AS databricks_table_type
159161
FROM `system`.`information_schema`.`tables`
160-
WHERE table_catalog = 'test_db'
162+
WHERE table_catalog = 'test_db'
161163
AND table_schema = 'test_schema'
162164
""" # noqa
163165
self.assert_sql_equal(result, expected_sql)
@@ -176,9 +178,10 @@ def test_case_sensitivity(self, template_bundle):
176178
table_name,
177179
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), 'table', lower(table_type)) AS table_type,
178180
lower(data_source_format) AS file_format,
179-
table_owner
181+
table_owner,
182+
if(table_type IN ('EXTERNAL', 'MANAGED', 'MANAGED_SHALLOW_CLONE', 'EXTERNAL_SHALLOW_CLONE'), lower(table_type), null) AS databricks_table_type
180183
FROM `system`.`information_schema`.`tables`
181-
WHERE table_catalog = 'test_db'
184+
WHERE table_catalog = 'test_db'
182185
AND table_schema = 'test_schema'
183186
AND table_name = 'test_table'
184187
""" # noqa

tests/unit/macros/materializations/test_clone_macros.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def test_create_or_replace_clone(self, template_bundle, s_relation):
7373
def test_create_or_replace_clone_external(self, template_bundle, catalog_relation, s_relation):
7474
sql = self.render_clone_macro(
7575
template_bundle,
76-
"databricks__create_or_replace_clone_external",
76+
"create_or_replace_clone_external",
7777
s_relation,
7878
template_bundle.relation,
7979
)

tests/unit/test_adapter.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,15 @@
1313
from dbt.adapters.databricks.credentials import (
1414
CATALOG_KEY_IN_SESSION_PROPERTIES,
1515
)
16-
from dbt.adapters.databricks.impl import get_identifier_list_string
17-
from dbt.adapters.databricks.relation import DatabricksRelation, DatabricksRelationType
16+
from dbt.adapters.databricks.impl import (
17+
DatabricksRelationInfo,
18+
get_identifier_list_string,
19+
)
20+
from dbt.adapters.databricks.relation import (
21+
DatabricksRelation,
22+
DatabricksRelationType,
23+
DatabricksTableType,
24+
)
1825
from dbt.adapters.databricks.utils import check_not_found_error
1926
from dbt.config import RuntimeConfig
2027
from tests.unit.utils import config_from_parts_or_dicts
@@ -356,7 +363,9 @@ def test_list_relations_without_caching__no_relations(self, _):
356363
@patch("dbt.adapters.databricks.api_client.DatabricksApiClient.create")
357364
def test_list_relations_without_caching__some_relations(self, _):
358365
with patch.object(DatabricksAdapter, "get_relations_without_caching") as mocked:
359-
mocked.return_value = [("name", "table", "hudi", "owner")]
366+
mocked.return_value = [
367+
DatabricksRelationInfo("name", "table", "hudi", "owner", "external")
368+
]
360369
adapter = DatabricksAdapter(Mock(flags={}), get_context("spawn"))
361370
relations = adapter.list_relations("database", "schema")
362371
assert len(relations) == 1
@@ -365,13 +374,15 @@ def test_list_relations_without_caching__some_relations(self, _):
365374
assert relation.database == "database"
366375
assert relation.schema == "schema"
367376
assert relation.type == DatabricksRelationType.Table
377+
assert relation.databricks_table_type == DatabricksTableType.External
368378
assert relation.owner == "owner"
379+
assert relation.is_external_table
369380
assert relation.is_hudi
370381

371382
@patch("dbt.adapters.databricks.api_client.DatabricksApiClient.create")
372383
def test_list_relations_without_caching__hive_relation(self, _):
373384
with patch.object(DatabricksAdapter, "get_relations_without_caching") as mocked:
374-
mocked.return_value = [("name", "table", None, None)]
385+
mocked.return_value = [DatabricksRelationInfo("name", "table", None, None, None)]
375386
adapter = DatabricksAdapter(Mock(flags={}), get_context("spawn"))
376387
relations = adapter.list_relations("database", "schema")
377388
assert len(relations) == 1

tests/unit/test_relation.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,12 @@ class TestRelationsFunctions:
184184
def test_is_hive_metastore(self, database, expected):
185185
assert relation.is_hive_metastore(database) is expected
186186

187+
def test_is_external_table(self):
188+
relation = DatabricksRelation.create(
189+
identifier="external_table", databricks_table_type="external"
190+
)
191+
assert relation.is_external_table is True
192+
187193
@pytest.mark.parametrize(
188194
"input, expected",
189195
[

0 commit comments

Comments
 (0)