Skip to content

Commit 7972992

Browse files
tedhtchangwoop
andauthored
Make sure FeatureViews with same name can not be applied at the same … (feast-dev#1651)
* Make sure FeatureViews with same name can not be applied at the same time Signed-off-by: ted chang <[email protected]> * Add tests for applying feature views with duplicated names Signed-off-by: ted chang <[email protected]> * Resolve refactoring conflict Signed-off-by: ted chang <[email protected]> * Update sdk/python/feast/feature_store.py Co-authored-by: Willem Pienaar <[email protected]> Signed-off-by: ted chang <[email protected]> * Add another test case Signed-off-by: ted chang <[email protected]> Co-authored-by: Willem Pienaar <[email protected]>
1 parent 6713384 commit 7972992

File tree

5 files changed

+153
-2
lines changed

5 files changed

+153
-2
lines changed

sdk/python/feast/feature_store.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ def apply(
285285
assert isinstance(objects, list)
286286

287287
views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)]
288+
_validate_feature_views(views_to_update)
288289
entities_to_update = [ob for ob in objects if isinstance(ob, Entity)]
289290
services_to_update = [ob for ob in objects if isinstance(ob, FeatureService)]
290291

@@ -829,3 +830,15 @@ def _print_materialization_log(
829830
f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}"
830831
f" into the {Style.BRIGHT + Fore.GREEN}{online_store}{Style.RESET_ALL} online store.\n"
831832
)
833+
834+
835+
def _validate_feature_views(feature_views: List[FeatureView]):
836+
""" Verify feature views have unique names"""
837+
name_to_fv_dict = {}
838+
for fv in feature_views:
839+
if fv.name in name_to_fv_dict:
840+
raise ValueError(
841+
f"More than one feature view with name {fv.name} found. Please ensure that all feature view names are unique. It may be necessary to ignore certain files in your feature repository by using a .feastignore file."
842+
)
843+
else:
844+
name_to_fv_dict[fv.name] = fv

sdk/python/feast/repo_operations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from feast import Entity, FeatureTable
1515
from feast.feature_service import FeatureService
16-
from feast.feature_store import FeatureStore
16+
from feast.feature_store import FeatureStore, _validate_feature_views
1717
from feast.feature_view import FeatureView
1818
from feast.inference import (
1919
update_data_sources_with_inferred_event_timestamp_col,
@@ -103,7 +103,6 @@ def parse_repo(repo_root: Path) -> ParsedRepo:
103103
for repo_file in get_repo_files(repo_root):
104104
module_path = py_path_to_module(repo_file, repo_root)
105105
module = importlib.import_module(module_path)
106-
107106
for attr_name in dir(module):
108107
obj = getattr(module, attr_name)
109108
if isinstance(obj, FeatureTable):
@@ -162,6 +161,7 @@ def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation
162161
registry._initialize_registry()
163162
sys.dont_write_bytecode = True
164163
repo = parse_repo(repo_path)
164+
_validate_feature_views(repo.feature_views)
165165
data_sources = [t.batch_source for t in repo.feature_views]
166166

167167
if not skip_source_validation:
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from google.protobuf.duration_pb2 import Duration
2+
3+
from feast import FeatureView, FileSource
4+
5+
driver_hourly_stats = FileSource(
6+
path="driver_stats.parquet", # this parquet is not real and will not be read
7+
)
8+
9+
driver_hourly_stats_view = FeatureView(
10+
name="driver_hourly_stats", # Intentionally use the same FeatureView name
11+
entities=["driver_id"],
12+
online=False,
13+
input=driver_hourly_stats,
14+
ttl=Duration(seconds=10),
15+
tags={},
16+
)
17+
18+
driver_hourly_stats_view_dup1 = FeatureView(
19+
name="driver_hourly_stats", # Intentionally use the same FeatureView name
20+
entities=["driver_id"],
21+
online=False,
22+
input=driver_hourly_stats,
23+
ttl=Duration(seconds=10),
24+
tags={},
25+
)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import tempfile
2+
from pathlib import Path
3+
from textwrap import dedent
4+
5+
from tests.utils.cli_utils import CliRunner, get_example_repo
6+
7+
8+
def test_cli_apply_duplicated_featureview_names() -> None:
9+
"""
10+
Test apply feature views with duplicated names and single py file in a feature repo using CLI
11+
"""
12+
13+
with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
14+
runner = CliRunner()
15+
# Construct an example repo in a temporary dir
16+
repo_path = Path(repo_dir_name)
17+
data_path = Path(data_dir_name)
18+
19+
repo_config = repo_path / "feature_store.yaml"
20+
21+
repo_config.write_text(
22+
dedent(
23+
f"""
24+
project: foo
25+
registry: {data_path / "registry.db"}
26+
provider: local
27+
online_store:
28+
path: {data_path / "online_store.db"}
29+
"""
30+
)
31+
)
32+
33+
repo_example = repo_path / "example.py"
34+
repo_example.write_text(
35+
get_example_repo(
36+
"example_feature_repo_with_duplicated_featureview_names.py"
37+
)
38+
)
39+
rc, output = runner.run_with_output(["apply"], cwd=repo_path)
40+
41+
assert (
42+
rc != 0
43+
and b"Please ensure that all feature view names are unique" in output
44+
)
45+
46+
47+
def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
48+
"""
49+
Test apply feature views with duplicated names from multiple py files in a feature repo using CLI
50+
"""
51+
52+
with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
53+
runner = CliRunner()
54+
# Construct an example repo in a temporary dir
55+
repo_path = Path(repo_dir_name)
56+
data_path = Path(data_dir_name)
57+
58+
repo_config = repo_path / "feature_store.yaml"
59+
60+
repo_config.write_text(
61+
dedent(
62+
f"""
63+
project: foo
64+
registry: {data_path / "registry.db"}
65+
provider: local
66+
online_store:
67+
path: {data_path / "online_store.db"}
68+
"""
69+
)
70+
)
71+
# Create multiple py files containing the same feature view name
72+
for i in range(3):
73+
repo_example = repo_path / f"example{i}.py"
74+
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
75+
rc, output = runner.run_with_output(["apply"], cwd=repo_path)
76+
77+
assert (
78+
rc != 0
79+
and b"Please ensure that all feature view names are unique" in output
80+
)

sdk/python/tests/integration/registration/test_feature_store.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,36 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source):
480480
assert len(fv_stored.materialization_intervals) == 0
481481

482482
test_feature_store.teardown()
483+
484+
485+
def test_apply_duplicated_featureview_names(feature_store_with_local_registry):
486+
""" Test applying feature views with duplicated names"""
487+
488+
driver_stats = FeatureView(
489+
name="driver_hourly_stats",
490+
entities=["driver_id"],
491+
ttl=timedelta(seconds=10),
492+
online=False,
493+
input=FileSource(path="driver_stats.parquet"),
494+
tags={},
495+
)
496+
497+
customer_stats = FeatureView(
498+
name="driver_hourly_stats",
499+
entities=["id"],
500+
ttl=timedelta(seconds=10),
501+
online=False,
502+
input=FileSource(path="customer_stats.parquet"),
503+
tags={},
504+
)
505+
try:
506+
feature_store_with_local_registry.apply([driver_stats, customer_stats])
507+
error = None
508+
except ValueError as e:
509+
error = e
510+
assert (
511+
isinstance(error, ValueError)
512+
and "Please ensure that all feature view names are unique" in error.args[0]
513+
)
514+
515+
feature_store_with_local_registry.teardown()

0 commit comments

Comments
 (0)