@@ -47,7 +47,7 @@ def create_dataset() -> pd.DataFrame:
4747def get_feature_view (data_source : Union [FileSource , BigQuerySource ]) -> FeatureView :
4848 return FeatureView (
4949 name = "test_bq_correctness" ,
50- entities = ["driver_id " ],
50+ entities = ["driver " ],
5151 features = [Feature ("value" , ValueType .FLOAT )],
5252 ttl = timedelta (days = 5 ),
5353 input = data_source ,
@@ -83,20 +83,20 @@ def prep_bq_fs_and_fv(
8383 event_timestamp_column = "ts" ,
8484 created_timestamp_column = "created_ts" ,
8585 date_partition_column = "" ,
86- field_mapping = {"ts_1" : "ts" , "id" : "driver_ident " },
86+ field_mapping = {"ts_1" : "ts" , "id" : "driver_id " },
8787 )
8888
8989 fv = get_feature_view (bigquery_source )
9090 e = Entity (
91- name = "driver_id " ,
91+ name = "driver " ,
9292 description = "id for driver" ,
93- join_key = "driver_ident " ,
93+ join_key = "driver_id " ,
9494 value_type = ValueType .INT32 ,
9595 )
9696 with tempfile .TemporaryDirectory () as repo_dir_name :
9797 config = RepoConfig (
9898 registry = str (Path (repo_dir_name ) / "registry.db" ),
99- project = f"test_bq_correctness_{ uuid .uuid4 ()} " ,
99+ project = f"test_bq_correctness_{ str ( uuid .uuid4 ()). replace ( '-' , '' )} " ,
100100 provider = "gcp" ,
101101 )
102102 fs = FeatureStore (config = config )
@@ -121,7 +121,10 @@ def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
121121 )
122122 fv = get_feature_view (file_source )
123123 e = Entity (
124- name = "driver_id" , description = "id for driver" , value_type = ValueType .INT32
124+ name = "driver" ,
125+ description = "id for driver" ,
126+ join_key = "driver_id" ,
127+ value_type = ValueType .INT32 ,
125128 )
126129 with tempfile .TemporaryDirectory () as repo_dir_name , tempfile .TemporaryDirectory () as data_dir_name :
127130 config = RepoConfig (
@@ -138,7 +141,34 @@ def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
138141 yield fs , fv
139142
140143
141- def run_materialization_test (fs : FeatureStore , fv : FeatureView ) -> None :
144+ # Checks that both offline & online store values are as expected
145+ def check_offline_and_online_features (
146+ fs : FeatureStore ,
147+ fv : FeatureView ,
148+ driver_id : int ,
149+ event_timestamp : datetime ,
150+ expected_value : float ,
151+ ) -> None :
152+ # Check online store
153+ response_dict = fs .get_online_features (
154+ [f"{ fv .name } :value" ], [{"driver" : driver_id }]
155+ ).to_dict ()
156+ assert abs (response_dict [f"{ fv .name } __value" ][0 ] - expected_value ) < 1e-6
157+
158+ # Check offline store
159+ df = fs .get_historical_features (
160+ entity_df = pd .DataFrame .from_dict (
161+ {"driver_id" : [driver_id ], "event_timestamp" : [event_timestamp ]}
162+ ),
163+ feature_refs = [f"{ fv .name } :value" ],
164+ ).to_df ()
165+
166+ assert abs (df .to_dict ()[f"{ fv .name } __value" ][0 ] - expected_value ) < 1e-6
167+
168+
169+ def run_offline_online_store_consistency_test (
170+ fs : FeatureStore , fv : FeatureView
171+ ) -> None :
142172 now = datetime .utcnow ()
143173 # Run materialize()
144174 # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
@@ -147,38 +177,33 @@ def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
147177 fs .materialize (feature_views = [fv .name ], start_date = start_date , end_date = end_date )
148178
149179 # check result of materialize()
150- response_dict = fs .get_online_features (
151- [f"{ fv .name } :value" ], [{"driver_id" : 1 }]
152- ).to_dict ()
153- assert abs (response_dict [f"{ fv .name } __value" ][0 ] - 0.3 ) < 1e-6
180+ check_offline_and_online_features (
181+ fs = fs , fv = fv , driver_id = 1 , event_timestamp = end_date , expected_value = 0.3
182+ )
154183
155184 # check prior value for materialize_incremental()
156- response_dict = fs .get_online_features (
157- [f"{ fv .name } :value" ], [{"driver_id" : 3 }]
158- ).to_dict ()
159- assert abs (response_dict [f"{ fv .name } __value" ][0 ] - 4 ) < 1e-6
185+ check_offline_and_online_features (
186+ fs = fs , fv = fv , driver_id = 3 , event_timestamp = end_date , expected_value = 4
187+ )
160188
161189 # run materialize_incremental()
162- fs .materialize_incremental (
163- feature_views = [fv .name ], end_date = now - timedelta (seconds = 0 ),
164- )
190+ fs .materialize_incremental (feature_views = [fv .name ], end_date = now )
165191
166192 # check result of materialize_incremental()
167- response_dict = fs .get_online_features (
168- [f"{ fv .name } :value" ], [{"driver_id" : 3 }]
169- ).to_dict ()
170- assert abs (response_dict [f"{ fv .name } __value" ][0 ] - 5 ) < 1e-6
193+ check_offline_and_online_features (
194+ fs = fs , fv = fv , driver_id = 3 , event_timestamp = now , expected_value = 5
195+ )
171196
172197
173198@pytest .mark .integration
174199@pytest .mark .parametrize (
175200 "bq_source_type" , ["query" , "table" ],
176201)
177- def test_bq_materialization (bq_source_type : str ):
202+ def test_bq_offline_online_store_consistency (bq_source_type : str ):
178203 with prep_bq_fs_and_fv (bq_source_type ) as (fs , fv ):
179- run_materialization_test (fs , fv )
204+ run_offline_online_store_consistency_test (fs , fv )
180205
181206
182- def test_local_materialization ():
207+ def test_local_offline_online_store_consistency ():
183208 with prep_local_fs_and_fv () as (fs , fv ):
184- run_materialization_test (fs , fv )
209+ run_offline_online_store_consistency_test (fs , fv )
0 commit comments