Commit 4f85e3e

fix: Use ParquetDataset for Schema Inference (feast-dev#2686)
Use ParquetDataset instead of ParquetFile for schema inference. This supports both single Parquet files and directories of partitioned Parquet datasets.

Signed-off-by: Dirk Van Bruggen <[email protected]>
1 parent 7c69f1c commit 4f85e3e

File tree

1 file changed (+3, -3)

sdk/python/feast/infra/offline_stores/file_source.py

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@
 
 from pyarrow._fs import FileSystem
 from pyarrow._s3fs import S3FileSystem
-from pyarrow.parquet import ParquetFile
+from pyarrow.parquet import ParquetDataset
 
 from feast import type_map
 from feast.data_format import FileFormat, ParquetFormat
@@ -179,9 +179,9 @@ def get_table_column_names_and_types(
         filesystem, path = FileSource.create_filesystem_and_path(
             self.path, self.file_options.s3_endpoint_override
         )
-        schema = ParquetFile(
+        schema = ParquetDataset(
             path if filesystem is None else filesystem.open_input_file(path)
-        ).schema_arrow
+        ).schema.to_arrow_schema()
         return zip(schema.names, map(str, schema.types))
 
     @staticmethod
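
For context, here is a minimal sketch (separate from the Feast code above, with made-up paths, column names, and sample data) of the behavior this change relies on: pyarrow's ParquetDataset can infer a schema from either a single Parquet file or a directory of partitioned Parquet files, which lets the same schema-inference code path serve both cases.

import tempfile

import pyarrow as pa
import pyarrow.parquet as pq

# Build a small partitioned dataset: one sub-directory per "day" value,
# each holding a Parquet file. Paths and data here are illustrative only.
root = tempfile.mkdtemp()
table = pa.table(
    {
        "driver_id": [1, 2, 3],
        "conv_rate": [0.1, 0.2, 0.3],
        "day": ["mon", "mon", "tue"],
    }
)
pq.write_to_dataset(table, root_path=root, partition_cols=["day"])

# ParquetFile can only open a single .parquet file, so pointing it at the
# directory would fail. ParquetDataset accepts a single file or a
# partitioned directory, which is what schema inference needs here.
dataset = pq.ParquetDataset(root)
schema = dataset.schema
# Recent pyarrow releases return a pyarrow.Schema directly; older releases
# returned a ParquetSchema, hence the .schema.to_arrow_schema() call in the
# diff above.

print(list(zip(schema.names, map(str, schema.types))))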

0 commit comments
