import bigframes.core.indexes
import bigframes.dataframe as dataframe
import bigframes.series
+import bigframes.streaming.dataframe as streaming_dataframe

_BIGFRAMES_DEFAULT_CONNECTION_ID = "bigframes-default-connection"
@@ -749,6 +750,38 @@ def read_gbq_table(
            filters=filters,
        )

+    def read_gbq_table_streaming(
+        self, table: str
+    ) -> streaming_dataframe.StreamingDataFrame:
+        """Turn a BigQuery table into a StreamingDataFrame.
+
+        Note: The bigframes.streaming module is a preview feature, and subject to change.
+
+        **Examples:**
+
+            >>> import bigframes.streaming as bst
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> sdf = bst.read_gbq_table("bigquery-public-data.ml_datasets.penguins")
+        """
+        warnings.warn(
+            "The bigframes.streaming module is a preview feature, and subject to change.",
+            stacklevel=1,
+            category=bigframes.exceptions.PreviewWarning,
+        )
+
+        import bigframes.streaming.dataframe as streaming_dataframe
+
+        df = self._read_gbq_table(
+            table,
+            api_name="read_gbq_table_streaming",
+            enable_snapshot=False,
+            index_col=bigframes.enums.DefaultIndexKind.NULL,
+        )
+
+        return streaming_dataframe.StreamingDataFrame._from_table_df(df)
+
    def _read_gbq_table(
        self,
        query: str,
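
Aside: a hedged sketch of how the new entry point above might be called from user code; `bpd.get_global_session()` is assumed to return the active `Session`, and the table name is illustrative.

    import bigframes.pandas as bpd

    # Obtain the active session (assumed helper; any Session instance works).
    session = bpd.get_global_session()

    # Internally this reads the table with enable_snapshot=False and a NULL
    # index, then wraps the result in a StreamingDataFrame.
    sdf = session.read_gbq_table_streaming("bigquery-public-data.ml_datasets.penguins")
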
@@ -759,6 +792,7 @@ def _read_gbq_table(
        api_name: str,
        use_cache: bool = True,
        filters: third_party_pandas_gbq.FiltersType = (),
+        enable_snapshot: bool = True,
    ) -> dataframe.DataFrame:
        import bigframes.dataframe as dataframe

@@ -877,7 +911,7 @@ def _read_gbq_table(
            else (*columns, *[col for col in index_cols if col not in columns])
        )

-        supports_snapshot = bf_read_gbq_table.validate_table(
+        enable_snapshot = enable_snapshot and bf_read_gbq_table.validate_table(
            self.bqclient, table_ref, all_columns, time_travel_timestamp, filter_str
        )

@@ -905,7 +939,7 @@ def _read_gbq_table(
            table,
            schema=schema,
            predicate=filter_str,
-            at_time=time_travel_timestamp if supports_snapshot else None,
+            at_time=time_travel_timestamp if enable_snapshot else None,
            primary_key=index_cols if is_index_unique else (),
            session=self,
        )
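
Aside: the two hunks above make time travel doubly gated: the caller must allow it (`enable_snapshot=True`, the default) and `validate_table` must confirm the table supports it. A minimal self-contained sketch of that gating, with a stand-in for the `validate_table` result:

    import datetime

    time_travel_timestamp = datetime.datetime.now(datetime.timezone.utc)
    table_supports_snapshot = True  # stand-in for bf_read_gbq_table.validate_table(...)
    enable_snapshot = False         # read_gbq_table_streaming passes enable_snapshot=False

    # Mirrors the diff: at_time is set only when both gates are open.
    at_time = time_travel_timestamp if (enable_snapshot and table_supports_snapshot) else None
    assert at_time is None  # streaming reads never pin a snapshot
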
@@ -2056,17 +2090,20 @@ def _to_sql(
        offset_column: typing.Optional[str] = None,
        col_id_overrides: typing.Mapping[str, str] = {},
        ordered: bool = False,
+        enable_cache: bool = True,
    ) -> str:
        if offset_column:
            array_value = array_value.promote_offsets(offset_column)
-        node_w_cached = self._with_cached_executions(array_value.node)
+        node = (
+            self._with_cached_executions(array_value.node)
+            if enable_cache
+            else array_value.node
+        )
        if ordered:
            return self._compiler.compile_ordered(
-                node_w_cached, col_id_overrides=col_id_overrides
+                node, col_id_overrides=col_id_overrides
            )
-        return self._compiler.compile_unordered(
-            node_w_cached, col_id_overrides=col_id_overrides
-        )
+        return self._compiler.compile_unordered(node, col_id_overrides=col_id_overrides)

    def _get_table_size(self, destination_table):
        table = self.bqclient.get_table(destination_table)
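
Aside: the `_to_sql` change adds an `enable_cache` switch so callers can compile SQL from the original expression tree rather than one rewritten to reference cached executions, plausibly because SQL destined for streaming jobs should not depend on cached temporary tables. A minimal stand-alone sketch of the pattern; every name here is an illustrative stand-in, not a bigframes API:

    def to_sql(node, with_cached_executions, compile_unordered, enable_cache=True):
        # When caching is enabled, substitute previously materialized subtrees;
        # otherwise compile the expression tree exactly as written.
        node = with_cached_executions(node) if enable_cache else node
        return compile_unordered(node)

    # Toy stand-ins: "caching" rewrites the node name, compilation renders SQL text.
    cached = lambda n: f"cached_{n}"
    compile_ = lambda n: f"SELECT * FROM {n}"

    print(to_sql("t", cached, compile_))                      # SELECT * FROM cached_t
    print(to_sql("t", cached, compile_, enable_cache=False))  # SELECT * FROM t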