googleapis · TrevorBergeron · Aug 9, 2024 · Aug 9, 2024 · Aug 9, 2024
@@ -224,7 +224,7 @@ def json_extract(
 
         >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
         >>> bbq.json_extract(s, json_path="$.class")
-        0    "{\\\"students\\\":[{\\\"id\\\":5},{\\\"id\\\":12}]}"
+        0    {"students":[{"id":5},{"id":12}]}
         dtype: string
 
     Args:

@@ -152,12 +152,7 @@ def _get_ibis_column(self, key: str) -> ibis_types.Value:
             raise ValueError(
                 "Column name {} not in set of values: {}".format(key, self.column_ids)
             )
-        return typing.cast(
-            ibis_types.Value,
-            bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(
-                self._column_names[key]
-            ),
-        )
+        return typing.cast(ibis_types.Value, self._column_names[key])
 
     def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
         ibis_type = typing.cast(
@@ -327,12 +322,7 @@ def _to_ibis_expr(
         if not columns:
             return ibis.memtable([])
 
-        # Make sure all dtypes are the "canonical" ones for BigFrames. This is
-        # important for operations like UNION where the schema must match.
-        table = self._table.select(
-            bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(column)
-            for column in columns
-        )
+        table = self._table.select(columns)
         base_table = table
         if self._reduced_predicate is not None:
             table = table.filter(base_table[PREDICATE_COLUMN])
@@ -1039,14 +1029,7 @@ def _to_ibis_expr(
         # Make sure we don't have any unbound (deferred) columns.
         table = self._table.select(columns)
 
-        # Make sure all dtypes are the "canonical" ones for BigFrames. This is
-        # important for operations like UNION where the schema must match.
-        table = table.select(
-            bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(
-                table[column]
-            )
-            for column in table.columns
-        )
+        table = table.select(table[column] for column in table.columns)
         base_table = table
         if self._reduced_predicate is not None:
             table = table.filter(base_table[PREDICATE_COLUMN])

@@ -208,6 +208,7 @@ def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value:
     name = value.get_name()
     if ibis_type.is_json():
         value = vendored_ibis_ops.ToJsonString(value).to_expr()
+        value = value.case().when("null", ibis.null()).else_(value).end()
         return value.name(name)
     # Allow REQUIRED fields to be joined with NULLABLE fields.
     nullable_type = ibis_type.copy(nullable=True)

@@ -23,11 +23,13 @@
 
 
 def _get_series_from_json(json_data):
+    # Note: converts None to a SQL NULL, not to a JSON null value.
+    values = [
+        f"JSON '{json.dumps(data)}'" if data is not None else "NULL"
+        for data in json_data
+    ]
     sql = " UNION ALL ".join(
-        [
-            f"SELECT {id} AS id, JSON '{json.dumps(data)}' AS data"
-            for id, data in enumerate(json_data)
-        ]
+        [f"SELECT {id} AS id, {value} AS data" for id, value in enumerate(values)]
     )
     df = bpd.read_gbq(sql).set_index("id").sort_index()
     return df["data"]
@@ -114,19 +116,19 @@ def test_json_set_w_invalid_series_type():
 
 def test_json_extract_from_json():
     s = _get_series_from_json([{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}])
-    actual = bbq.json_extract(s, "$.a.b")
+    actual = bbq.json_extract(s, "$.a.b").to_pandas()
     # After the introduction of the JSON type, the output should be a JSON-formatted series.
-    expected = _get_series_from_json(["[1,2]", None, "0"])
+    expected = _get_series_from_json([[1, 2], None, 0]).to_pandas()
     pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
+        actual,
+        expected,
     )
 
 
 def test_json_extract_from_string():
     s = bpd.Series(['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'])
     actual = bbq.json_extract(s, "$.a.b")
-    expected = _get_series_from_json(["[1,2]", None, "0"])
+    expected = _get_series_from_json([[1, 2], None, 0])
     pd.testing.assert_series_equal(
         actual.to_pandas(),
         expected.to_pandas(),