refactor: remove pyarrow dependency #3459


Merged · 2 commits · May 24, 2025
2 changes: 1 addition & 1 deletion .github/scripts/retry_integration_test.sh
@@ -5,7 +5,7 @@
 MAX_RETRIES=$2
 RETRY_DELAY=$3
 ATTEMPT=1
 run_command() {
-  uv run --no-sync pytest -m "($TEST_NAME and integration)" --doctest-modules 2>&1
+  uv run --no-sync pytest -m "($TEST_NAME and integration and pyarrow)" --doctest-modules 2>&1
 }
 until [ $ATTEMPT -gt $MAX_RETRIES ]
 do
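pytest's `-m` flag takes a boolean expression over markers, so `"($TEST_NAME and integration and pyarrow)"` now selects only tests carrying all three markers. A toy sketch of how a test opts in (hypothetical test body; the marker names are the ones this PR uses):

``` python
import pytest

@pytest.mark.s3           # stands in for $TEST_NAME from the CI matrix
@pytest.mark.integration
@pytest.mark.pyarrow      # marker introduced by this PR
def test_s3_roundtrip():
    ...                   # placeholder body
```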
3 changes: 1 addition & 2 deletions .github/workflows/python_benchmark.yml
@@ -39,7 +39,7 @@ jobs:
 
       - name: Run benchmark
         run: |
-          uv run pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
+          uv run pytest tests/test_benchmark.py -m '(benchmark and pyarrow)' --benchmark-json output.json
 
       - name: Store benchmark result
         uses: benchmark-action/github-action-benchmark@v1
@@ -48,4 +48,3 @@
           output-file-path: python/output.json
           external-data-json-path: ./cache/benchmark-data.json
           fail-on-alert: true
-
30 changes: 25 additions & 5 deletions .github/workflows/python_build.yml
@@ -33,7 +33,7 @@ jobs:
 
       - name: Check Python
         run: |
-          uv sync --only-group dev --no-install-project
+          uv sync --no-install-project
           make check-python
 
       - name: Check Rust
@@ -70,13 +70,18 @@ jobs:
         run: make develop
 
       - name: Run tests
-        run: uv run --no-sync pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules
+        run: uv run --no-sync pytest -m '((s3 or azure) and integration) or not integration and not benchmark and pyarrow' --doctest-modules
 
       - name: Test without pandas
         run: |
           uv pip uninstall pandas
-          uv run --no-sync pytest -m "not pandas and not integration and not benchmark"
-          uv pip install pandas
+          uv run --no-sync pytest -m "pyarrow and not pandas and not integration and not benchmark"
+
+      - name: Test without pyarrow and without pandas
+        run: |
+          uv pip uninstall pyarrow
+          uv run --no-sync pytest -m "not pyarrow and not pandas and not integration and not benchmark and no_pyarrow"
 
 
   test-lakefs:
     name: Python Build (Python 3.10 LakeFS Integration tests)
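For the `pyarrow` and `no_pyarrow` expressions above to select anything, the markers have to be registered and applied on the pytest side. A minimal sketch of that registration (the marker names come from this workflow; the `conftest.py` below is an assumption for illustration, not part of this diff):

``` python
# conftest.py -- hypothetical marker registration, not part of this PR
def pytest_configure(config):
    # Tests that exercise pyarrow-backed code paths.
    config.addinivalue_line("markers", "pyarrow: requires pyarrow to be installed")
    # Tests that must pass in an environment without pyarrow.
    config.addinivalue_line("markers", "no_pyarrow: must run when pyarrow is absent")
```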
@@ -189,4 +194,19 @@ jobs:
 
       - name: Run deltalake
         run: |
-          uv run python -c 'import deltalake'
+          uv run --no-sync python -c 'import deltalake'
+
+      - name: Run deltalake without pyarrow
+        run: |
+          uv pip uninstall pyarrow
+          uv run --no-sync python -c 'import deltalake'
+
+      - name: Run deltalake without pyarrow pandas
+        run: |
+          uv pip uninstall pyarrow pandas
+          uv run --no-sync python -c 'import deltalake'
+
+      - name: Run deltalake without pandas
+        run: |
+          uv pip install pyarrow
+          uv run --no-sync python -c 'import deltalake'
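All four smoke tests reduce to one invariant: `import deltalake` must succeed with any combination of pyarrow and pandas installed. A standalone sketch of that invariant (illustrative only, not the package's actual import logic):

``` python
# Hypothetical smoke check mirroring the workflow steps above.
try:
    import pyarrow  # optional after this PR
    HAS_PYARROW = True
except ImportError:
    HAS_PYARROW = False

import deltalake  # must import cleanly either way

print("deltalake imported; pyarrow present:", HAS_PYARROW)
```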
8 changes: 4 additions & 4 deletions docs/usage/examining-table.md
@@ -57,7 +57,7 @@
 let schema = table.get_schema()?;
 println!("schema: {:?}", schema);
 ```
-These schemas have a JSON representation that can be retrieved. 
+These schemas have a JSON representation that can be retrieved.
 
 === "Python"
     To reconstruct from json, use [DeltaTable.schema.to_json()][deltalake.schema.Schema.to_json].
@@ -73,10 +73,10 @@
 It is also possible to retrieve the Arrow schema:
 
 === "Python"
 
-    Use [DeltaTable.schema.to_pyarrow()][deltalake.schema.Schema.to_pyarrow] to retrieve the PyArrow schema:
+    Use [DeltaTable.schema.to_arrow()][deltalake.schema.Schema.to_arrow] to retrieve the Arro3 schema:
 
     ``` python
-    >>> dt.schema().to_pyarrow()
+    >>> dt.schema().to_arrow()
     id: int64
     ```
 === "Rust"
@@ -135,8 +135,8 @@
 >>> dt = DeltaTable("../rust/tests/data/delta-0.8.0")
 >>> dt.get_add_actions(flatten=True).to_pandas()
                                                 path  size_bytes    modification_time  data_change  num_records  null_count.value  min.value  max.value
 0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440  2021-03-06 15:16:07         True            2                 0          0          2
 1  part-00000-04ec9591-0b73-459e-8d18-ba5711d6cbe...         440  2021-03-06 15:16:16         True            2                 0          2          4
 ```

=== "Rust"
@@ -152,7 +152,7 @@
 >>> dt = DeltaTable("../rust/tests/data/delta-0.8.0", version=0)
 >>> dt.get_add_actions(flatten=True).to_pandas()
                                                 path  size_bytes    modification_time  data_change  num_records  null_count.value  min.value  max.value
 0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440  2021-03-06 15:16:07         True            2                 0          0          2
 1  part-00001-911a94a2-43f6-4acb-8620-5e68c265498...         445  2021-03-06 15:16:07         True            3                 0          2          4
```
=== "Rust"
@@ -161,4 +161,4 @@
 table.load_version(0).await?;
 let actions = table.snapshot()?.add_actions_table(true)?;
 println!("{}", pretty_format_batches(&vec![actions])?);
-```
+```
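As a companion to the schema docs changed above, a sketch of the JSON round-trip they describe (assuming `Schema.from_json` accepts the string produced by `to_json`, per the deltalake API the page links to):

``` python
from deltalake import DeltaTable
from deltalake.schema import Schema

dt = DeltaTable("../rust/tests/data/delta-0.8.0")
json_repr = dt.schema().to_json()        # JSON form of the Delta schema
restored = Schema.from_json(json_repr)   # rebuild the schema object
assert restored == dt.schema()
```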
6 changes: 3 additions & 3 deletions docs/usage/querying-delta-tables.md
@@ -18,7 +18,7 @@ To load into Pandas or a PyArrow table use the `DeltaTable.to_pandas` and `Delta
 ``` python
 >>> from deltalake import DeltaTable
 >>> dt = DeltaTable("../rust/tests/data/delta-0.8.0-partitioned")
->>> dt.schema().to_pyarrow()
+>>> dt.schema().to_arrow()
 value: string
 year: string
 month: string
@@ -117,7 +117,7 @@ Dask Name: read-parquet, 6 tasks
 1  7  2021  12  20
 ```
 
-When working with the Rust API, Apache Datafusion can be used to query data from a delta table. 
+When working with the Rust API, Apache Datafusion can be used to query data from a delta table.
 
 ```rust
 let table = deltalake::open_table("../rust/tests/data/delta-0.8.0-partitioned").await?;
@@ -134,4 +134,4 @@ let ctx = SessionContext::new();
 let dataframe = ctx.read_table( Arc::new(table.clone()))?;
 let df = dataframe.filter(col("year").eq(lit(2021)))?.select(vec![col("value")])?;
 df.show().await?;
-```
+```
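Complementing the querying docs above, a short sketch of reading the same partitioned table with partition pruning and column projection (parameters per the documented `DeltaTable.to_pandas` signature; treat as illustrative):

``` python
from deltalake import DeltaTable

dt = DeltaTable("../rust/tests/data/delta-0.8.0-partitioned")
# Prune to one partition and project a single column before materializing.
df = dt.to_pandas(partitions=[("year", "=", "2021")], columns=["value"])
print(df)
```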
2 changes: 2 additions & 0 deletions python/Cargo.toml
@@ -17,6 +17,8 @@ doc = false
 [dependencies]
 delta_kernel.workspace = true
 
+pyo3-arrow = { version = "0.9.0", default-features = false}
+
 # arrow
 arrow-schema = { workspace = true, features = ["serde"] }
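The practical effect of the pyo3-arrow dependency on the Python side is that `to_arrow()` returns arro3 objects speaking the Arrow PyCapsule protocol instead of pyarrow objects. A hedged sketch of converting back when pyarrow is still wanted (assumes pyarrow >= 14, which can consume `__arrow_c_schema__` exporters):

``` python
import pyarrow as pa
from deltalake import DeltaTable

dt = DeltaTable("../rust/tests/data/delta-0.8.0")
arro3_schema = dt.schema().to_arrow()  # arro3 schema; pyarrow not required
pa_schema = pa.schema(arro3_schema)    # pyarrow imports it via the C Data Interface
```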
2 changes: 1 addition & 1 deletion python/Makefile
@@ -67,7 +67,7 @@ test-cov: ## Create coverage report
 
 .PHONY: test-pyspark
 test-pyspark:
-	uv run --no-sync pytest -m 'pyspark and integration'
+	uv run --no-sync pytest -m 'pyarrow and pyspark and integration'
 
 .PHONY: build-documentation
 build-documentation: ## Build documentation with Sphinx