delta-io · ion-elgreco · May 28, 2025 · May 28, 2025
@@ -178,11 +178,14 @@ pub fn cast_record_batch(
         ..Default::default()
     };
 
-    let s = StructArray::new(
-        batch.schema().as_ref().to_owned().fields,
-        batch.columns().to_owned(),
-        None,
-    );
+    // TODO: simplify with StructArray::try_new_with_length once arrow is upgraded to 55.1
+    let col_arrays = batch.columns().to_owned();
+    let s = if col_arrays.is_empty() {
+        StructArray::new_empty_fields(batch.num_rows(), None)
+    } else {
+        StructArray::new(batch.schema().as_ref().to_owned().fields, col_arrays, None)
+    };
+
     let struct_array = cast_struct(&s, target_schema.fields(), &cast_options, add_missing)?;
 
     Ok(RecordBatch::try_new_with_options(

@@ -27,6 +27,7 @@ use datafusion_proto::bytes::{
 use deltalake_core::delta_datafusion::{DeltaScan, DeltaTableFactory};
 use deltalake_core::kernel::{DataType, MapType, PrimitiveType, StructField, StructType};
 use deltalake_core::operations::create::CreateBuilder;
+use deltalake_core::operations::write::SchemaMode;
 use deltalake_core::protocol::SaveMode;
 use deltalake_core::writer::{DeltaWriter, RecordBatchWriter};
 use deltalake_core::{
@@ -1358,6 +1359,79 @@ async fn simple_query(context: &IntegrationContext) -> TestResult {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_schema_adapter_empty_batch() {
+    let ctx = SessionContext::new();
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let table_uri = tmp_dir.path().to_str().unwrap();
+
+    // Create a table with a single integer column `a`
+    let table = DeltaOps::try_from_uri(table_uri)
+        .await
+        .unwrap()
+        .create()
+        .with_column(
+            "a",
+            DataType::Primitive(PrimitiveType::Integer),
+            false,
+            None,
+        )
+        .await
+        .unwrap();
+
+    // First write: three rows that contain only column `a`
+    let a_arr = Int32Array::from(vec![1, 2, 3]);
+    let table = DeltaOps(table)
+        .write(vec![RecordBatch::try_from_iter_with_nullable(vec![(
+            "a",
+            Arc::new(a_arr) as ArrayRef,
+            false,
+        )])
+        .unwrap()])
+        .await
+        .unwrap();
+
+    // Second write: evolve the schema by merging in a new nullable column `b`
+    let a_arr = Int32Array::from(vec![4, 5, 6]);
+    let b_arr = Int32Array::from(vec![7, 8, 9]);
+    let table = DeltaOps(table)
+        .write(vec![RecordBatch::try_from_iter_with_nullable(vec![
+            ("a", Arc::new(a_arr) as ArrayRef, false),
+            ("b", Arc::new(b_arr) as ArrayRef, true),
+        ])
+        .unwrap()])
+        .with_schema_mode(SchemaMode::Merge)
+        .await
+        .unwrap();
+
+    // Project only the new column `b`, which is absent from the files of the first write
+    let batches = ctx
+        .read_table(Arc::new(table))
+        .unwrap()
+        .select_exprs(&["b"])
+        .unwrap()
+        .collect()
+        .await
+        .unwrap();
+
+    assert_batches_sorted_eq!(
+        #[rustfmt::skip]
+        &[
+            "+---+",
+            "| b |",
+            "+---+",
+            "|   |", // blank cells: the three rows written before `b` existed
+            "|   |",
+            "|   |",
+            "| 7 |",
+            "| 8 |",
+            "| 9 |",
+            "+---+",
+        ],
+        &batches
+    );
+}
+
 mod date_partitions {
     use super::*;
 

@@ -1,6 +1,6 @@
 .DEFAULT_GOAL := help
 
-PACKAGE_VERSION := $(shell grep version Cargo.toml | head -n 1 | awk '{print $$3}' | tr -d '"' )
+PACKAGE_VERSION := $(shell grep version Cargo.toml | head -n 1 | awk '{print $$3}' | tr -d '"-' )
 
 .PHONY: setup
 setup: ## Setup the requirements