Skip to content

(feat): full v2 compat via python fallback #84

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Feb 11, 2025
Merged
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c7fb95a
chore(deps): bump zarr to 3.0.0rc1
LDeakin Jan 3, 2025
0a877e0
fmt
LDeakin Jan 3, 2025
def2e70
(feat): python fallback
ilan-gold Jan 28, 2025
622287d
Merge branch 'main' into ig/python_fallback
ilan-gold Jan 29, 2025
b362759
(fix): dtypes
ilan-gold Jan 30, 2025
fba8226
(fix): `object` dtypes + `v2` tests
ilan-gold Jan 30, 2025
4aa21a3
(fix): `object` dtypes + `v2` tests
ilan-gold Jan 30, 2025
a51e810
(fix): `object` dtypes + `v2` tests
ilan-gold Jan 30, 2025
19e90e3
(fix): `object` dtypes in rust
ilan-gold Feb 2, 2025
4a59ec1
(fix): blosc support
ilan-gold Feb 2, 2025
45efee1
(refactor): handle `None` fill-value more gracefully
ilan-gold Feb 2, 2025
59e60fc
fix: V2 codec pipeline creation
LDeakin Feb 3, 2025
1a6dc77
fix: zfpy/pcodec metadata handling
LDeakin Feb 3, 2025
008fd6a
(fix): fall back for unsupported codecs
ilan-gold Feb 4, 2025
9a0daa9
(fix): our decode codec pipeline does not support vlen
ilan-gold Feb 4, 2025
4637d24
(fix): string dtype test to match zarr-python
ilan-gold Feb 4, 2025
cf2e6b5
(chore): add note
ilan-gold Feb 4, 2025
00e73ed
(fix): ruff
ilan-gold Feb 4, 2025
d8aa2cc
(fix): rustfmt
ilan-gold Feb 4, 2025
8ea80bc
(fix): `pyi`
ilan-gold Feb 4, 2025
db255a9
(fix): try removing zarr main branch dep
ilan-gold Feb 4, 2025
cb4bedc
fix: use upstream implicit fill values
LDeakin Feb 5, 2025
26ee516
fix: use upstream metadata handling
LDeakin Feb 5, 2025
6ff6c2b
fix: cleanup fill value handling for string dtype
LDeakin Feb 7, 2025
abe4dd5
Revert "fix: cleanup fill value handling for string dtype"
LDeakin Feb 7, 2025
a618605
fix: cleanup fill value handling for string dtype
LDeakin Feb 7, 2025
4159751
fix: fmt and clippy warnings
LDeakin Feb 7, 2025
ae194a6
fix: zarr-python 0 fill value handling
LDeakin Feb 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: cleanup fill value handling for string dtype
  • Loading branch information
LDeakin committed Feb 7, 2025
commit a6186050120b2833411a75f71cf968345e0a5ca2
59 changes: 34 additions & 25 deletions src/chunk_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,34 @@ pub(crate) struct Basic {
representation: ChunkRepresentation,
}

/// Converts a Python fill value into a byte vector.
///
/// Conversion rules, in order:
/// - For the `"string"` dtype, an integer fill value equal to zero becomes the bytes of `"0"`
///   (matching zarr-python 2.x.x behaviour); any other integer is rejected.
/// - A Python `bytes` object is copied as is.
/// - Any object with a `tobytes` attribute is converted by calling `tobytes()`.
///
/// # Errors
///
/// Returns a `PyValueError` for a non-zero integer fill value with the `"string"` dtype, or for
/// a fill value that is neither `bytes` nor has a `tobytes` attribute. Errors raised by the
/// underlying Python calls are propagated unchanged.
fn fill_value_to_bytes(dtype: &str, fill_value: Bound<'_, PyAny>) -> PyResult<Vec<u8>> {
    if dtype == "string" {
        // Match zarr-python 2.x.x string fill value behaviour with a 0 fill value
        if let Ok(fill_value_int) = fill_value.downcast::<PyInt>() {
            // Test for zero via Python truthiness rather than extracting into a `usize`:
            // extraction fails for negative or very large integers before the descriptive
            // error below can be produced.
            if fill_value_int.is_truthy()? {
                return Err(PyErr::new::<PyValueError, _>(format!(
                    "Cannot understand non-zero integer {:?} fill value for dtype {:?}",
                    fill_value_int, dtype
                )));
            }
            return Ok(b"0".to_vec());
        }
    }

    if let Ok(fill_value_bytes) = fill_value.downcast::<PyBytes>() {
        Ok(fill_value_bytes.as_bytes().to_vec())
    } else if fill_value.hasattr("tobytes")? {
        // e.g. objects exposing a `tobytes()` method that returns something extractable
        // as a byte vector.
        Ok(fill_value.call_method0("tobytes")?.extract()?)
    } else {
        Err(PyErr::new::<PyValueError, _>(format!(
            "Unsupported fill value {:?}",
            fill_value
        )))
    }
}

#[gen_stub_pymethods]
#[pymethods]
impl Basic {
Expand All @@ -44,32 +72,13 @@ impl Basic {
.getattr("dtype")?
.call_method0("__str__")?
.extract()?;
let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
let fill_value_bytes: Vec<u8>;
if let Ok(fill_value_downcast) = fill_value.downcast::<PyBytes>() {
fill_value_bytes = fill_value_downcast.as_bytes().to_vec();
} else if fill_value.hasattr("tobytes")? {
fill_value_bytes = fill_value.call_method0("tobytes")?.extract()?;
} else if let Ok(fill_value_downcast) = fill_value.downcast::<PyInt>() {
let fill_value_usize: usize = fill_value_downcast.extract()?;
if fill_value_usize == (0 as usize) && dtype == "object" {
// https://github.com/LDeakin/zarrs/pull/140
fill_value_bytes = "".as_bytes().to_vec();
// zarrs doesn't understand `object` which is the output of `np.dtype("|O").__str__()`
// but maps it to "string" internally https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288
dtype = String::from("string");
} else {
return Err(PyErr::new::<PyValueError, _>(format!(
"Cannot understand non-zero integer {:?} fill value for dtype {:?}",
fill_value_usize, dtype
)));
}
} else {
return Err(PyErr::new::<PyValueError, _>(format!(
"Unsupported fill value {:?}",
fill_value
)));
if dtype == "object" {
// zarrs doesn't understand `object` which is the output of `np.dtype("|O").__str__()`
// but maps it to "string" internally https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288
dtype = String::from("string");
}
let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
let fill_value_bytes = fill_value_to_bytes(&dtype, fill_value)?;
Ok(Self {
store,
key: StoreKey::new(path).map_py_err::<PyValueError>()?,
Expand Down