Skip to content

Using oindex and vindex instead of ExplicitIndexer objects. #9273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 48 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
3029943
temporary enable CI triggers on feature branch
andersy005 Apr 10, 2024
ddd4cdb
add `.oindex` and `.vindex` to `BackendArray` (#8885)
andersy005 Apr 17, 2024
96ac4b7
Enable explicit use of key tuples (instead of *Indexer objects) in in…
andersy005 May 3, 2024
18c5c70
Trigger CI only if code files are modified. (#9006)
dcherian May 6, 2024
b46c320
Merge branch 'main' into backend-indexing
andersy005 May 10, 2024
7d55345
Merge branch 'main' into backend-indexing
andersy005 May 12, 2024
795daf2
fix bad merge
andersy005 May 12, 2024
f2c4659
Micro optimization -- use tuples throughout backend indexing (#9009)
hmaarrfk May 12, 2024
d763c02
Merge branch 'main' into backend-indexing
andersy005 May 13, 2024
45ceac6
Merge branch 'main' into backend-indexing
andersy005 May 21, 2024
8d64f60
Merge branch 'main' into backend-indexing
andersy005 May 23, 2024
46a902f
Merge branch 'main' into backend-indexing
dcherian Jun 24, 2024
62d474f
Merge branch 'main' into backend-indexing
dcherian Jul 24, 2024
07f663e
Merge branch 'main' into backend-indexing
andersy005 Oct 30, 2024
8b591e0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 30, 2024
8cc0d29
enhance type annotations and improve clarity
andersy005 Oct 30, 2024
5884605
Fix indexing logic to correctly handle array with __array_function__ …
andersy005 Oct 30, 2024
146daff
Merge branch 'main' into backend-indexing
andersy005 Nov 1, 2024
50791e0
update indexing methods to use OuterIndexer type
andersy005 Nov 1, 2024
014e7cf
remove unnecessary copy argument from __array__ method in MemoryCache…
andersy005 Nov 1, 2024
5e22be6
another attempt at fixing types
andersy005 Nov 1, 2024
7056aba
remove backend-indexing branch from CI workflows
andersy005 Nov 1, 2024
43046e8
remove unnecessary type ignore comments
andersy005 Nov 1, 2024
2a7e2f2
fix: update indexing to use tuple from indexer for improved compatibi…
andersy005 Nov 1, 2024
ead4251
more type hints
andersy005 Nov 1, 2024
08dc10a
Merge branch 'main' into backend-indexing
andersy005 Nov 4, 2024
ed74afa
Merge branch 'main' into backend-indexing
andersy005 Nov 4, 2024
76b2d5a
update type hints for `expanded_indexer()` function
andersy005 Nov 5, 2024
938a846
Merge branch 'main' into backend-indexing
andersy005 Nov 5, 2024
389a62f
Merge branch 'main' into backend-indexing
andersy005 Nov 7, 2024
ee81af3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2024
550ba28
Merge branch 'main' into backend-indexing
andersy005 Nov 8, 2024
0c86622
Use tuples for indexing
dcherian Nov 11, 2024
0f54b64
Remove CompatIndexedTuple
dcherian Nov 19, 2024
ab20531
Merge branch 'main' into backend-indexing
dcherian Nov 19, 2024
b60accd
Some typing work
dcherian Nov 19, 2024
222c5c2
more typing
dcherian Nov 19, 2024
810b822
Fix test
dcherian Nov 19, 2024
a414965
strict=True
dcherian Nov 19, 2024
0b99aea
more typing
dcherian Nov 19, 2024
2ceaeac
fix
dcherian Nov 19, 2024
749da0b
some more fixes
dcherian Nov 19, 2024
f58262a
little more type narrowing
dcherian Nov 19, 2024
dcd3ac9
Refactor backend indexing tests
dcherian Nov 19, 2024
2105aa0
Add legacy backend indexing tests
dcherian Nov 19, 2024
fb24e9c
Avoid raising deprecation warning now.
dcherian Nov 19, 2024
1ffe5e9
Merge branch 'main' into backend-indexing
dcherian Jan 30, 2025
6064046
fix
dcherian Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
if TYPE_CHECKING:
from xarray.core.dataset import Dataset
from xarray.core.types import NestedSequence
from xarray.namedarray._typing import _OuterIndexerKey, _VectorizedIndexerKey

T_Name = Union[Hashable, None]

Expand Down Expand Up @@ -268,11 +269,35 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500


class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed):
__slots__ = ()

def get_duck_array(self, dtype: np.typing.DTypeLike = None):
key = indexing.BasicIndexer((slice(None),) * self.ndim)
return self[key] # type: ignore[index]
return self[key] # type: ignore [index]


class NewBackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's possible to do this without creating a new BackendArray class for backends.

__slots__ = ("indexing_support",)

def get_duck_array(self, dtype: np.typing.DTypeLike = None):
    """Return the whole array by indexing ``self`` with one full slice per axis.

    The key is a plain tuple holding ``self.ndim`` copies of ``slice(None)``
    rather than an indexer object. ``dtype`` is accepted but not used here.
    """
    select_everything = tuple(slice(None) for _ in range(self.ndim))
    return self[select_everything]  # type: ignore [index]

def _oindex_get(self, key: _OuterIndexerKey) -> Any:
raise NotImplementedError(
f"{self.__class__.__name__}._oindex_get method should be overridden"
)

def _vindex_get(self, key: _VectorizedIndexerKey) -> Any:
raise NotImplementedError(
f"{self.__class__.__name__}._vindex_get method should be overridden"
)

@property
def oindex(self) -> indexing.IndexCallable:
    """Accessor for outer indexing: wraps ``self._oindex_get`` in an ``IndexCallable``.

    A new ``IndexCallable`` is built on every attribute access.
    """
    outer_getter = self._oindex_get
    return indexing.IndexCallable(outer_getter)

@property
def vindex(self) -> indexing.IndexCallable:
    """Accessor for vectorized indexing: wraps ``self._vindex_get`` in an ``IndexCallable``.

    A new ``IndexCallable`` is built on every attribute access.
    """
    vectorized_getter = self._vindex_get
    return indexing.IndexCallable(vectorized_getter)
Comment on lines +294 to +300
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a reminder, the goal is to use `.oindex` and `.vindex` to be explicit about which kind of indexing to use, instead of relying on IndexingAdapter subclasses.

I think this gets us nearly all the way to removing the special case for ExplicitlyIndexed in `as_compatible_data`.



class AbstractDataStore:
Expand Down
23 changes: 20 additions & 3 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,33 @@
from xarray.core.dataset import Dataset
from xarray.core.datatree import DataTree
from xarray.core.types import ReadBuffer
from xarray.namedarray._typing import (
_BasicIndexerKey,
_OuterIndexerKey,
_VectorizedIndexerKey,
)


class H5NetCDFArrayWrapper(BaseNetCDF4Array):
indexing_support = indexing.IndexingSupport.OUTER_1VECTOR

def get_array(self, needs_lock=True):
    """Acquire the dataset from the datastore and return this wrapper's variable.

    ``needs_lock`` is forwarded unchanged to ``self.datastore._acquire``; the
    variable is looked up by ``self.variable_name``.
    """
    dataset = self.datastore._acquire(needs_lock)
    variables = dataset.variables
    return variables[self.variable_name]

def __getitem__(self, key):
return indexing.explicit_indexing_adapter(
key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem
def _oindex_get(self, key: _OuterIndexerKey) -> Any:
return indexing.outer_indexing_adapter(
Copy link
Contributor Author

@dcherian dcherian Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we have three different adapters. This kind of dispatching used to happen inside `explicit_indexing_adapter`, based on the IndexingAdapter subclasses; now it is more explicit.

key, self.shape, self.indexing_support, self._getitem
)

def _vindex_get(self, key: _VectorizedIndexerKey) -> Any:
    """Serve a vectorized-indexing key through ``indexing.vectorized_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.vectorized_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def __getitem__(self, key: _BasicIndexerKey) -> Any:
    """Serve a basic-indexing key through ``indexing.basic_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.basic_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def _getitem(self, key):
Expand Down
27 changes: 21 additions & 6 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from xarray import coding
from xarray.backends.common import (
BACKEND_ENTRYPOINTS,
BackendArray,
BackendEntrypoint,
NewBackendArray,
WritableCFDataStore,
_normalize_path,
datatree_from_dict_with_io_cleanup,
Expand Down Expand Up @@ -48,6 +48,11 @@
from xarray.core.dataset import Dataset
from xarray.core.datatree import DataTree
from xarray.core.types import ReadBuffer
from xarray.namedarray._typing import (
_BasicIndexerKey,
_OuterIndexerKey,
_VectorizedIndexerKey,
)

# This lookup table maps from dtype.byteorder to a readable endian
# string used by netCDF4.
Expand All @@ -56,7 +61,7 @@
NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK])


class BaseNetCDF4Array(BackendArray):
class BaseNetCDF4Array(NewBackendArray):
__slots__ = ("datastore", "dtype", "shape", "variable_name")

def __init__(self, variable_name, datastore):
Expand Down Expand Up @@ -88,7 +93,7 @@ def get_array(self, needs_lock=True):


class NetCDF4ArrayWrapper(BaseNetCDF4Array):
__slots__ = ()
indexing_support = indexing.IndexingSupport.OUTER

def get_array(self, needs_lock=True):
ds = self.datastore._acquire(needs_lock)
Expand All @@ -99,9 +104,19 @@ def get_array(self, needs_lock=True):
variable.set_auto_chartostring(False)
return variable

def __getitem__(self, key):
return indexing.explicit_indexing_adapter(
key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
def _oindex_get(self, key: _OuterIndexerKey):
    """Serve an outer-indexing key through ``indexing.outer_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.outer_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def _vindex_get(self, key: _VectorizedIndexerKey):
    """Serve a vectorized-indexing key through ``indexing.vectorized_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.vectorized_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def __getitem__(self, key: _BasicIndexerKey):
    """Serve a basic-indexing key through ``indexing.basic_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.basic_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def _getitem(self, key):
Expand Down
29 changes: 23 additions & 6 deletions xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from xarray.backends.common import (
BACKEND_ENTRYPOINTS,
AbstractDataStore,
BackendArray,
BackendEntrypoint,
NewBackendArray,
robust_getitem,
)
from xarray.backends.store import StoreBackendEntrypoint
Expand All @@ -29,10 +29,17 @@

from xarray.core.dataset import Dataset
from xarray.core.types import ReadBuffer
from xarray.namedarray._typing import (
_BasicIndexerKey,
_OuterIndexerKey,
_VectorizedIndexerKey,
)


class PydapArrayWrapper(BackendArray):
def __init__(self, array):
class PydapArrayWrapper(NewBackendArray):
indexing_support = indexing.IndexingSupport.BASIC

def __init__(self, array) -> None:
self.array = array

@property
Expand All @@ -43,9 +50,19 @@ def shape(self) -> tuple[int, ...]:
def dtype(self):
return self.array.dtype

def __getitem__(self, key):
return indexing.explicit_indexing_adapter(
key, self.shape, indexing.IndexingSupport.BASIC, self._getitem
def _oindex_get(self, key: _OuterIndexerKey) -> Any:
    """Serve an outer-indexing key through ``indexing.outer_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.outer_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def _vindex_get(self, key: _VectorizedIndexerKey) -> Any:
    """Serve a vectorized-indexing key through ``indexing.vectorized_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.vectorized_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def __getitem__(self, key: _BasicIndexerKey) -> Any:
    """Serve a basic-indexing key through ``indexing.basic_indexing_adapter``.

    The adapter receives the key, this array's shape, the class-level
    ``indexing_support`` value and the bound ``self._getitem``.
    """
    adapter = indexing.basic_indexing_adapter
    return adapter(key, self.shape, self.indexing_support, self._getitem)

def _getitem(self, key):
Expand Down
46 changes: 34 additions & 12 deletions xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

from xarray.backends.common import (
BACKEND_ENTRYPOINTS,
BackendArray,
BackendEntrypoint,
NewBackendArray,
WritableCFDataStore,
_normalize_path,
)
Expand All @@ -37,6 +37,11 @@
from xarray.backends.common import AbstractDataStore
from xarray.core.dataset import Dataset
from xarray.core.types import ReadBuffer
from xarray.namedarray._typing import (
_BasicIndexerKey,
_OuterIndexerKey,
_VectorizedIndexerKey,
)


HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0")
Expand All @@ -54,7 +59,9 @@ def _decode_attrs(d):
return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()}


class ScipyArrayWrapper(BackendArray):
class ScipyArrayWrapper(NewBackendArray):
indexing_support = indexing.IndexingSupport.OUTER_1VECTOR

def __init__(self, variable_name, datastore):
self.datastore = datastore
self.variable_name = variable_name
Expand All @@ -66,15 +73,7 @@ def get_variable(self, needs_lock=True):
ds = self.datastore._manager.acquire(needs_lock)
return ds.variables[self.variable_name]

def _getitem(self, key):
with self.datastore.lock:
data = self.get_variable(needs_lock=False).data
return data[key]

def __getitem__(self, key):
data = indexing.explicit_indexing_adapter(
key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem
)
def _finalize_result(self, data):
# Copy data if the source file is mmapped. This makes things consistent
# with the netCDF4 library by ensuring we can safely read arrays even
# after closing associated files.
Expand All @@ -87,7 +86,30 @@ def __getitem__(self, key):

return np.array(data, dtype=self.dtype, copy=copy)

def __setitem__(self, key, value):
def _getitem(self, key):
with self.datastore.lock:
data = self.get_variable(needs_lock=False).data
return data[key]

def _vindex_get(self, key: _VectorizedIndexerKey) -> Any:
    """Serve a vectorized-indexing key, then post-process the result.

    The key goes through ``indexing.vectorized_indexing_adapter`` together
    with this array's shape, ``indexing_support`` and ``self._getitem``; the
    adapter's output is then passed to ``self._finalize_result``.
    """
    adapter = indexing.vectorized_indexing_adapter
    selected = adapter(key, self.shape, self.indexing_support, self._getitem)
    return self._finalize_result(selected)

def _oindex_get(self, key: _OuterIndexerKey) -> Any:
    """Serve an outer-indexing key, then post-process the result.

    The key goes through ``indexing.outer_indexing_adapter`` together with
    this array's shape, ``indexing_support`` and ``self._getitem``; the
    adapter's output is then passed to ``self._finalize_result``.
    """
    adapter = indexing.outer_indexing_adapter
    selected = adapter(key, self.shape, self.indexing_support, self._getitem)
    return self._finalize_result(selected)

def __getitem__(self, key: _BasicIndexerKey) -> Any:
    """Serve a basic-indexing key, then post-process the result.

    The key goes through ``indexing.basic_indexing_adapter`` together with
    this array's shape, ``indexing_support`` and ``self._getitem``; the
    adapter's output is then passed to ``self._finalize_result``.
    """
    adapter = indexing.basic_indexing_adapter
    selected = adapter(key, self.shape, self.indexing_support, self._getitem)
    return self._finalize_result(selected)

def __setitem__(self, key, value) -> None:
with self.datastore.lock:
data = self.get_variable(needs_lock=False)
try:
Expand Down
52 changes: 30 additions & 22 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from xarray.backends.common import (
BACKEND_ENTRYPOINTS,
AbstractWritableDataStore,
BackendArray,
BackendEntrypoint,
NewBackendArray,
_encode_variable_name,
_normalize_path,
datatree_from_dict_with_io_cleanup,
Expand All @@ -42,6 +42,11 @@
from xarray.core.dataset import Dataset
from xarray.core.datatree import DataTree
from xarray.core.types import ReadBuffer, ZarrArray, ZarrGroup
from xarray.namedarray._typing import (
_BasicIndexerKey,
_OuterIndexerKey,
_VectorizedIndexerKey,
)


def _get_mappers(*, storage_options, store, chunk_store):
Expand Down Expand Up @@ -179,8 +184,8 @@ def encode_zarr_attr_value(value):
return encoded


class ZarrArrayWrapper(BackendArray):
__slots__ = ("_array", "dtype", "shape")
class ZarrArrayWrapper(NewBackendArray):
indexing_support = indexing.IndexingSupport.VECTORIZED

def __init__(self, zarr_array):
# some callers attempt to evaluate an array if an `array` property exists on the object.
Expand All @@ -203,25 +208,28 @@ def __init__(self, zarr_array):
def get_array(self):
    """Return the object stored in ``self._array`` as-is."""
    wrapped = self._array
    return wrapped

def _oindex(self, key):
return self._array.oindex[key]

def _vindex(self, key):
return self._array.vindex[key]

def _getitem(self, key):
return self._array[key]

def __getitem__(self, key):
array = self._array
if isinstance(key, indexing.BasicIndexer):
method = self._getitem
elif isinstance(key, indexing.VectorizedIndexer):
method = self._vindex
elif isinstance(key, indexing.OuterIndexer):
method = self._oindex
return indexing.explicit_indexing_adapter(
key, array.shape, indexing.IndexingSupport.VECTORIZED, method
def _oindex_get(self, key: _OuterIndexerKey) -> Any:
    """Serve an outer-indexing key via the wrapped array's ``oindex`` accessor.

    ``indexing.outer_indexing_adapter`` is given the key, the wrapped array's
    shape, the class-level ``indexing_support`` value, and a callable that
    reads ``self._array.oindex[...]``.
    """
    return indexing.outer_indexing_adapter(
        key,
        self._array.shape,
        self.indexing_support,
        lambda raw_key: self._array.oindex[raw_key],
    )

def _vindex_get(self, key: _VectorizedIndexerKey) -> Any:
    """Serve a vectorized-indexing key via the wrapped array's ``vindex`` accessor.

    ``indexing.vectorized_indexing_adapter`` is given the key, the wrapped
    array's shape, the class-level ``indexing_support`` value, and a callable
    that reads ``self._array.vindex[...]``.
    """
    return indexing.vectorized_indexing_adapter(
        key,
        self._array.shape,
        self.indexing_support,
        lambda raw_key: self._array.vindex[raw_key],
    )

def __getitem__(self, key: _BasicIndexerKey) -> Any:
    """Serve a basic-indexing key by subscripting the wrapped array directly.

    ``indexing.basic_indexing_adapter`` is given the key, the wrapped array's
    shape, the class-level ``indexing_support`` value, and a callable that
    reads ``self._array[...]``.
    """
    return indexing.basic_indexing_adapter(
        key,
        self._array.shape,
        self.indexing_support,
        lambda raw_key: self._array[raw_key],
    )

# if self.ndim == 0:
Expand Down
Loading
Loading