Skip to content

Commit 196c6e4

Browse files
committed
Merge branch 'main' into add-intake-esgf-support
2 parents 8ae8f43 + 60b4f8d commit 196c6e4

File tree

32 files changed

+661
-8
lines changed

32 files changed

+661
-8
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ repos:
3333
- id: codespell
3434
additional_dependencies: [tomli] # required for Python 3.10
3535
- repo: https://github.com/astral-sh/ruff-pre-commit
36-
rev: "v0.12.4"
36+
rev: "v0.12.7"
3737
hooks:
3838
- id: ruff-check
3939
args: [--fix]
4040
- id: ruff-format
4141
- repo: https://github.com/pre-commit/mirrors-mypy
42-
rev: 'v1.17.0'
42+
rev: 'v1.17.1'
4343
hooks:
4444
- id: mypy
4545
additional_dependencies:

environment.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ channels:
55
- nodefaults
66

77
dependencies:
8+
- aiohttp
89
- cartopy
910
- cf-units
1011
- cftime
@@ -13,12 +14,14 @@ dependencies:
1314
- distributed
1415
- esgf-pyclient >=0.3.1
1516
- esmpy
17+
- esmvaltool-sample-data
1618
- filelock
1719
- fiona
1820
- fire
1921
- geopy
2022
- humanfriendly
2123
- intake-esgf
24+
- intake-esm
2225
- iris >=3.12.2 # https://github.com/SciTools/iris/issues/6417
2326
- iris-esmf-regrid >=0.11.0
2427
- iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535
@@ -47,6 +50,7 @@ dependencies:
4750
- shapely >=2.0.0
4851
- xarray
4952
- yamale
53+
- zarr >3
5054
# Python packages needed for building docs
5155
- autodocsumm >=0.2.2
5256
- ipython <9.0 # github.com/ESMValGroup/ESMValCore/issues/2680
@@ -67,5 +71,3 @@ dependencies:
6771
- pydocstyle
6872
- pylint
6973
# Not on conda forge - vprof
70-
- pip:
71-
- ESMValTool_sample_data

esmvalcore/preprocessor/_io.py

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
from itertools import groupby
1010
from pathlib import Path
1111
from typing import TYPE_CHECKING, Any
12+
from urllib.parse import urlparse
1213

14+
import fsspec
1315
import iris
1416
import ncdata
1517
import xarray as xr
@@ -82,6 +84,7 @@ def load(
8284
| xr.Dataset
8385
| ncdata.NcData,
8486
ignore_warnings: list[dict[str, Any]] | None = None,
87+
backend_kwargs: dict[str, Any] | None = None,
8588
) -> CubeList:
8689
"""Load Iris cubes.
8790
@@ -90,10 +93,19 @@ def load(
9093
file:
9194
File to be loaded. If ``file`` is already a loaded dataset, return it
9295
as a :class:`~iris.cube.CubeList`.
96+
A ``file`` given as a ``Path`` object may also be a Zarr store.
9397
ignore_warnings:
9498
Keyword arguments passed to :func:`warnings.filterwarnings` used to
9599
ignore warnings issued by :func:`iris.load_raw`. Each list element
96100
corresponds to one call to :func:`warnings.filterwarnings`.
101+
backend_kwargs:
102+
Dict to hold info needed by storage backend e.g. to access
103+
a PRIVATE S3 bucket containing object stores (e.g. netCDF4 files);
104+
needed by ``fsspec`` and its extensions e.g. ``s3fs``, so
105+
most of the time this will include ``storage_options``. Note that Zarr
106+
files are opened via ``http`` extension of ``fsspec``, so no need
107+
for ``storage_options`` in that case (i.e. anonymous access). Currently only used
108+
in Zarr file opening.
97109
98110
Returns
99111
-------
@@ -108,10 +120,22 @@ def load(
108120
Invalid type for ``file``.
109121
110122
"""
111-
if isinstance(file, DataElement):
123+
if isinstance(file, (str, Path)):
124+
extension = (
125+
file.suffix
126+
if isinstance(file, Path)
127+
else os.path.splitext(file)[1]
128+
)
129+
if "zarr" not in extension:
130+
cubes = _load_from_file(file, ignore_warnings=ignore_warnings)
131+
else:
132+
cubes = _load_zarr(
133+
file,
134+
ignore_warnings=ignore_warnings,
135+
backend_kwargs=backend_kwargs,
136+
)
137+
elif isinstance(file, DataElement):
112138
cubes = file.to_iris()
113-
elif isinstance(file, (str, Path)):
114-
cubes = _load_from_file(file, ignore_warnings=ignore_warnings)
115139
elif isinstance(file, Cube):
116140
cubes = CubeList([file])
117141
elif isinstance(file, CubeList):
@@ -143,6 +167,68 @@ def load(
143167
return cubes
144168

145169

170+
def _load_zarr(
171+
file: str | Path | Cube | CubeList | xr.Dataset | ncdata.NcData,
172+
ignore_warnings: list[dict[str, Any]] | None = None,
173+
backend_kwargs: dict[str, Any] | None = None,
174+
) -> CubeList:
175+
# note on ``chunks`` kwarg to ``xr.open_dataset()``
176+
# docs.xarray.dev/en/stable/generated/xarray.open_dataset.html
177+
# this is very important because with ``chunks=None`` (default)
178+
# data will be realized as Numpy arrays and transferred in memory;
179+
# ``chunks={}`` loads the data with dask using the engine preferred
180+
# chunk size, generally identical to the formats chunk size. If not
181+
# available, a single chunk for all arrays; testing shows this is the
182+
# "best guess" compromise for typically CMIP-like chunked data.
183+
# see https://github.com/pydata/xarray/issues/10612 and
184+
# https://github.com/pp-mo/ncdata/issues/139
185+
186+
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
187+
open_kwargs = {
188+
"consolidated": False,
189+
"decode_times": time_coder,
190+
"engine": "zarr",
191+
"chunks": {},
192+
"backend_kwargs": backend_kwargs,
193+
}
194+
195+
# case 1: Zarr store is on remote object store
196+
# file's URI will always be either http or https
197+
if urlparse(str(file)).scheme in ["http", "https"]:
198+
# basic test that opens the Zarr/.zmetadata file for Zarr2
199+
# or Zarr/zarr.json for Zarr3
200+
fs = fsspec.filesystem("http")
201+
valid_zarr = True
202+
try:
203+
fs.open(str(file) + "/zarr.json", "rb") # Zarr3
204+
except Exception: # noqa: BLE001
205+
try:
206+
fs.open(str(file) + "/.zmetadata", "rb") # Zarr2
207+
except Exception: # noqa: BLE001
208+
valid_zarr = False
209+
# we don't want to catch any specific aiohttp/fsspec exception
210+
# bottom line is that that file has issues, so raise
211+
if not valid_zarr:
212+
msg = (
213+
f"File '{file}' can not be opened as Zarr file at the moment."
214+
)
215+
raise ValueError(msg)
216+
217+
open_kwargs["consolidated"] = True
218+
zarr_xr = xr.open_dataset(file, **open_kwargs)
219+
# case 2: Zarr store is local to the file system
220+
else:
221+
zarr_xr = xr.open_dataset(file, **open_kwargs)
222+
223+
# avoid possible
224+
# ValueError: Object has inconsistent chunks along dimension time.
225+
# This can be fixed by calling unify_chunks().
226+
# when trying to access the ``chunks`` store
227+
zarr_xr = zarr_xr.unify_chunks()
228+
229+
return dataset_to_iris(zarr_xr, ignore_warnings=ignore_warnings)
230+
231+
146232
def _load_from_file(
147233
file: str | Path,
148234
ignore_warnings: list[dict[str, Any]] | None = None,

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ dynamic = [
3232
"version",
3333
]
3434
dependencies = [
35+
"aiohttp",
3536
"cartopy",
3637
"cf-units",
3738
"dask[array,distributed]>=2025", # Core/issues/2503
@@ -45,6 +46,7 @@ dependencies = [
4546
"geopy",
4647
"humanfriendly",
4748
"intake-esgf",
49+
"intake-esm",
4850
"iris-grib>=0.20.0", # github.com/ESMValGroup/ESMValCore/issues/2535
4951
"isodate>=0.7.0",
5052
"jinja2",
@@ -69,6 +71,7 @@ dependencies = [
6971
"stratify>=0.3",
7072
"xarray",
7173
"yamale",
74+
"zarr>3",
7275
]
7376
description = "A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts"
7477
license = {text = "Apache License, Version 2.0"}
@@ -84,7 +87,7 @@ test = [
8487
"pytest-metadata>=1.5.1",
8588
"pytest-mock",
8689
"pytest-xdist",
87-
"ESMValTool_sample_data==0.0.3",
90+
"ESMValTool_sample_data==0.0.4",
8891
]
8992
doc = [
9093
"autodocsumm>=0.2.2",

0 commit comments

Comments
 (0)