99from itertools import groupby
1010from pathlib import Path
1111from typing import TYPE_CHECKING , Any
12+ from urllib .parse import urlparse
1213
14+ import fsspec
1315import iris
1416import ncdata
1517import xarray as xr
@@ -82,6 +84,7 @@ def load(
8284 | xr .Dataset
8385 | ncdata .NcData ,
8486 ignore_warnings : list [dict [str , Any ]] | None = None ,
87+ backend_kwargs : dict [str , Any ] | None = None ,
8588) -> CubeList :
8689 """Load Iris cubes.
8790
@@ -90,10 +93,19 @@ def load(
9093 file:
9194 File to be loaded. If ``file`` is already a loaded dataset, return it
9295 as a :class:`~iris.cube.CubeList`.
96+ A ``str`` or ``Path`` may also point to a Zarr store.
9397 ignore_warnings:
9498 Keyword arguments passed to :func:`warnings.filterwarnings` used to
9599 ignore warnings issued by :func:`iris.load_raw`. Each list element
96100 corresponds to one call to :func:`warnings.filterwarnings`.
101+ backend_kwargs:
102+ Dict to hold info needed by storage backend e.g. to access
103+ a PRIVATE S3 bucket containing object stores (e.g. netCDF4 files);
104+ needed by ``fsspec`` and its extensions, e.g. ``s3fs``, so
105+ most of the time this will include ``storage_options``. Note that Zarr
106+ files are opened via the ``http`` extension of ``fsspec``, so there is
107+ no need for ``storage_options`` in that case (i.e. anonymous access).
108+ Currently only used when opening Zarr files.
97109
98110 Returns
99111 -------
@@ -108,10 +120,22 @@ def load(
108120 Invalid type for ``file``.
109121
110122 """
111- if isinstance (file , DataElement ):
123+ if isinstance (file , (str , Path )):
124+ extension = (
125+ file .suffix
126+ if isinstance (file , Path )
127+ else os .path .splitext (file )[1 ]
128+ )
129+ if "zarr" not in extension :
130+ cubes = _load_from_file (file , ignore_warnings = ignore_warnings )
131+ else :
132+ cubes = _load_zarr (
133+ file ,
134+ ignore_warnings = ignore_warnings ,
135+ backend_kwargs = backend_kwargs ,
136+ )
137+ elif isinstance (file , DataElement ):
112138 cubes = file .to_iris ()
113- elif isinstance (file , (str , Path )):
114- cubes = _load_from_file (file , ignore_warnings = ignore_warnings )
115139 elif isinstance (file , Cube ):
116140 cubes = CubeList ([file ])
117141 elif isinstance (file , CubeList ):
@@ -143,6 +167,68 @@ def load(
143167 return cubes
144168
145169
def _load_zarr(
    file: str | Path | Cube | CubeList | xr.Dataset | ncdata.NcData,
    ignore_warnings: list[dict[str, Any]] | None = None,
    backend_kwargs: dict[str, Any] | None = None,
) -> CubeList:
    """Load a Zarr store into an Iris :class:`~iris.cube.CubeList`.

    Parameters
    ----------
    file:
        Path or URL of the Zarr store. Remote stores must be reachable
        via ``http``/``https``.
    ignore_warnings:
        Keyword arguments passed to :func:`warnings.filterwarnings`;
        forwarded to ``dataset_to_iris``.
    backend_kwargs:
        Passed through to :func:`xarray.open_dataset`, e.g. storage
        options needed by ``fsspec`` and its extensions such as ``s3fs``.

    Returns
    -------
    CubeList
        The loaded cubes.

    Raises
    ------
    ValueError
        If a remote store exposes neither ``zarr.json`` (Zarr v3) nor
        ``.zmetadata`` (Zarr v2 consolidated metadata).
    """
    # note on ``chunks`` kwarg to ``xr.open_dataset()``
    # docs.xarray.dev/en/stable/generated/xarray.open_dataset.html
    # this is very important because with ``chunks=None`` (default)
    # data will be realized as Numpy arrays and transferred in memory;
    # ``chunks={}`` loads the data with dask using the engine preferred
    # chunk size, generally identical to the formats chunk size. If not
    # available, a single chunk for all arrays; testing shows this is the
    # "best guess" compromise for typically CMIP-like chunked data.
    # see https://github.com/pydata/xarray/issues/10612 and
    # https://github.com/pp-mo/ncdata/issues/139
    time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
    open_kwargs = {
        "consolidated": False,
        "decode_times": time_coder,
        "engine": "zarr",
        "chunks": {},
        "backend_kwargs": backend_kwargs,
    }

    # case 1: Zarr store is on a remote object store; its URI will
    # always be either http or https
    if urlparse(str(file)).scheme in ("http", "https"):
        # basic probe that opens Zarr/zarr.json for Zarr3 or
        # Zarr/.zmetadata for Zarr2; use context managers so the probe
        # handles are closed rather than leaked
        fs = fsspec.filesystem("http")
        valid_zarr = True
        try:
            with fs.open(str(file) + "/zarr.json", "rb"):  # Zarr3
                pass
        except Exception:  # noqa: BLE001
            try:
                with fs.open(str(file) + "/.zmetadata", "rb"):  # Zarr2
                    pass
            except Exception:  # noqa: BLE001
                valid_zarr = False
        # we don't want to catch any specific aiohttp/fsspec exception
        # bottom line is that that file has issues, so raise
        if not valid_zarr:
            msg = (
                f"File '{file}' can not be opened as Zarr file at the moment."
            )
            raise ValueError(msg)

        open_kwargs["consolidated"] = True

    # case 2: Zarr store is local to the file system; same open call,
    # just with the default (non-consolidated) kwargs
    zarr_xr = xr.open_dataset(file, **open_kwargs)

    # avoid possible
    # ValueError: Object has inconsistent chunks along dimension time.
    # This can be fixed by calling unify_chunks().
    # when trying to access the ``chunks`` store
    zarr_xr = zarr_xr.unify_chunks()

    return dataset_to_iris(zarr_xr, ignore_warnings=ignore_warnings)
230+
231+
146232def _load_from_file (
147233 file : str | Path ,
148234 ignore_warnings : list [dict [str , Any ]] | None = None ,
0 commit comments