Skip to content

Commit ef2e7cd

Browse files
committed
Add support for intake-esgf
1 parent cbb23dc commit ef2e7cd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+971
-217
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Load data using intake-esgf
2+
===========================
3+
4+
.. automodule:: esmvalcore.io.intake_esgf
5+
:no-inherited-members:

doc/api/esmvalcore.io.protocol.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Protocols for accessing data
2+
============================
3+
4+
.. automodule:: esmvalcore.io.protocol
5+
:no-inherited-members:

doc/api/esmvalcore.io.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
Access data from any source
2+
===========================
3+
4+
ESMValCore supports a modular system for reading data from various data sources.
5+
In the future, this module may be extended with support for writing output data.
6+
7+
The interface is defined in the :mod:`esmvalcore.io.protocol` module and
8+
the other modules here provide an implementation for a particular data source.
9+
10+
.. toctree::
11+
:maxdepth: 1
12+
13+
esmvalcore.io.protocol
14+
esmvalcore.io.intake_esgf

doc/api/esmvalcore.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ library. This section documents the public API of ESMValCore.
1414
esmvalcore.dataset
1515
esmvalcore.esgf
1616
esmvalcore.exceptions
17+
esmvalcore.io
1718
esmvalcore.iris_helpers
1819
esmvalcore.local
1920
esmvalcore.preprocessor

doc/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@
461461
'dask': ('https://docs.dask.org/en/stable/', None),
462462
'distributed': ('https://distributed.dask.org/en/stable/', None),
463463
'iris': ('https://scitools-iris.readthedocs.io/en/stable/', None),
464+
'intake_esgf': ('https://intake-esgf.readthedocs.io/en/stable/', None),
464465
'esmf_regrid': ('https://iris-esmf-regrid.readthedocs.io/en/stable/', None),
465466
'matplotlib': ('https://matplotlib.org/stable/', None),
466467
'ncdata': ('https://ncdata.readthedocs.io/en/stable/', None),

doc/configurations

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../esmvalcore/config/configurations

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies:
2020
- fire
2121
- geopy
2222
- humanfriendly
23+
- intake-esgf
2324
- intake-esm
2425
- iris >=3.12.2 # https://github.com/SciTools/iris/issues/6417
2526
- iris-esmf-regrid >=0.11.0

esmvalcore/_provenance.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ def __init__(
118118
119119
Arguments
120120
---------
121-
filename: str
122-
Path to the file on disk.
121+
filename: :obj:`pathlib.Path` or :obj:`esmvalcore.io.protocol.DataElement`
122+
Path or data element containing the data described by the provenance.
123123
attributes: dict
124124
Dictionary with facets describing the file. If set to None, this
125125
will be read from the file when provenance is initialized.
@@ -130,11 +130,15 @@ def __init__(
130130
differ from `filename` if the file was moved before resuming
131131
processing.
132132
"""
133-
self._filename = filename
133+
self._filename = (
134+
str(filename) if isinstance(filename, Path) else filename.name
135+
)
134136
if prov_filename is None:
135-
self.prov_filename = filename
137+
self.prov_filename = self._filename
136138
else:
137139
self.prov_filename = prov_filename
140+
# TODO: ensure global attributes are recorded for input data if they're
141+
# not netcdf files.
138142
self.attributes = copy.deepcopy(attributes)
139143

140144
self.provenance = None
@@ -167,20 +171,21 @@ def copy_provenance(self):
167171
if self.provenance is None:
168172
msg = f"Provenance of {self} not initialized"
169173
raise ValueError(msg)
170-
new = TrackedFile(self.filename, self.attributes)
174+
new = TrackedFile(Path(self.filename), self.attributes)
171175
new.provenance = copy.deepcopy(self.provenance)
172176
new.entity = new.provenance.get_record(self.entity.identifier)[0]
173177
new.activity = new.provenance.get_record(self.activity.identifier)[0]
174178
return new
175179

176180
@property
177-
def filename(self):
178-
"""Filename."""
181+
def filename(self) -> str:
182+
"""Name of data described by this provenance document."""
179183
return self._filename
180184

181185
@property
182186
def provenance_file(self):
183-
"""Filename of provenance."""
187+
"""Filename of provenance file."""
188+
# This may not work well if filename is the instance_id.
184189
return os.path.splitext(self.filename)[0] + "_provenance.xml"
185190

186191
def initialize_provenance(self, activity):

esmvalcore/_recipe/check.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
import subprocess
99
from functools import partial
10+
from pathlib import Path
1011
from pprint import pformat
1112
from shutil import which
1213
from typing import TYPE_CHECKING, Any
@@ -15,6 +16,7 @@
1516
import yamale
1617

1718
import esmvalcore.preprocessor
19+
from esmvalcore.esgf import ESGFFile
1820
from esmvalcore.exceptions import InputFilesNotFound, RecipeError
1921
from esmvalcore.local import _get_start_end_year, _parse_period
2022
from esmvalcore.preprocessor import TIME_PREPROCESSORS, PreprocessingTask
@@ -33,7 +35,6 @@
3335

3436
if TYPE_CHECKING:
3537
from collections.abc import Iterable, Sequence
36-
from pathlib import Path
3738

3839
from esmvalcore._task import TaskSet
3940
from esmvalcore.dataset import Dataset
@@ -241,6 +242,8 @@ def data_availability(dataset: Dataset, log: bool = True) -> None:
241242
available_years: set[int] = set()
242243

243244
for file in input_files:
245+
if not isinstance(file, Path | ESGFFile):
246+
return
244247
start, end = _get_start_end_year(file)
245248
available_years.update(range(start, end + 1))
246249

esmvalcore/_recipe/recipe.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from collections.abc import Iterable, Sequence
6161

6262
from esmvalcore.config import Session
63+
from esmvalcore.io.protocol import DataElement
6364
from esmvalcore.typing import Facets
6465

6566
logger = logging.getLogger(__name__)
@@ -328,20 +329,12 @@ def _update_weighting_settings(
328329
_exclude_dataset(settings, facets, "weighting_landsea_fraction")
329330

330331

331-
def _add_to_download_list(dataset: Dataset) -> None:
332-
"""Add the files of `dataset` to `DOWNLOAD_FILES`."""
333-
for i, file in enumerate(dataset.files):
334-
if isinstance(file, esgf.ESGFFile):
335-
DOWNLOAD_FILES.add(file)
336-
dataset.files[i] = file.local_file(dataset.session["download_dir"])
337-
338-
339332
def _schedule_for_download(datasets: Iterable[Dataset]) -> None:
340333
"""Schedule files for download."""
341334
for dataset in datasets:
342-
_add_to_download_list(dataset)
335+
DOWNLOAD_FILES.update(dataset.files)
343336
for supplementary_ds in dataset.supplementaries:
344-
_add_to_download_list(supplementary_ds)
337+
DOWNLOAD_FILES.update(supplementary_ds.files)
345338

346339

347340
def _log_input_files(datasets: Iterable[Dataset]) -> None:
@@ -367,12 +360,7 @@ def _log_input_files(datasets: Iterable[Dataset]) -> None:
367360

368361
def _get_files_str(dataset: Dataset) -> str:
369362
"""Get nice string representation of all files of a dataset."""
370-
return "\n".join(
371-
f" {f}"
372-
if f.exists() # type: ignore
373-
else f" {f} (will be downloaded)"
374-
for f in dataset.files
375-
)
363+
return "\n".join(f" {f}" for f in dataset.files)
376364

377365

378366
def _check_input_files(input_datasets: Iterable[Dataset]) -> set[str]:
@@ -455,10 +443,7 @@ def _get_common_attributes(
455443

456444
# Ensure that attributes start_year and end_year are always available if at
457445
# least one of the input datasets defines it
458-
if "timerange" in attributes:
459-
start_year, end_year = _parse_period(attributes["timerange"])
460-
attributes["start_year"] = int(str(start_year[0:4]))
461-
attributes["end_year"] = int(str(end_year[0:4]))
446+
_set_start_end_year(attributes)
462447

463448
return attributes
464449

@@ -722,7 +707,7 @@ def _get_preprocessor_products(
722707
)
723708

724709
for product in products:
725-
_set_start_end_year(product)
710+
_set_start_end_year(product.attributes)
726711
product.check()
727712

728713
return products
@@ -782,18 +767,18 @@ def _configure_multi_product_preprocessor(
782767

783768
for product in multimodel_products | ensemble_products:
784769
product.check()
785-
_set_start_end_year(product)
770+
_set_start_end_year(product.attributes)
786771

787772

788-
def _set_start_end_year(product: PreprocessorFile) -> None:
773+
def _set_start_end_year(attributes: dict[str, Any]) -> None:
789774
"""Set the attributes `start_year` and `end_year`.
790775
791776
These attributes are used by many diagnostic scripts in ESMValTool.
792777
"""
793-
if "timerange" in product.attributes:
794-
start_year, end_year = _parse_period(product.attributes["timerange"])
795-
product.attributes["start_year"] = int(str(start_year[0:4]))
796-
product.attributes["end_year"] = int(str(end_year[0:4]))
778+
if "timerange" in attributes:
779+
start_year, end_year = _parse_period(attributes["timerange"])
780+
attributes["start_year"] = int(str(start_year[0:4]))
781+
attributes["end_year"] = int(str(end_year[0:4]))
797782

798783

799784
def _update_preproc_functions(
@@ -916,7 +901,7 @@ def __init__(
916901
# Clear the global variable containing the set of files to download
917902
DOWNLOAD_FILES.clear()
918903
USED_DATASETS.clear()
919-
self._download_files: set[esgf.ESGFFile] = set()
904+
self._download_files: set[DataElement] = set()
920905
self.session = session
921906
self.session["write_ncl_interface"] = self._need_ncl(
922907
raw_recipe["diagnostics"],
@@ -1342,8 +1327,8 @@ def run(self) -> None:
13421327
filled_recipe = self.write_filled_recipe()
13431328

13441329
# Download required data
1345-
if self.session["search_esgf"] != "never":
1346-
esgf.download(self._download_files, self.session["download_dir"])
1330+
for file in self._download_files:
1331+
file.prepare()
13471332

13481333
self.tasks.run(max_parallel_tasks=self.session["max_parallel_tasks"])
13491334
logger.info(

0 commit comments

Comments
 (0)