Skip to content

Opendap merra #168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ build/*
docs/build/*
/docs/build/*
/projects
/esmpy


# Byte-compiled / optimized / DLL files
Expand Down
118 changes: 86 additions & 32 deletions globsim/download/MERRAdownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import re
import warnings
import logging
import requests
import concurrent.futures
import subprocess

from datetime import datetime, timedelta
from netCDF4 import Dataset
Expand Down Expand Up @@ -544,15 +547,14 @@ def getURLs(self, date: "dict[str, datetime]"):
list
"""
# Setup the based url strings
baseurl_2d = ('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/')
baseurl_3d = ('https://goldsmr5.gesdisc.eosdis.nasa.gov/opendap/MERRA2/')
baseurl = ('https://opendap.earthdata.nasa.gov/collections/')

baseurl_3dn = (baseurl_3d + 'M2I6NPANA.5.12.4/{YM}/MERRA2_{FN}.inst6_3d_ana_Np.{YMD}.nc4')
baseurl_3da = (baseurl_3d + 'M2I3NPASM.5.12.4/{YM}/MERRA2_{FN}.inst3_3d_asm_Np.{YMD}.nc4')
baseurl_2dm = (baseurl_2d + 'M2I1NXASM.5.12.4/{YM}/MERRA2_{FN}.inst1_2d_asm_Nx.{YMD}.nc4')
baseurl_2dr = (baseurl_2d + 'M2T1NXRAD.5.12.4/{YM}/MERRA2_{FN}.tavg1_2d_rad_Nx.{YMD}.nc4')
baseurl_2ds = (baseurl_2d + 'M2T1NXFLX.5.12.4/{YM}/MERRA2_{FN}.tavg1_2d_flx_Nx.{YMD}.nc4')
baseurl_2dv = (baseurl_2d + 'M2T1NXSLV.5.12.4/{YM}/MERRA2_{FN}.tavg1_2d_slv_Nx.{YMD}.nc4')
baseurl_3dn = (baseurl + 'C1276812884-GES_DISC/granules/M2I6NPANA.5.12.4%3AMERRA2_{FN}.inst6_3d_ana_Np.{YMD}.nc4')
baseurl_3da = (baseurl + 'C1276812879-GES_DISC/granules/M2I3NPASM.5.12.4%3AMERRA2_{FN}.inst3_3d_asm_Np.{YMD}.nc4')
baseurl_2dm = (baseurl + 'C1276812820-GES_DISC/granules/M2I1NXASM.5.12.4%3AMERRA2_{FN}.inst1_2d_asm_Nx.{YMD}.nc4')
baseurl_2dr = (baseurl + 'C1276812851-GES_DISC/granules/M2T1NXRAD.5.12.4%3AMERRA2_{FN}.tavg1_2d_rad_Nx.{YMD}.nc4')
baseurl_2ds = (baseurl + 'C1276812838-GES_DISC/granules/M2T1NXFLX.5.12.4%3AMERRA2_{FN}.tavg1_2d_flx_Nx.{YMD}.nc4')
baseurl_2dv = (baseurl + 'C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_{FN}.tavg1_2d_slv_Nx.{YMD}.nc4')

# build the urls list
urls_3dmana = []
Expand All @@ -563,20 +565,19 @@ def getURLs(self, date: "dict[str, datetime]"):
urls_2dv = []

for d in pd.date_range(date['beg'], date['end']):
ym = d.strftime("%Y/%m")
ymd = d.strftime("%Y%m%d")

fn = self.get_file_number(d.year, d.month)

urls_3dmana.append(baseurl_3dn.format(YM=ym, FN=fn, YMD=ymd))
urls_3dmasm.append(baseurl_3da.format(YM=ym, FN=fn, YMD=ymd))
urls_2dm.append(baseurl_2dm.format(YM=ym, FN=fn, YMD=ymd))
urls_2ds.append(baseurl_2ds.format(YM=ym, FN=fn, YMD=ymd))
urls_2dr.append(baseurl_2dr.format(YM=ym, FN=fn, YMD=ymd))
urls_2dv.append(baseurl_2dv.format(YM=ym, FN=fn, YMD=ymd))
urls_3dmana.append(baseurl_3dn.format(FN=fn, YMD=ymd))
urls_3dmasm.append(baseurl_3da.format(FN=fn, YMD=ymd))
urls_2dm.append(baseurl_2dm.format(FN=fn, YMD=ymd))
urls_2ds.append(baseurl_2ds.format(FN=fn, YMD=ymd))
urls_2dr.append(baseurl_2dr.format(FN=fn, YMD=ymd))
urls_2dv.append(baseurl_2dv.format(FN=fn, YMD=ymd))

# Setup URL for getting constant model parameters (2D, single-level, full horizontal resolution)
url_2dc = ['https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2_MONTHLY/M2C0NXASM.5.12.4/1980/MERRA2_101.const_2d_asm_Nx.00000000.nc4']
url_2dc = ['https://opendap.earthdata.nasa.gov/collections/C1276812819-GES_DISC/granules/M2C0NXASM.5.12.4%3AMERRA2_101.const_2d_asm_Nx.00000000.nc4']

return urls_3dmana, urls_3dmasm, urls_2dm, urls_2ds, urls_2dr, url_2dc, urls_2dv

Expand Down Expand Up @@ -612,15 +613,15 @@ def get_file_number(year, month=-1):

def start_session(self):
self.session = setup_session(self.username, self.password,
check_url="https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2_MONTHLY/M2C0NXASM.5.12.4/1980/MERRA2_101.const_2d_asm_Nx.00000000.nc4")

check_url="https://opendap.earthdata.nasa.gov/collections/C1276812819-GES_DISC/granules/M2C0NXASM.5.12.4%3AMERRA2_101.const_2d_asm_Nx.00000000.nc4")
def build_subsetters(self):
self.subsetters = {"3dmana": MERRASubsetter('https://goldsmr5.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I6NPANA.5.12.4/2016/01/MERRA2_400.inst6_3d_ana_Np.20160101.nc4', self.session),
"3dmasm": MERRASubsetter('https://goldsmr5.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I3NPASM.5.12.4/2016/01/MERRA2_400.inst3_3d_asm_Np.20160101.nc4', self.session),
"2dm": MERRASubsetter('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/2016/01/MERRA2_400.inst1_2d_asm_Nx.20160102.nc4', self.session),
"2ds": MERRASubsetter('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXFLX.5.12.4/2016/01/MERRA2_400.tavg1_2d_flx_Nx.20160101.nc4', self.session),
"2dr": MERRASubsetter('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXRAD.5.12.4/1981/01/MERRA2_100.tavg1_2d_rad_Nx.19810101.nc4', self.session),
"2dv": MERRASubsetter('https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/2016/01/MERRA2_400.tavg1_2d_slv_Nx.20160101.nc4', self.session)}
self.subsetters = {"3dmana": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812884-GES_DISC/granules/M2I6NPANA.5.12.4%3AMERRA2_400.inst6_3d_ana_Np.20160101.nc4', self.session),
"3dmasm": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812879-GES_DISC/granules/M2I3NPASM.5.12.4%3AMERRA2_400.inst3_3d_asm_Np.20160101.nc4', self.session),
"2dm": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812820-GES_DISC/granules/M2I1NXASM.5.12.4%3AMERRA2_400.inst1_2d_asm_Nx.20160102.nc4', self.session),
"2dr": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812851-GES_DISC/granules/M2T1NXRAD.5.12.4%3AMERRA2_100.tavg1_2d_rad_Nx.19810101.nc4', self.session),
"2ds": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812838-GES_DISC/granules/M2T1NXFLX.5.12.4%3AMERRA2_400.tavg1_2d_flx_Nx.20160101.nc4', self.session),
"2dv": MERRASubsetter('https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20160101.nc4', self.session)}

for s in self.subsetters.values():
s.set_lon_range(self.area['west'], self.area['east'])
Expand Down Expand Up @@ -704,6 +705,7 @@ def retrieve(self):
elif self.mode == "links":
url_file = Path(self.directory, "merra-wishlist.txt")
self.download_links(date_range, str(url_file))
self.actual_download_links(str(url_file))

logger.info(f"Created OPeNDAP links file: {url_file}")
logger.info(f"To download the files, use the command 'cat {url_file} | xargs -n 1 -P 6 wget --load-cookies ~/.urs_cookies --save-cookies ~/.urs_cookies --auth-no-challenge=on --keep-session-cookies --content-disposition'")
Expand All @@ -725,6 +727,33 @@ def download_links(self, date_range: "dict[str, datetime]", url_file: str):
_ = [self.subsetters['3dmana'].get_download_link(url, url_file=url_file) for url in urls_3dmana]
_ = [self.subsetters['3dmasm'].get_download_link(url, url_file=url_file) for url in urls_3dmasm]

def actual_download_links(self, url_file: str):
    """Download every granule listed in a links file, in parallel.

    Parameters
    ----------
    url_file : str
        Path to a text file containing one request URL per line.
        Surrounding whitespace is stripped and blank lines are ignored.

    Each URL is passed to ``self.download_granule`` from a pool of up to
    10 worker threads. An exception raised while downloading one URL is
    logged and does not prevent the remaining URLs from being processed.
    """
    with open(str(url_file), 'r') as file:
        # Skip blank lines (e.g. a trailing newline) so that no request
        # is ever issued for an empty URL.
        list_url = [line.strip() for line in file if line.strip()]

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        pending = {executor.submit(self.download_granule, url): url
                   for url in list_url}

        # A worker's exception only surfaces when its result is retrieved;
        # fetch every result so that failures are reported, not dropped.
        for future in concurrent.futures.as_completed(pending):
            try:
                future.result()
            except Exception:
                logger.exception(f"Download failed for {pending[future]}")

def download_granule(self, url):
    """Download one granule and store it in ``self.directory``.

    Parameters
    ----------
    url : str
        Full request URL of the granule, as read from the links file.

    The output file name is derived from the URL: the text after the last
    '%3A' and before '.dap.nc4?', with '.nc4' appended. If the request
    does not succeed, the server response is logged and no file is written.
    """
    response = requests.get(url)

    # Nothing to save when the request failed; report it and stop.
    if not response.ok:
        logger.error(f'Request failed: {response.text}')
        return

    # NOTE(review): if the granule name in the URL already ends in '.nc4',
    # the resulting file name ends in '.nc4.nc4' - confirm this is intended.
    file_name = ''.join([url.split('%3A')[-1].split('.dap.nc4?')[0], '.nc4'])

    # Write straight into the target directory rather than saving to the
    # working directory and spawning 'mv', which breaks on paths that
    # contain spaces and on platforms without 'mv'.
    target = Path(self.directory, file_name)

    logger.info(f'Downloading: {file_name}')
    with open(target, 'wb') as file_handler:
        file_handler.write(response.content)

def download(self, date_range):
logger.info(f"Downloading chunk {date_range['beg']} to {date_range['end']}")

Expand Down Expand Up @@ -813,7 +842,9 @@ def __init__(self, url, session):

session : pydap access session object created from setup_session

2d VAR[lat:lat][lon:lon][time:time] """
old: 2d VAR[time:time][lat:lat][lon:lon]
new version: 2d VAR%5Btime:time%5D%5Blat:lat%5D%5Blon:lon%5D
replace '[' -> '%5B' and ']' -> '%5D' """
self.session = session
logger.info(f"Requesting dataset information from {url}")
self.dataset = open_url(url, session=session)
Expand All @@ -827,31 +858,31 @@ def set_time_step(self, dataset_type):

def subset_time(self):
""" Get the full day """
return "[0:{}]".format(self.n_timesteps - 1)
return "%5B0:{}%5D".format(self.n_timesteps - 1)

def subset_lat(self, lat_min, lat_max):
indices = np.where((self.lat_values >= lat_min) & (self.lat_values <= lat_max))
return "[{}:{}]".format(np.min(indices), np.max(indices))
return "%5B{}:{}%5D".format(np.min(indices), np.max(indices))

def subset_lon(self, lon_min, lon_max):
indices = np.where((self.lon_values >= lon_min) & (self.lon_values <= lon_max))
return "[{}:{}]".format(np.min(indices), np.max(indices))
return "%5B{}:{}%5D".format(np.min(indices), np.max(indices))

def subset_lev(self, elev_min, elev_max):
Pmax = pressure_from_elevation(elev_min) + 55
Pmin = pressure_from_elevation(elev_max) - 55

indices = np.where((self.LEVS >= Pmin) & (self.LEVS <= Pmax))
return "[{}:{}]".format(np.min(indices), np.max(indices))
return "%5B{}:{}%5D".format(np.min(indices), np.max(indices))

def subset_2d_variable(self, variable_name, lat_min, lat_max, lon_min, lon_max):
var_string = "".join([variable_name, self.subset_time(),
var_string = "".join(['/', variable_name, self.subset_time(),
self.subset_lat(lat_min, lat_max), self.subset_lon(lon_min, lon_max)])

return var_string

def subset_3d_variable(self, variable_name, elev_min, elev_max, lat_min, lat_max, lon_min, lon_max):
var_string = "".join([variable_name, self.subset_time(), self.subset_lev(elev_min, elev_max),
var_string = "".join(['/', variable_name, self.subset_time(), self.subset_lev(elev_min, elev_max),
self.subset_lat(lat_min, lat_max), self.subset_lon(lon_min, lon_max)])

return var_string
Expand All @@ -877,6 +908,8 @@ def subset_variable(self, variable):
Returns
-------
str : variable name with slices, e.g. "H[0:3][4:59][23:44]"
but new format is H%5B0:3%5D%5B4:59%5D%5B23:44%5D
replace '[' -> '%5B' and ']' -> '%5D'
"""
is_3d = ('lev' in self.dataset[variable].dimensions)

Expand Down Expand Up @@ -910,7 +943,7 @@ def create_request_url(self, dataset_url, type='nc4'):
if 'lev' in self.dataset:
uri_parameters.append('lev' + self.subset_lev(self.elev_min, self.elev_max))

dods_url = dataset_url + f".{type}?" + ",".join(uri_parameters)
dods_url = dataset_url + f".dap.{type}?dap4.ce=" + ";".join(uri_parameters)

return dods_url

Expand All @@ -929,6 +962,27 @@ def get_download_link(self, url, url_file):

return None

def do_download_link(self, url, url_file):
    """Download the subset of one dataset into the current working directory.

    Parameters
    ----------
    url : str
        base URL for a dataset that can be opened with pydap.open_url
    url_file : str
        Not used by this method.
        NOTE(review): presumably kept so the signature matches
        get_download_link - confirm.

    The request URL is built with ``create_request_url``. The output file
    name is the text after the last '%3A' and before '.dap.nc4?' in that
    URL, with '.nc4' appended. If the request does not succeed, the server
    response is logged and no file is written.
    """
    ncurl = self.create_request_url(url)
    response = requests.get(ncurl)

    # Nothing to save when the request failed; report it and stop.
    if not response.ok:
        logger.error(f'Request failed: {response.text}')
        return None

    base_ncurl = ''.join([ncurl.split('%3A')[-1].split('.dap.nc4?')[0], '.nc4'])
    with open(base_ncurl, 'wb') as file_handler:
        file_handler.write(response.content)

    return None

def subset_dataset(self, url, metadata=False):
""" Return a subset dataset as

Expand Down
Loading