Skip to content

Commit 7b60eab

Browse files
committed
Moved clirmatrix_downloads.json out of the package so that it is itself a downloaded file
1 parent e93c417 commit 7b60eab

File tree

7 files changed

+17
-8
lines changed

7 files changed

+17
-8
lines changed

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
include ir_datasets/docs/*.yaml
22
include ir_datasets/etc/*.yaml
3-
include ir_datasets/etc/*.json
43
include requirements.txt
54
include LICENSE

ir_datasets/datasets/clirmatrix.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from pathlib import Path
33
from typing import NamedTuple
44
import ir_datasets
5-
from ir_datasets.util import GzipExtract, DownloadConfig_CM
5+
from ir_datasets.util import GzipExtract, DownloadConfig, _DownloadConfig
66
from ir_datasets.datasets.base import Dataset, YamlDocumentation
77
from ir_datasets.formats import TsvDocs, CLIRMatrixQueries, CLIRMatrixQrels
88

@@ -29,7 +29,9 @@ def _init():
2929
base_path = ir_datasets.util.home_path()/NAME
3030

3131
def _dlc_init():
32-
return DownloadConfig_CM
32+
dlc = DownloadConfig.context(NAME, base_path)
33+
clirmatrix_dlc = _DownloadConfig(dlc['downloads'].path(), parser='json')
34+
return clirmatrix_dlc
3335

3436
_dlc = ir_datasets.util.Lazy(_dlc_init)
3537

ir_datasets/etc/clirmatrix_downloads.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

ir_datasets/etc/downloads.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ clinicaltrials:
105105
cache_path: 'trec-pm-2019/queries.xml'
106106

107107

108+
clirmatrix:
109+
downloads:
110+
url: 'https://macavaney.us/clirmatrix_downloads.json' # TODO: move this to JHU server?
111+
expected_md5: '9e70cd85ec45caa8c16061c42d1ce9b8'
112+
cache_path: 'clirmatrix_downloads.json'
113+
114+
108115
clueweb09:
109116
docs:
110117
instructions: 'ClueWeb09 is available by hard drives from CMU here: <https://lemurproject.org/clueweb09/>

ir_datasets/util/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from .. import log
99
from .fileio import IterStream, Cache, TarExtract, TarExtractAll, RelativePath, GzipExtract, ZipExtract, ZipExtractCache, StringFile, ReTar, Bz2Extract
10-
from .download import Download, DownloadConfig, BaseDownload, RequestsDownload, LocalDownload, DownloadConfig_CM
10+
from .download import Download, DownloadConfig, BaseDownload, RequestsDownload, LocalDownload, _DownloadConfig
1111
from .hash import HashVerificationError, HashVerifier, HashStream
1212
from .registry import Registry
1313

ir_datasets/util/download.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,10 @@ def __init__(self, file=None, base_path=None, contents=None, dua=None, parser="y
232232

233233
def contents(self):
234234
if self._contents is None:
235-
data = pkgutil.get_data('ir_datasets', self._file)
235+
if self._file.startswith('/'):
236+
data = open(self._file).read()
237+
else:
238+
data = pkgutil.get_data('ir_datasets', self._file)
236239
if self._parser == "json":
237240
json = ir_datasets.lazy_libs.json()
238241
self._contents = json.loads(data)
@@ -284,4 +287,3 @@ def __getitem__(self, key):
284287

285288

286289
DownloadConfig = _DownloadConfig(file='etc/downloads.yaml')
287-
DownloadConfig_CM = _DownloadConfig(file='etc/clirmatrix_downloads.json', parser="json")

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
'console_scripts': ['ir_datasets=ir_datasets:main_cli'],
2323
},
2424
package_data={
25-
'ir_datasets': glob('docs/*.yaml') + glob('etc/*.yaml') + glob('etc/*.json'),
25+
'ir_datasets': glob('docs/*.yaml') + glob('etc/*.yaml'),
2626
'': ['requirements.txt', 'LICENSE'],
2727
},
2828
)

0 commit comments

Comments
 (0)