Skip to content

Commit 916cac5

Browse files
Update data_lib.py
Add imdb_reviews dataset.
1 parent 2404c85 commit 916cac5

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

hero/data_lib.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,25 @@ def add_c4_task():
115115
# Call the helper defined just above this hunk (its body is not visible here).
# NOTE(review): presumably this adds the C4 tasks when the module loads — confirm.
add_c4_task()
116116

117117

118+
def add_imdb_reviews_task():
  """Adds one imdb_reviews task per configured vocabulary.

  Builds a TFDS data source for `imdb_reviews:1.0.0` and hands it to
  `add_pt_task_v1` once for every (vocab name, vocabulary) pair, naming each
  task `imdb_reviews.<vocab name>`.
  """
  dataset_name = 'imdb_reviews'
  # The first 90% of the TFDS 'train' split is used for training and the
  # remaining 10% for validation; 'test' is passed through unchanged.
  split_map = {
      'train': 'train[:90%]',
      'validation': 'train[90%:]',
      'test': 'test',
  }
  data_source = seqio.TfdsDataSource(
      tfds_name='imdb_reviews:1.0.0', splits=split_map)
  # (vocab name, vocabulary) pairs; one task is added for each entry.
  vocab_table = (('vb32000_t5_cc', T5_CC_VOCAB),)
  for label, vocabulary in vocab_table:
    add_pt_task_v1(
        f'{dataset_name}.{label}',
        data_source,
        vocabulary,
        use_reduce_concat_split=False)
133+
134+
# Invoke the helper right after its definition so the imdb_reviews tasks are
# added as a side effect of running this module.
add_imdb_reviews_task()
135+
136+
118137
# ###############################################################################
119138
# # Dataset utilities.
120139

0 commit comments

Comments
 (0)