We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2c389b4 commit f85700f (Copy full SHA for f85700f)
bert_pytorch/dataset/vocab.py
@@ -188,12 +188,13 @@ def build():
188
vocab = WordVocab(f, max_size=args.vocab_size, min_freq=args.min_freq)
189
elif os.path.isdir(args.corpus_path):
190
logger.info(f"is dir")
191
+ print("get corpus")
192
texts = []
193
for index, corpus in tqdm(enumerate(os.listdir(args.corpus_path))):
194
+ print("getting {}".format(corpus))
195
with open(os.path.join(args.corpus_path,corpus), "r", encoding=args.encoding) as f:
196
texts += f.readlines()
197
# print(type(f))
- break
198
vocab = WordVocab(texts, max_size=args.vocab_size, min_freq=args.min_freq)
199
pass
200
else:
0 commit comments