Skip to content

Commit ed68f5a

Browse files
committed
really fix conditions codertimo#13
1 parent 7b145dc commit ed68f5a

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

bert_pytorch/dataset/dataset.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,12 +67,14 @@ def random_word(self, sentence):
         for i, token in enumerate(tokens):
             prob = random.random()
             if prob < 0.15:
-                # 80% randomly change token to make token
-                if prob < prob * 0.8:
+                prob /= 0.15
+
+                # 80% randomly change token to mask token
+                if prob < 0.8:
                     tokens[i] = self.vocab.mask_index
 
                 # 10% randomly change token to random token
-                elif 0.15 * 0.8 <= prob < 0.15 * 0.9:
+                elif prob < 0.9:
                     tokens[i] = random.randrange(len(self.vocab))
 
                 # 10% randomly change token to current token

0 commit comments

Comments
 (0)