We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7b145dc, commit ed68f5a. Copy full SHA for ed68f5a
bert_pytorch/dataset/dataset.py
@@ -67,12 +67,14 @@ def random_word(self, sentence):
67
for i, token in enumerate(tokens):
68
prob = random.random()
69
if prob < 0.15:
70
- # 80% randomly change token to make token
71
- if prob < prob * 0.8:
+ prob /= 0.15
+
72
+ # 80% randomly change token to mask token
73
+ if prob < 0.8:
74
tokens[i] = self.vocab.mask_index
75
76
# 10% randomly change token to random token
- elif 0.15 * 0.8 <= prob < 0.15 * 0.9:
77
+ elif prob < 0.9:
78
tokens[i] = random.randrange(len(self.vocab))
79
80
# 10% randomly change token to current token
0 commit comments