Really fix the token-masking probability conditions (80% mask / 10% random / 10% unchanged) in random_word (codertimo#13)

artemisart · artemisart · commit ed68f5a41325 · 2018-10-23T00:57:31.000+02:00
diff --git a/bert_pytorch/dataset/dataset.py b/bert_pytorch/dataset/dataset.py
@@ -67,12 +67,14 @@ def random_word(self, sentence):
         for i, token in enumerate(tokens):
             prob = random.random()
             if prob < 0.15:
-                # 80% randomly change token to make token
-                if prob < prob * 0.8:
+                prob /= 0.15
+
+                # 80% randomly change token to mask token
+                if prob < 0.8:
                     tokens[i] = self.vocab.mask_index
 
                 # 10% randomly change token to random token
-                elif 0.15 * 0.8 <= prob < 0.15 * 0.9:
+                elif prob < 0.9:
                     tokens[i] = random.randrange(len(self.vocab))
 
                 # 10% randomly change token to current token