from keras.layers import CuDNNLSTM
from keras.layers.wrappers import TimeDistributed, Bidirectional
from attention_decoder import AttentionDecoder
-from nmt import simpleNMT
from reader import Data, Vocabulary
import numpy as np
from keras import backend as K
def run_example(model, input_vocabulary, output_vocabulary, text):
+    """Predict a single example"""
    encoded = input_vocabulary.string_to_int(text)
    prediction = model.predict(np.array([encoded]))
    prediction = np.argmax(prediction[0], axis=-1)
-    return "".join([s for s in output_vocabulary.int_to_string(prediction) if s != "<unk>"])
+    return output_vocabulary.int_to_string(prediction)
+
+
+def decode(chars, sanitize=False):
+    """Join a list of chars removing <unk> and invalid utf-8"""
+    string = "".join([c for c in chars if c != "<unk>"])
+    if sanitize:
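+        # keep only characters below code point 2048 before the utf-8 round trip below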
+        string = "".join(i for i in string if ord(i) < 2048)
+    return bytes(string, 'utf-8').decode('utf-8', 'ignore')


class Examples(Callback):
+    """Keras callback to log examples"""
+
    def __init__(self, viz):
        self.visualizer = viz

@@ -53,22 +63,20 @@ def on_epoch_end(self, epoch, logs):
        self.visualizer.proba_model.get_layer(
            "attention_decoder_prob").set_weights(weights)
        for i, o in zip(data_in, data_out):
-            text = "".join(
-                [s for s in input_vocab.int_to_string(i) if s != "<unk>"])
-            truth = "".join([s for s in output_vocab.int_to_string(
-                np.argmax(o, -1)) if s != "<unk>"])
-            out = run_example(self.model, input_vocab, output_vocab, text)
-            print(f"{text} -> {out} ({truth})")
-            examples.append([bytes(text, 'utf-8').decode('utf-8', 'ignore'), bytes(
-                out, 'utf-8').decode('utf-8', 'ignore'), bytes(truth, 'utf-8').decode('utf-8', 'ignore')])
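+            # decode() drops <unk> tokens and invalid utf-8 so the logged examples stay clean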
+            text = decode(input_vocab.int_to_string(i)).replace('<eot>', '')
+            truth = decode(output_vocab.int_to_string(np.argmax(o, -1)), True)
+            pred = run_example(self.model, input_vocab, output_vocab, text)
+            out = decode(pred, True)
+            print(f"{decode(text, True)} -> {out} ({truth})")
+            examples.append([decode(text, True), out, truth])
            amap = self.visualizer.attention_map(text)
            if amap:
-                viz.append(wandb.Image(amap, caption=text))
+                viz.append(wandb.Image(amap))
                amap.close()
        if len(viz) > 0:
            logs["attention_map"] = viz[:5]
-        wandb.log(
-            {"examples": wandb.Table(data=examples), **logs})
+        logs["examples"] = wandb.Table(data=examples)
+        wandb.log(logs)


def all_acc(y_true, y_pred):
@@ -94,7 +102,7 @@ def all_acc(y_true, y_pred):
input_vocab = Vocabulary('./human_vocab.json', padding=config.padding)
output_vocab = Vocabulary('./machine_vocab.json', padding=config.padding)

-print('Loading datasets.')
+print('Loading datasets...')

training = Data(training_data, input_vocab, output_vocab)
validation = Data(validation_data, input_vocab, output_vocab)
@@ -125,7 +133,7 @@ def build_models(pad_length=config.padding, n_chars=input_vocab.size(), n_labels
                              name='attention_decoder_prob',
                              output_dim=n_labels,
                              return_probabilities=True,
-                             trainable=trainable)(rnn_encoded)
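+                             # kept frozen; the Examples callback overwrites these weights via set_weights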
+                             trainable=False)(rnn_encoded)

    y_pred = AttentionDecoder(decoder_units,
                              name='attention_decoder_1',
@@ -137,7 +145,7 @@ def build_models(pad_length=config.padding, n_chars=input_vocab.size(), n_labels
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
-                 metrics=['accuracy', all_acc])
+                 metrics=['accuracy'])
    prob_model = Model(inputs=input_, outputs=y_prob)
    return model, prob_model