@@ -41,18 +41,19 @@ def __init__(self, shape, scope=None, dtype=None):
 
         # Projection on the keyboard
         with tf.variable_scope('weights_' + self.scope):
-            self.W = tf.get_variable(
+            self.W_t = tf.get_variable(
                 'weights',
                 shape,
                 # initializer=tf.truncated_normal_initializer()  # TODO: Tune value (fct of input size: 1/sqrt(input_dim))
                 dtype=dtype
             )
             self.b = tf.get_variable(
                 'bias',
-                shape[1],
+                shape[0],
                 initializer=tf.constant_initializer(),
                 dtype=dtype
             )
+            self.W = tf.transpose(self.W_t)
 
     def getWeights(self):
         """ Convenience method for some tf arguments
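A note on this hunk, for readers of the diff: tf.nn.sampled_softmax_loss expects its weights argument with shape [num_classes, dim], so the variable is now stored pre-transposed as W_t with shape (vocab_size, hidden_size), and W = tf.transpose(W_t) recovers the (hidden_size, vocab_size) layout used for the decoder's output projection. A minimal sketch of the shapes involved (TF 1.x API; the sizes, placeholders, and num_sampled value are made up for illustration):

    import tensorflow as tf

    hidden_size, vocab_size = 256, 10000  # illustrative sizes only

    # Stored layout [vocab_size, hidden_size]: directly usable by the loss.
    W_t = tf.get_variable('weights', (vocab_size, hidden_size), dtype=tf.float32)
    b = tf.get_variable('bias', [vocab_size],
                        initializer=tf.constant_initializer(), dtype=tf.float32)
    W = tf.transpose(W_t)  # [hidden_size, vocab_size], projects decoder outputs

    labels = tf.placeholder(tf.int64, [None, 1])              # one true class per example
    inputs = tf.placeholder(tf.float32, [None, hidden_size])  # decoder outputs

    loss = tf.nn.sampled_softmax_loss(
        weights=W_t,  # already [num_classes, dim]; no transpose at loss time
        biases=b,
        labels=labels,
        inputs=inputs,
        num_sampled=512,
        num_classes=vocab_size)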
@@ -114,7 +115,7 @@ def buildNetwork(self):
         # Sampled softmax only makes sense if we sample less than vocabulary size.
         if 0 < self.args.softmaxSamples < self.textData.getVocabularySize():
             outputProjection = ProjectionOp(
-                (self.args.hiddenSize, self.textData.getVocabularySize()),
+                (self.textData.getVocabularySize(), self.args.hiddenSize),
                 scope='softmax_projection',
                 dtype=self.dtype
             )
@@ -124,7 +125,7 @@ def sampledSoftmax(labels, inputs):
 
                 # We need to compute the sampled_softmax_loss using 32bit floats to
                 # avoid numerical instabilities.
-                localWt = tf.cast(tf.transpose(outputProjection.W), tf.float32)
+                localWt = tf.cast(outputProjection.W_t, tf.float32)
                 localB = tf.cast(outputProjection.b, tf.float32)
                 localInputs = tf.cast(inputs, tf.float32)
 
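The payoff of the change lands here: because W_t is already stored in the [num_classes, dim] layout that sampled_softmax_loss expects, the per-call tf.transpose can be dropped and the weights are cast to float32 directly.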