Commit a6f2fc3

Fixed positional encoding
1 parent 24e5e0c · commit a6f2fc3

File tree

1 file changed (+27, -27 lines)


modules.py

Lines changed: 27 additions & 27 deletions
@@ -118,49 +118,49 @@ def embedding(inputs,
 
 
 def positional_encoding(inputs,
-                        vocab_size,
-                        num_units,
-                        zero_pad = True,
-                        scale = True,
-                        scope = "positional_embedding",
-                        reuse = None):
-    '''
-    Positional_Encoding for a given tensor.
+                        num_units,
+                        zero_pad=True,
+                        scale=True,
+                        scope="positional_encoding",
+                        reuse=None):
+    '''Sinusoidal Positional_Encoding.
 
     Args:
-      inputs: [Tensor], A tensor contains the ids to be search from the lookup table, shape = [batch_size, 1 + len(inpt)]
-      vocab_size: [Int], Vocabulary size
-      num_units: [Int], Hidden size of embedding
-      zero_pad: [Boolean], If True, all the values of the first row(id = 0) should be constant zero
-      scale: [Boolean], If True, the output will be multiplied by sqrt num_units(check details from paper)
-      scope: [String], Optional scope for 'variable_scope'
-      reuse: [Boolean], If to reuse the weights of a previous layer by the same name
+      inputs: A 2d Tensor with shape of (N, T).
+      num_units: Output dimensionality
+      zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero
+      scale: Boolean. If True, the output will be multiplied by sqrt num_units(check details from paper)
+      scope: Optional scope for `variable_scope`.
+      reuse: Boolean, whether to reuse the weights of a previous layer
+        by the same name.
 
-      Returns:
+    Returns:
       A 'Tensor' with one more rank than inputs's, with the dimensionality should be 'num_units'
     '''
 
-    with tf.variable_scope(scope, reuse = reuse):
+    N, T = inputs.get_shape().as_list()
+    with tf.variable_scope(scope, reuse=reuse):
+        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1])
 
-        input_one = tf.tile(tf.expand_dims(tf.range(tf.shape(inputs)[1]), 0), [tf.shape(inputs)[0], 1])
         # First part of the PE function: sin and cos argument
         position_enc = np.array([
-            [pos / np.power(10000, 2*i/num_units) for i in range(num_units)]
-            for pos in range(max_len)])
+            [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
+            for pos in range(T)])
+
         # Second part, apply the cosine to even columns and sin to odds.
-        position_enc[:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i
-        position_enc[:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1
+        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i
+        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1
+
         # Convert to a tensor
         lookup_table = tf.convert_to_tensor(position_enc)
 
         if zero_pad:
+            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
+                                      lookup_table[1:, :]), 0)
+        outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
 
-            lookup_table = tf.concat((tf.zeros(shape = [1, num_units]),
-                                      lookup_table[1:, :]), 0)
-            outputs = tf.nn.embedding_lookup(lookup_table, input_one)
-
         if scale:
-            outputs = outputs * math.sqrt(num_units)
+            outputs = outputs * num_units**0.5
 
         return outputs
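Below is a minimal NumPy sketch, not part of the commit, that builds the same encoding table as the patched code and illustrates why the old version failed; the sizes T = 10 and num_units = 8 are hypothetical, chosen only for the demonstration.

import numpy as np

T, num_units = 10, 8  # hypothetical sequence length / model size

# Angle table, as in the patched code: the float literal in 2.*i keeps
# the exponent fractional (the old 2*i/num_units truncates under
# Python 2 integer division), and range(T) replaces the undefined max_len.
position_enc = np.array([
    [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
    for pos in range(T)])

# Sine on even columns, cosine on odd ones. Both sides now slice all T
# rows; the old right-hand side position_enc[1:, 0::2] had shape
# (T-1, ...) and could not broadcast into the (T, ...) slice on the left.
position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1

print(position_enc.shape)                 # (10, 8)
print(np.abs(position_enc).max() <= 1.0)  # True: every entry is a sine or cosine

The remaining changes in the diff are straightforward: the unused vocab_size argument is dropped, the variable scope is renamed from "positional_embedding" to "positional_encoding" to match the function name, the embedding lookup moves out of the if zero_pad: body so that outputs is defined even when zero_pad=False, and num_units**0.5 replaces math.sqrt(num_units), so the function no longer needs the math module.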