Commit a6f2fc3

Fixed positional encoding
1 parent 24e5e0c · commit a6f2fc3

File tree

1 file changed (+27, -27 lines)


modules.py

Lines changed: 27 additions & 27 deletions
@@ -118,49 +118,49 @@ def embedding(inputs,
 
 
 def positional_encoding(inputs,
-                        vocab_size,
-                        num_units,
-                        zero_pad = True,
-                        scale = True,
-                        scope = "positional_embedding",
-                        reuse = None):
-    '''
-    Positional_Encoding for a given tensor.
+                        num_units,
+                        zero_pad=True,
+                        scale=True,
+                        scope="positional_encoding",
+                        reuse=None):
+    '''Sinusoidal Positional_Encoding.
 
     Args:
-      inputs: [Tensor], A tensor contains the ids to be search from the lookup table, shape = [batch_size, 1 + len(inpt)]
-      vocab_size: [Int], Vocabulary size
-      num_units: [Int], Hidden size of embedding
-      zero_pad: [Boolean], If True, all the values of the first row(id = 0) should be constant zero
-      scale: [Boolean], If True, the output will be multiplied by sqrt num_units(check details from paper)
-      scope: [String], Optional scope for 'variable_scope'
-      reuse: [Boolean], If to reuse the weights of a previous layer by the same name
+      inputs: A 2d Tensor with shape of (N, T).
+      num_units: Output dimensionality
+      zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero
+      scale: Boolean. If True, the output will be multiplied by sqrt num_units(check details from paper)
+      scope: Optional scope for `variable_scope`.
+      reuse: Boolean, whether to reuse the weights of a previous layer
+        by the same name.
 
-      Returns:
+    Returns:
       A 'Tensor' with one more rank than inputs's, with the dimensionality should be 'num_units'
     '''
 
-    with tf.variable_scope(scope, reuse = reuse):
+    N, T = inputs.get_shape().as_list()
+    with tf.variable_scope(scope, reuse=reuse):
+        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1])
 
-        input_one = tf.tile(tf.expand_dims(tf.range(tf.shape(inputs)[1]), 0), [tf.shape(inputs)[0], 1])
         # First part of the PE function: sin and cos argument
         position_enc = np.array([
-            [pos / np.power(10000, 2*i/num_units) for i in range(num_units)]
-            for pos in range(max_len)])
+            [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
+            for pos in range(T)])
+
         # Second part, apply the cosine to even columns and sin to odds.
-        position_enc[:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i
-        position_enc[:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1
+        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2]) # dim 2i
+        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2]) # dim 2i+1
+
         # Convert to a tensor
         lookup_table = tf.convert_to_tensor(position_enc)
 
         if zero_pad:
+            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
+                                      lookup_table[1:, :]), 0)
+        outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
 
-            lookup_table = tf.concat((tf.zeros(shape = [1, num_units]),
-                                      lookup_table[1:, :]), 0)
-            outputs = tf.nn.embedding_lookup(lookup_table, input_one)
-
         if scale:
-            outputs = outputs * math.sqrt(num_units)
+            outputs = outputs * num_units**0.5
 
         return outputs
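Below is a minimal NumPy sketch, not part of the commit, that builds the same encoding table as the patched code and illustrates why the old version failed; the sizes T = 10 and num_units = 8 are hypothetical, chosen only for the demonstration.

import numpy as np

T, num_units = 10, 8  # hypothetical sequence length / model size

# Angle table, as in the patched code: the float literal in 2.*i keeps
# the exponent fractional (the old 2*i/num_units truncates under
# Python 2 integer division), and range(T) replaces the undefined max_len.
position_enc = np.array([
    [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
    for pos in range(T)])

# Sine on even columns, cosine on odd ones. Both sides now slice all T
# rows; the old right-hand side position_enc[1:, 0::2] had shape
# (T-1, ...) and could not broadcast into the (T, ...) slice on the left.
position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1

print(position_enc.shape)                 # (10, 8)
print(np.abs(position_enc).max() <= 1.0)  # True: every entry is a sine or cosine

The remaining changes in the diff are straightforward: the unused vocab_size argument is dropped, the variable scope is renamed from "positional_embedding" to "positional_encoding" to match the function name, the embedding lookup moves out of the if zero_pad: body so that outputs is defined even when zero_pad=False, and num_units**0.5 replaces math.sqrt(num_units), so the function no longer needs the math module.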