@@ -565,6 +565,73 @@ def bilinear(input1, input2, weight, bias=None):
     return Bilinear.apply(input1, input2, weight, bias)


+def embedding(input, embedding_matrix,
+              max_norm=None, norm_type=2, scale_grad_by_freq=False,
+              sparse=False):
571+ r"""A simple lookup table that looks up embeddings in a fixed dictionary and size.
572+
573+ This module is often used to retrieve word embeddings using indices.
574+ The input to the module is a list of indices, and the embedding matrix,
575+ and the output is the corresponding word embeddings.
576+
+    Args:
+        input: tensor containing indices into the embedding matrix
+        embedding_matrix:
+            Number of rows should correspond to the maximum possible index + 1,
+            number of columns is the embedding size
+        max_norm (float, optional): If given, renormalizes the embeddings to always have a norm less than this
+        norm_type (float, optional): The p of the p-norm to compute for the max_norm option
+        scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the frequency of
+                                                the words in the mini-batch.
+        sparse (boolean, optional): If ``True``, the gradient w.r.t. embedding_matrix will be a sparse tensor.
+
+    Shape:
+        - Input: LongTensor `(N, W)`, N = mini-batch, W = number of indices to extract per mini-batch
+        - Embedding_matrix: FloatTensor `(V, embedding_dim)`, V = maximum index + 1, embedding_dim = embedding size
+        - Output: `(N, W, embedding_dim)`
+
+    Examples::
+
+        >>> # a batch of 2 samples of 4 indices each
+        >>> input = Variable(torch.LongTensor([[1,2,4,5],[4,3,2,9]]))
+        >>> # an embedding matrix containing 10 vectors of size 3
+        >>> embedding_matrix = Variable(torch.randn(10, 3))
+        >>> torch.nn.functional.embedding(input, embedding_matrix)
+
+        Variable containing:
+        (0 ,.,.) =
+         -1.0822  1.2522  0.2434
+          0.8393 -0.6062 -0.3348
+          0.6597  0.0350  0.0837
+          0.5521  0.9447  0.0498
+
+        (1 ,.,.) =
+          0.6597  0.0350  0.0837
+         -0.1527  0.0877  0.4260
+          0.8393 -0.6062 -0.3348
+         -0.8738 -0.9054  0.4281
+        [torch.FloatTensor of size 2x4x3]
+
+        >>> # example where row 0 of the embedding matrix is zeroed out to act as padding
+        >>> embedding_matrix = Variable(torch.randn(10, 3))
+        >>> embedding_matrix[0].zero_()
+        >>> input = Variable(torch.LongTensor([[0,2,0,5]]))
+        >>> torch.nn.functional.embedding(input, embedding_matrix)
+
+        Variable containing:
+        (0 ,.,.) =
+          0.0000  0.0000  0.0000
+          0.3452  0.4937 -0.9361
+          0.0000  0.0000  0.0000
+          0.0706 -2.1962 -0.6276
+        [torch.FloatTensor of size 1x4x3]
+
+    """
+    return torch.nn.backends.thnn.backend.Embedding(
+        -1, max_norm, norm_type,
+        scale_grad_by_freq, sparse
+    )(input, embedding_matrix)
+
+
 def batch_norm(input, running_mean, running_var, weight=None, bias=None,
                training=False, momentum=0.1, eps=1e-5):
     f = torch._C._functions.BatchNorm(running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled)
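
A minimal usage sketch of the functional embedding added in this hunk (not part of the commit; it assumes a Variable-era PyTorch build where `torch.nn.functional.embedding` has the signature introduced above, and the keyword arguments shown are passed through unchanged):

    # Hypothetical usage sketch: exercises the new functional embedding.
    # Assumes a Variable-era PyTorch build; keyword arguments mirror the
    # signature introduced in the hunk above.
    import torch
    from torch.autograd import Variable
    import torch.nn.functional as F

    # A batch of 2 samples with 4 indices each, drawn from a vocabulary of 10 words.
    indices = Variable(torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]))

    # One 3-dimensional embedding vector per vocabulary entry; requires_grad so
    # the table can be trained.
    embedding_matrix = Variable(torch.randn(10, 3), requires_grad=True)

    # Functionally this is a row lookup: each index selects a row of the matrix,
    # so the result has shape (2, 4, 3).
    out = F.embedding(indices, embedding_matrix,
                      max_norm=None, norm_type=2,
                      scale_grad_by_freq=False, sparse=False)
    print(out.size())                      # torch.Size([2, 4, 3])

    # Gradients flow back into the embedding matrix (dense here; sparse=True
    # would yield a sparse gradient instead).
    out.sum().backward()
    print(embedding_matrix.grad.size())    # torch.Size([10, 3])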