@@ -1,51 +1,51 @@
 import torch
 from torch.autograd.function import Function
 from torch._thnn import type2backend
+from torch.autograd.function import once_differentiable
 
 from . import _all_functions
 
 
 class Embedding(Function):
 
-    def __init__(self, padding_idx, max_norm, norm_type, scale_grad_by_freq,
-                 sparse=False):
-        super(Embedding, self).__init__()
-        self.padding_idx = padding_idx
-        self.max_norm = max_norm
-        self.norm_type = norm_type
-        self.scale_grad_by_freq = scale_grad_by_freq
-        self._indices = None
-        self.sparse = sparse
-
-    def _renorm(self, indices, weight):
+    @staticmethod
+    def _renorm(ctx, indices, weight, max_norm, norm_type):
         if indices.dim() == 2:
             indices = indices.clone().view(-1)
 
-        self._backend.LookupTable_renorm(
-            self._backend.library_state,
+        ctx._backend.LookupTable_renorm(
+            ctx._backend.library_state,
             indices,
             weight,
-            self.max_norm,
-            self.norm_type
+            max_norm,
+            norm_type
         )
 
-    def forward(self, indices, weight):
+    @classmethod
+    def forward(cls, ctx, indices, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq,
+                sparse=False):
+
+        ctx.padding_idx = padding_idx
+        ctx.scale_grad_by_freq = scale_grad_by_freq
+        ctx._indices = None
+        ctx.sparse = sparse
+
         assert indices.dim() <= 2
-        assert not self.needs_input_grad[0], "Embedding doesn't " \
+        assert not ctx.needs_input_grad[0], "Embedding doesn't " \
             "compute the gradient w.r.t. the indices"
 
-        self._backend = type2backend[type(weight)]
-        self._weight_size = weight.size()
+        ctx._backend = type2backend[type(weight)]
+        ctx._weight_size = weight.size()
 
         if not indices.is_contiguous():
-            self._indices = indices.contiguous()
-            indices = self._indices
+            ctx._indices = indices.contiguous()
+            indices = ctx._indices
         else:
-            self.save_for_backward(indices)
+            ctx.save_for_backward(indices)
 
         output = weight.new()
-        if self.max_norm is not None:
-            self._renorm(indices, weight)
+        if max_norm is not None:
+            cls._renorm(ctx, indices, weight, max_norm, norm_type)
 
         if indices.dim() == 1:
             output = torch.index_select(weight, 0, indices)
@@ -55,14 +55,16 @@ def forward(self, indices, weight):
 
         return output
 
-    def backward(self, grad_output):
-        if self._indices is not None:
-            indices = self._indices
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        if ctx._indices is not None:
+            indices = ctx._indices
         else:
-            indices, = self.saved_tensors
+            indices, = ctx.saved_tensors
 
         grad_output = grad_output.contiguous()
-        if not self.sparse:
+        if not ctx.sparse:
             if indices.dim() == 2:
                 indices = indices.view(-1)
 
@@ -75,17 +77,18 @@ def backward(self, grad_output):
                     _count = torch.IntTensor()
                     _sorted = _indices = None
 
-            grad_weight = grad_output.new(self._weight_size).zero_()
-            self._backend.LookupTable_accGradParameters(
-                self._backend.library_state,
+            grad_weight = grad_output.new(ctx._weight_size).zero_()
+            # Doesn't support Variable grad_output
+            ctx._backend.LookupTable_accGradParameters(
+                ctx._backend.library_state,
                 indices,
                 grad_output,
                 grad_weight,
                 _count,
                 _sorted,
                 _indices,
-                self.scale_grad_by_freq,
-                self.padding_idx,
+                ctx.scale_grad_by_freq,
+                ctx.padding_idx,
                 1
             )
         else:
@@ -96,10 +99,10 @@ def backward(self, grad_output):
             SparseTensor = getattr(torch.sparse, tensor_type)
             grad_weight = SparseTensor(
                 indices.view(1, -1),
-                grad_output.view(-1, self._weight_size[1]),
-                self._weight_size,
+                grad_output.view(-1, ctx._weight_size[1]),
+                ctx._weight_size,
             )
-        return None, grad_weight
+        return None, grad_weight, None, None, None, None, None
 
 
 _all_functions.append(Embedding)
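
For orientation, a minimal usage sketch (not part of the diff) of the converted class: new-style autograd Functions are invoked through Embedding.apply(...) rather than by instantiating the class, and the seven return values of backward line up one-to-one with the seven forward arguments after ctx. The snippet assumes the Variable-era API this file targets, and assumes padding_idx=-1 means "no padding index", as the nn.Embedding module wrapper of this era passed it.

# Usage sketch under the assumptions above; `Embedding` is the class defined in this file.
import torch
from torch.autograd import Variable

weight = Variable(torch.randn(10, 3), requires_grad=True)  # 10 embeddings of dim 3
indices = Variable(torch.LongTensor([[1, 2], [4, 9]]))     # 2 x 2 lookup indices

# Argument order follows forward(ctx, indices, weight, padding_idx, max_norm,
# norm_type, scale_grad_by_freq, sparse); -1 is assumed to disable padding_idx.
output = Embedding.apply(indices, weight, -1, None, 2, False, False)

output.sum().backward()
# backward returns (None, grad_weight, None, None, None, None, None):
# a gradient only for `weight`, None for indices and the non-tensor arguments.
print(weight.grad.size())  # torch.Size([10, 3])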