Commit f0c7124

albanD authored and soumith committed

Allow support for negative dimension argument for all functions

1 parent e7f5220 commit f0c7124
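
In user-facing terms, every function that takes a dim argument now also accepts Python-style negative indices, which wrap to dim + ndim. A minimal sketch of the behavior this enables (assuming a build that includes this commit):

import torch

x = torch.randn(10, 20, 30)
# -1 wraps to -1 + x.dim() = 2, the last dimension:
assert torch.equal(x.sum(-1), x.sum(2))
assert torch.equal(x.transpose(-1, -2), x.transpose(2, 1))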

File tree: 13 files changed, +402 −187 lines


setup.py

Lines changed: 2 additions & 1 deletion
@@ -154,10 +154,11 @@ def run(self):
     from tools.cwrap.plugins.KwargsPlugin import KwargsPlugin
     from tools.cwrap.plugins.NullableArguments import NullableArguments
     from tools.cwrap.plugins.CuDNNPlugin import CuDNNPlugin
+    from tools.cwrap.plugins.WrapDim import WrapDim
     thp_plugin = THPPlugin()
     cwrap('torch/csrc/generic/TensorMethods.cwrap', plugins=[
         BoolOption(), thp_plugin, AutoGPU(condition='IS_CUDA'),
-        ArgcountSortPlugin(), KwargsPlugin()
+        ArgcountSortPlugin(), KwargsPlugin(), WrapDim()
     ])
     cwrap('torch/csrc/cudnn/cuDNN.cwrap', plugins=[
         CuDNNPlugin(), NullableArguments()

test/test_autograd.py

Lines changed: 138 additions & 110 deletions
Large diffs are not rendered by default.

test/test_cuda.py

Lines changed: 30 additions & 0 deletions
@@ -155,12 +155,15 @@ def tmp(t):
     ('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
     ('chunk', medium_2d, lambda t: [4],),
     ('chunk', medium_2d, lambda t: [4, 1], 'dim'),
+    ('chunk', medium_2d, lambda t: [4, -2], 'neg_dim'),
     ('clamp', medium_2d_scaled, lambda t: [-1, 5],),
     ('clone', medium_2d, lambda t: [],),
     ('contiguous', medium_2d, lambda t: [],),
     ('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],),
     ('cumprod', small_3d, lambda t: [1],),
+    ('cumprod', small_3d, lambda t: [-1], 'neg_dim'),
     ('cumsum', small_3d, lambda t: [1],),
+    ('cumsum', small_3d, lambda t: [-1], 'neg_dim'),
     ('dim', small_3d, lambda t: [],),
     ('dist', small_2d, lambda t: [small_2d(t)],),
     ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
@@ -188,52 +191,72 @@ def tmp(t):
     # TODO: positive case
     ('kthvalue', small_3d_unique, lambda t: [3],),
     ('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
+    ('kthvalue', small_3d_unique, lambda t: [3, -1], 'neg_dim'),
     ('lerp', small_3d, lambda t: [small_3d(t), 0.3],),
     ('max', small_3d_unique, lambda t: [],),
     ('max', small_3d_unique, lambda t: [1], 'dim'),
+    ('max', small_3d_unique, lambda t: [-1], 'neg_dim'),
     ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
     ('min', small_3d_unique, lambda t: [],),
     ('min', small_3d_unique, lambda t: [1], 'dim'),
+    ('min', small_3d_unique, lambda t: [-1], 'neg_dim'),
     ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
     ('mean', small_3d, lambda t: [],),
+    ('mean', small_3d, lambda t: [-1], 'neg_dim'),
     ('mean', small_3d, lambda t: [1], 'dim'),
     ('mode', small_3d, lambda t: [],),
     ('mode', small_3d, lambda t: [1], 'dim'),
+    ('mode', small_3d, lambda t: [-1], 'neg_dim'),
     ('remainder', small_3d, lambda t: [3], 'value'),
     ('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
     ('std', small_3d, lambda t: [],),
     ('std', small_3d, lambda t: [1], 'dim'),
+    ('std', small_3d, lambda t: [-1], 'neg_dim'),
     ('var', small_3d, lambda t: [],),
     ('var', small_3d, lambda t: [1], 'dim'),
+    ('var', small_3d, lambda t: [-1], 'neg_dim'),
     ('ndimension', small_3d, lambda t: [],),
     ('nelement', small_3d, lambda t: [],),
     ('numel', small_3d, lambda t: [],),
     ('narrow', small_3d, lambda t: [1, 3, 2],),
+    ('narrow', small_3d, lambda t: [-1, 3, 2], 'neg_dim'),
     ('nonzero', small_3d, lambda t: [],),
     ('norm', small_3d, lambda t: [],),
     ('norm', small_3d, lambda t: [3], '3_norm'),
     ('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
+    ('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim'),
     ('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
     ('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
     ('prod', small_2d_oneish, lambda t: [],),
     ('prod', small_3d, lambda t: [1], 'dim'),
+    ('prod', small_3d, lambda t: [-1], 'neg_dim'),
     ('sum', small_2d, lambda t: [],),
     ('sum', small_3d, lambda t: [1], 'dim'),
+    ('sum', small_3d, lambda t: [-1], 'neg_dim'),
     ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
+    ('renorm', small_3d, lambda t: [2, -1, 1], '2_norm_neg_dim'),
     ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
     ('repeat', small_2d, lambda t: [2, 2, 2],),
     ('size', new_t(1, 2, 3, 4), lambda t: [],),
+    ('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'),
+    ('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'),
     ('sort', small_3d_unique, lambda t: [],),
     ('sort', small_3d_unique, lambda t: [1], 'dim'),
+    ('sort', small_3d_unique, lambda t: [-1], 'neg_dim'),
     ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
+    ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'),
     ('split', small_3d, lambda t: [2],),
     ('split', small_3d, lambda t: [2, 1], 'dim'),
+    ('split', small_3d, lambda t: [2, -3], 'neg_dim'),
     ('squeeze', new_t(1, 2, 1, 4), lambda t: [],),
     ('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
+    ('squeeze', new_t(1, 2, 1, 4), lambda t: [-2], 'neg_dim'),
     ('t', new_t(1, 2), lambda t: [],),
     ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
+    ('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'),
     ('to_list', small_3d, lambda t: [],),
     ('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort'),
+    ('topk', small_3d, lambda t: [2, -1, False, True], 'neg_dim_sort'),
     ('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort'),
     ('trace', medium_2d, lambda t: [],),
     ('tril', medium_2d, lambda t: [],),
@@ -243,6 +266,7 @@ def tmp(t):
     ('triu', medium_2d, lambda t: [2], 'positive'),
     ('triu', medium_2d, lambda t: [-2], 'negative'),
     ('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
+    ('unsqueeze', new_t(2, 3, 4), lambda t: [-2], 'neg_dim'),
     ('view', small_3d, lambda t: [100, 10],),
     ('view_as', small_3d, lambda t: [t(100, 10)],),
     ('zero', small_3d, lambda t: [],),
@@ -467,6 +491,9 @@ def test_scatter_cpu(self):
     def test_scatter_cpu_dim(self):
         self._test_scatter(torch.randn(4, 4), dim=1)

+    def test_scatter_cpu_neg_dim(self):
+        self._test_scatter(torch.randn(4, 4), dim=-2)
+
     def test_scatter_cpu_sizes(self):
         self._test_scatter(torch.randn(6, 4), chunk_sizes=(2, 4))

@@ -476,6 +503,9 @@ def test_scatter_gpu(self):
     def test_scatter_gpu_dim(self):
         self._test_scatter(torch.randn(4, 4).cuda(), dim=1)

+    def test_scatter_gpu_neg_dim(self):
+        self._test_scatter(torch.randn(4, 4).cuda(), dim=-2)
+
     def test_scatter_gpu_sizes(self):
         self._test_scatter(torch.randn(6, 4).cuda(), chunk_sizes=(2, 4))
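
Each tuple above is (method name, input constructor, argument constructor, test-name suffix). The harness builds the input on the CPU, mirrors it onto the GPU, invokes the named method on both, and checks that the results agree. Roughly, with an assumed stand-in for the suite's small_3d_unique helper (the stand-in and variable names here are illustrative, not the real harness):

import torch

def small_3d_unique(t):
    # Assumed stand-in: a 5x5x5 tensor with all-distinct values.
    return t(5, 5, 5).copy_(torch.randperm(125).float().view(5, 5, 5))

name, constr, arg_constr = 'sort', small_3d_unique, lambda t: [-1]
cpu = constr(torch.FloatTensor)
gpu = cpu.cuda()
cpu_res = getattr(cpu, name)(*arg_constr(torch.FloatTensor))
gpu_res = getattr(gpu, name)(*arg_constr(torch.cuda.FloatTensor))
# The suite then asserts that cpu_res and gpu_res match.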

test/test_torch.py

Lines changed: 100 additions & 15 deletions
@@ -2,6 +2,7 @@
 import os
 import math
 import random
+import copy
 import torch
 import torch.cuda
 import tempfile
@@ -3132,23 +3133,107 @@ def test_Size(self):
         self.assertIsInstance(x[:-1], torch.Size)
         self.assertIsInstance(x + x, torch.Size)

-    def test_transpose_neg(self):
-        x = torch.randn(10, 20, 30)
-        ndim = 3
+# Functions to test negative dimension wrapping
+METHOD = 1
+INPLACE_METHOD = 2
+FUNCTIONAL = 4
+DIM_ARG = None

-        for i, j in combinations(range(ndim), 2):
-            a = x.transpose(i, j)
-            b = x.transpose(i - ndim, j - ndim)
-            self.assertEqual(a, b)

-            a = torch.transpose(x, i, j)
-            b = torch.transpose(x, i - ndim, j - ndim)
-            self.assertEqual(a, b)
-
-            a = x.clone()
-            x.transpose_(i, j)
-            x.transpose_(i - ndim, j - ndim)
-            self.assertEqual(a, x)
+def make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim=0):
+    def neg_dim_test(self):
+        if isinstance(tensor_arg, list):
+            assert METHOD not in types and INPLACE_METHOD not in types
+            x = [torch.randn(arg) for arg in tensor_arg]
+            ndim = len(tensor_arg[-1])
+        else:
+            x = torch.randn(*tensor_arg)
+            ndim = len(tensor_arg)
+        ndim += extra_dim
+
+        n_dim_to_test = sum(map(lambda e: e is DIM_ARG, arg_constr()))
+
+        for dims_val in combinations(range(ndim), n_dim_to_test):
+            arg = arg_constr()
+            arg_neg = copy.deepcopy(arg)
+            idx = 0
+            for i, v in enumerate(arg):
+                if v is DIM_ARG:
+                    arg[i] = dims_val[idx]
+                    arg_neg[i] = dims_val[idx] - ndim
+                    idx += 1
+
+            if METHOD in types:
+                a = getattr(x, name)(*arg)
+                b = getattr(x, name)(*arg_neg)
+                self.assertEqual(a, b)
+
+            if INPLACE_METHOD in types:
+                a = x.clone()
+                getattr(a, name + '_')(*arg)
+                b = x.clone()
+                getattr(b, name + '_')(*arg_neg)
+                self.assertEqual(a, b)
+
+            if FUNCTIONAL in types:
+                a = getattr(torch, name)(x, *arg)
+                b = getattr(torch, name)(x, *arg_neg)
+                self.assertEqual(a, b)
+
+    return neg_dim_test
+
+
+def idx_tensor(size, max_val):
+    return torch.LongTensor(*size).random_(0, max_val - 1)
+
+neg_dim_tests = [
+    ('narrow', (10, 20, 30), lambda: [DIM_ARG, 0, 5], [METHOD]),
+    ('transpose', (10, 20, 30), lambda: [DIM_ARG, DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
+    ('size', (10, 20, 30), lambda: [DIM_ARG], [METHOD]),
+    ('cat', [(2, 3, 4), (2, 3, 4)], lambda: [DIM_ARG], [FUNCTIONAL]),
+    ('chunk', (10, 20, 30), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('gather', (10, 20), lambda: [DIM_ARG, idx_tensor((10, 20), 10)], [METHOD, FUNCTIONAL]),
+    ('index_select', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10)], [METHOD, FUNCTIONAL]),
+    ('split', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('squeeze', (10, 1, 20, 1), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
+    ('stack', [(2, 3, 4), (2, 3, 4)], lambda: [DIM_ARG], [FUNCTIONAL]),
+    ('unbind', (2, 3, 4), lambda: [DIM_ARG], [FUNCTIONAL]),
+    ('unsqueeze', (10, 20), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL], 1),
+    ('cumprod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('cumsum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('mean', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('median', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('mode', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('norm', (10, 20), lambda: [2, DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('prod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('std', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('sum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('var', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('kthvalue', (10, 20), lambda: [3, DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('max', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('min', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('sort', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('topk', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
+    ('renorm', (10, 20), lambda: [2, DIM_ARG, 1], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
+    ('index_add', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
+    ('index_copy', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
+    ('index_fill', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), 12], [INPLACE_METHOD]),
+    ('scatter', (10, 10), lambda: [DIM_ARG, idx_tensor((10, 10), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
+    ('select', (10, 20), lambda: [DIM_ARG, 3], [METHOD]),
+    ('unfold', (10, 20), lambda: [DIM_ARG, 5, 2], [METHOD]),
+]
+
+for decl in neg_dim_tests:
+    if len(decl) == 4:
+        name, tensor_arg, arg_constr, types = decl
+        extra_dim = 0
+    elif len(decl) == 5:
+        name, tensor_arg, arg_constr, types, extra_dim = decl
+
+    test_name = 'test_' + name + '_neg_dim'
+
+    assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name
+    setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim))

 if __name__ == '__main__':
     run_tests()
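
The loop above stamps one generated test per entry onto TestTorch, named test_<name>_neg_dim, each asserting that a wrapped negative dim gives the same result as its non-negative form; this subsumes the hand-written test_transpose_neg it replaces. For transpose on a 3-D tensor the generated check amounts to this sketch:

import torch

x = torch.randn(10, 20, 30)
ndim = x.dim()
for i, j in ((0, 1), (0, 2), (1, 2)):
    assert torch.equal(x.transpose(i, j), x.transpose(i - ndim, j - ndim))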

tools/cwrap/cwrap.py

Lines changed: 1 addition & 1 deletion
@@ -198,7 +198,7 @@ def build_option_args(self, arguments, arg_unpack):
         arguments = self.get_assign_args(arguments)
         for arg, unpack in zip(arguments, arg_unpack):
             if arg['type'] == 'CONSTANT':
-                call_arg.append(str(arg['name']))
+                call_arg.append(unpack)
             else:
                 var_name = "arg_" + str(arg.get('assign_name', arg['name']))
                 res = self.ARG_ASSIGN_TEMPLATE.substitute(

tools/cwrap/plugins/WrapDim.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+from . import CWrapPlugin
+from string import Template
+
+
+class WrapDim(CWrapPlugin):
+
+    NDIM_TEMPLATE = Template(
+        """${arg_tensor}->nDimension""")
+
+    CODE_TEMPLATE = Template(
+        """THPUtils_assert(${arg_dim} >= -(${ndim}) && ${arg_dim} < (${ndim}),
+"dimension out of range (expected to be in range of [%d, %d], but got %d)",
+-(${ndim}), (${ndim})-1, ${arg_dim});
+if (${arg_dim} < 0) ${arg_dim} += (${ndim});""")
+
+    def initialize(self, cwrap):
+        self.cwrap = cwrap
+
+    def process_option_code_template(self, template, option):
+        new_code = []
+        for i, arg in enumerate(option['arguments']):
+            if 'wrap_dim' not in arg:
+                continue
+
+            params = arg.get('wrap_dim').split("+")
+            arg_tensor = params[0]
+
+            arg_tensor = "arg_" + arg_tensor
+            arg_dim = "arg_" + arg.get('assign_name', arg['name'])
+
+            params[0] = self.NDIM_TEMPLATE.substitute(arg_tensor=arg_tensor)
+            ndim = "+".join(params)
+
+            new_code.append(self.CODE_TEMPLATE.substitute(
+                arg_dim=arg_dim,
+                ndim=ndim))
+            new_code.append("")
+
+        template = new_code + template
+        return template
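
To see what the plugin injects into the generated wrapper, one can run the substitution by hand. The sketch below reuses CODE_TEMPLATE from the file above; the arg_dim/arg_self names follow the plugin's "arg_" + name convention for an argument named dim declared with wrap_dim: self (the concrete names are illustrative):

from string import Template

CODE_TEMPLATE = Template(
    """THPUtils_assert(${arg_dim} >= -(${ndim}) && ${arg_dim} < (${ndim}),
"dimension out of range (expected to be in range of [%d, %d], but got %d)",
-(${ndim}), (${ndim})-1, ${arg_dim});
if (${arg_dim} < 0) ${arg_dim} += (${ndim});""")

# Substituting as the plugin would for `wrap_dim: self` prints the C guard:
print(CODE_TEMPLATE.substitute(arg_dim='arg_dim', ndim='arg_self->nDimension'))

The emitted guard rejects out-of-range dims with a readable error and adds ndim to negative values before the underlying TH call runs.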

tools/cwrap/plugins/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -65,3 +65,4 @@ def process_pre_arg_assign(self, template, option):
 from .AutoGPU import AutoGPU
 from .CuDNNPlugin import CuDNNPlugin
 from .GenericNN import GenericNN
+from .WrapDim import WrapDim

torch/_torch_docs.py

Lines changed: 2 additions & 0 deletions
@@ -4297,6 +4297,8 @@

 The returned tensor shares the same underlying data with this tensor.

+A negative dim value can be used and will correspond to :math:`dim + input.dim() + 1`
+
 Args:
     input (Tensor): the input `Tensor`
     dim (int): The index at which to insert the singleton dimension
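
Concretely, per the formula in the added doc line (a small sketch, assuming a build that includes this commit):

import torch

x = torch.randn(2, 3)
# dim = -1 corresponds to -1 + x.dim() + 1 = 2, i.e. a new trailing axis:
assert x.unsqueeze(-1).size() == x.unsqueeze(2).size() == torch.Size([2, 3, 1])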

torch/autograd/_functions/reduce.py

Lines changed: 8 additions & 3 deletions
@@ -60,8 +60,9 @@ def backward(self, grad_output):
             return grad_input
         else:
             input, output = self.saved_tensors
+            dim = self.dim if self.dim >= 0 else self.dim + input.dim()
             zero_mask = input == 0
-            slice_zero_count = zero_mask.sum(self.dim)
+            slice_zero_count = zero_mask.sum(dim)
             total_zeros = slice_zero_count.sum()
             grad_input = grad_output.mul(output).expand_as(input).div(input)
             if total_zeros == 0:
@@ -71,17 +72,21 @@ def backward(self, grad_output):
                 grad_input[some_zeros] = 0

             single_zero_idx = slice_zero_count.eq(1).nonzero()
+
+            if len(single_zero_idx) == 0:
+                return grad_input
+
             for idx in single_zero_idx:
                 idx_tuple = tuple(idx.cpu())
-                input_idx_tuple = idx_tuple[:self.dim] + (slice(0, None),) + idx_tuple[self.dim + 1:]
+                input_idx_tuple = idx_tuple[:dim] + (slice(0, None),) + idx_tuple[dim + 1:]

                 # slice_mask and input_copy are 1D
                 slice_mask = zero_mask[input_idx_tuple]
                 input_copy = input[input_idx_tuple].clone()
                 zero_idx = slice_mask.nonzero()[0, 0]
                 input_copy[zero_idx] = 1.

-                grad_idx_tuple = idx_tuple[:self.dim] + (zero_idx,) + idx_tuple[self.dim + 1:]
+                grad_idx_tuple = idx_tuple[:dim] + (zero_idx,) + idx_tuple[dim + 1:]
                 grad_input[grad_idx_tuple] = grad_output[idx_tuple] * input_copy.prod()

             return grad_input
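
The normalization matters here because idx_tuple[:dim] slices a plain Python tuple: a negative dim would count from the wrong end and build a bad index. In isolation (the wrap_dim helper name is mine; the inline conditional above is what the code actually uses):

def wrap_dim(dim, ndim):
    # Map a possibly negative dim into [0, ndim), as Prod.backward now does inline.
    return dim if dim >= 0 else dim + ndim

idx_tuple = (4, 7)      # location of a slice containing a single zero
dim = wrap_dim(-2, 2)   # -2 on a 2-D input wraps to 0
assert idx_tuple[:dim] + (slice(0, None),) + idx_tuple[dim + 1:] == (slice(0, None), 7)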

torch/autograd/variable.py

Lines changed: 2 additions & 0 deletions
@@ -661,10 +661,12 @@ def transpose(self, dim1, dim2):
         return Transpose(dim1, dim2)(self)

     def select(self, dim, _index):
+        dim = dim if dim >= 0 else dim + self.dim()
         index = tuple(slice(None, None) for _ in range(dim)) + (_index,)
         return Index(index)(self)

     def narrow(self, dim, start_index, length):
+        dim = dim if dim >= 0 else dim + self.dim()
         index = tuple(slice(None, None) for _ in range(dim)) + \
             (slice(start_index, start_index + length),)
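
select and narrow build their index tuples with range(dim), which is empty for a negative dim, so Variable must wrap in Python before the tuple is built rather than relying on the C-level check. A quick sketch against the old Variable API used in this file (assuming a build that includes this commit):

import torch
from torch.autograd import Variable

v = Variable(torch.randn(4, 5))
assert torch.equal(v.narrow(-1, 0, 2).data, v.narrow(1, 0, 2).data)   # -1 wraps to 1
assert torch.equal(v.select(-2, 3).data, v.select(0, 3).data)         # -2 wraps to 0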
