
Commit 9130ab3

ptrblck authored and facebook-github-bot committed
fix gemm call for CUDABlas for THCUNN conv, #23545 (#23552)
Summary:

* Swapped `CUBLAS_OP_N` for `'n'`
* Added a test

This PR should fix #23545. Thanks @AlphabetMan for reporting the initial issue in [the forum](https://discuss.pytorch.org/t/cuda-10-1-error-using-transposeconv2d-with-output-padding-1/51414?u=ptrblck), and @ngimel for the guidance.

Pull Request resolved: #23552

Differential Revision: D16580986

Pulled By: ezyang

fbshipit-source-id: abc0bce1e84d9c9d96d44ae0296951725adc8424
1 parent 5d130e4 commit 9130ab3
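
For context, a minimal repro sketch of the failure reported in the forum thread (it mirrors the regression test added below; assumes a CUDA build of PyTorch with a CUDA device, and disables cuDNN so the half-precision cuBLAS gemm path is exercised):

```python
import torch
import torch.nn as nn

# Disable cuDNN so ConvTranspose2d falls back to the path that ends up in
# at::cuda::blas::gemm<at::Half>.
with torch.backends.cudnn.flags(enabled=False):
    deconv = nn.ConvTranspose2d(
        1, 1, 3, stride=2, padding=1, output_padding=1).cuda().half()
    inputs = torch.randn(1, 1, 16, 16, device='cuda', dtype=torch.half)
    output = deconv(inputs)
    output.mean().backward()  # failed with a cuBLAS error before this fix
```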

File tree

2 files changed: +10 -1


aten/src/ATen/cuda/CUDABlas.cpp

+1 -1

@@ -315,7 +315,7 @@ void gemv<at::Half>(CUDABLAS_GEMV_ARGTYPES(at::Half)) {
       incy == 1,
       "at::cuda::blas::gemv<Half>: support for incy != 1 not implemented");
   gemm<at::Half>(
-      stream, trans, CUBLAS_OP_N, m, 1, n, alpha, a, n, x, n, beta, y, m);
+      stream, trans, 'n', m, 1, n, alpha, a, n, x, n, beta, y, m);
 }

 } // namespace blas
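
Why the one-character change fixes the bug: this `gemm<at::Half>` overload takes its transpose flags as `char` values (`'n'`/`'t'`), which it converts to `cublasOperation_t` before calling cuBLAS. `CUBLAS_OP_N` is the enum constant 0, not the character `'n'`, so passing it through the `char` parameter selected no valid operation. A minimal sketch of that kind of conversion, with an illustrative helper name (the actual converter in CUDABlas.cpp may differ):

```cpp
#include <cublas_v2.h>
#include <stdexcept>

// Illustrative (hypothetical) helper: map a char transpose flag to the
// cuBLAS enum, as char-based gemm wrappers typically do.
static cublasOperation_t op_from_char(char trans) {
  switch (trans) {
    case 'n': case 'N': return CUBLAS_OP_N;
    case 't': case 'T': return CUBLAS_OP_T;
    case 'c': case 'C': return CUBLAS_OP_C;
    default:
      throw std::runtime_error("invalid transpose flag");
  }
}

// CUBLAS_OP_N == 0, so passing it where a char is expected arrives as '\0'
// and hits the error path above instead of selecting the no-transpose op.
```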

test/test_nn.py

+9

@@ -5289,6 +5289,15 @@ def test_ConvTranspose3d_correct_output_size(self):
         i = torch.rand(1, 2, 1, 1, 1)
         out = m(i, output_size=(1, 2, 2, 2, 2))

+    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
+    def test_ConvTranspose2d_half_cublas_gemm(self):
+        with torch.backends.cudnn.flags(enabled=False):
+            inputs = torch.randn(1, 1, 16, 16, device='cuda', dtype=torch.half)
+            deconv = nn.ConvTranspose2d(
+                1, 1, 3, stride=2, padding=1, output_padding=1).cuda().half()
+            output = deconv(inputs)
+            output.mean().backward()
+
     def _test_Conv2d_naive_groups(self, device="cpu", dtype=torch.float):
         # Check that grouped convolutions matches two half convolutions
         m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
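
To run just the new test, the usual unittest selector should work (assuming the standard `run_tests` entry point in `test_nn.py`): `python test/test_nn.py TestNN.test_ConvTranspose2d_half_cublas_gemm`. The `skipIf` guard makes it a no-op on machines without CUDA.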
