Skip to content

Commit fd600b1

Browse files
committed
Merge commit '2b88d85505d7317f980e69201e72694d6d5905a4'
2 parents b5c9f5c + 2b88d85 commit fd600b1

File tree

6 files changed

+14
-6
lines changed

6 files changed

+14
-6
lines changed

torch/lib/THCUNN/LookupTable.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include "THCUNN.h"
22
#include "common.h"
33

4+
#include "THCThrustAllocator.cuh"
45
#include <thrust/device_ptr.h>
56
#include <thrust/execution_policy.h>
67
#include <thrust/iterator/constant_iterator.h>

torch/lib/THCUNN/MSECriterion.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#include "common.h"
33
#include "THCHalf.h"
44
#include "THCHalfAutoNumerics.cuh"
5+
#include "THCThrustAllocator.cuh"
56

67
#include <thrust/fill.h>
78
#include <thrust/functional.h>

torch/lib/THCUNN/SmoothL1Criterion.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#include "common.h"
33
#include "THCHalf.h"
44
#include "THCHalfAutoNumerics.cuh"
5+
#include "THCThrustAllocator.cuh"
56

67
#include <thrust/fill.h>
78
#include <thrust/functional.h>

torch/lib/THCUNN/generic/LookupTable.cu

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ void THNN_(LookupTable_accGradParameters)(
6464
THCIndexTensor_(resizeAs)(state, count, input);
6565
count_data = THCIndexTensor_(data)(state, count);
6666

67+
THCThrustAllocator thrustAlloc(state);
6768
thrust::device_ptr<THCIndex_t> sorted_ptr(sorted_data);
6869
thrust::device_ptr<THCIndex_t> count_ptr(count_data);
6970

@@ -72,7 +73,7 @@ void THNN_(LookupTable_accGradParameters)(
7273
// count: 1 1 2 3 1 2 1 1 2
7374
thrust::inclusive_scan_by_key(
7475
#if CUDA_VERSION >= 7000
75-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
76+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
7677
#endif
7778
sorted_ptr,
7879
sorted_ptr + numel,
@@ -85,7 +86,7 @@ void THNN_(LookupTable_accGradParameters)(
8586
// count: 1 3 3 3 2 2 1 2 2
8687
thrust::inclusive_scan_by_key(
8788
#if CUDA_VERSION >= 7000
88-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
89+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
8990
#endif
9091
thrust::make_reverse_iterator(sorted_ptr + numel),
9192
thrust::make_reverse_iterator(sorted_ptr),

torch/lib/THCUNN/generic/MSECriterion.cu

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,12 @@ void THNN_(MSECriterion_updateOutput)(
1818
input = THCTensor_(newContiguous)(state, input);
1919
target = THCTensor_(newContiguous)(state, target);
2020

21+
THCThrustAllocator thrustAlloc(state);
2122
thrust::device_ptr<real> input_data(THCTensor_(data)(state, input));
2223
thrust::device_ptr<real> target_data(THCTensor_(data)(state, target));
2324
accreal sum = thrust::inner_product(
2425
#if CUDA_VERSION >= 7000
25-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
26+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
2627
#endif
2728
input_data, input_data+size, target_data, (accreal) 0,
2829
thrust::plus<accreal>(), mse_functor<real, accreal>());
@@ -54,13 +55,14 @@ void THNN_(MSECriterion_updateGradInput)(
5455

5556
THCTensor_(resizeAs)(state, gradInput, input);
5657

58+
THCThrustAllocator thrustAlloc(state);
5759
thrust::device_ptr<real> input_data(THCTensor_(data)(state, input));
5860
thrust::device_ptr<real> target_data(THCTensor_(data)(state, target));
5961
thrust::device_ptr<real> gradInput_data(THCTensor_(data)(state, gradInput));
6062

6163
thrust::transform(
6264
#if CUDA_VERSION >= 7000
63-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
65+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
6466
#endif
6567
input_data, input_data+size, target_data, gradInput_data,
6668
mse_updateGradInput_functor<real, accreal>(norm));

torch/lib/THCUNN/generic/SmoothL1Criterion.cu

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,12 @@ void THNN_(SmoothL1Criterion_updateOutput)(
2222
input = THCTensor_(newContiguous)(state, input);
2323
target = THCTensor_(newContiguous)(state, target);
2424

25+
THCThrustAllocator thrustAlloc(state);
2526
thrust::device_ptr<real> input_data(THCTensor_(data)(state, input));
2627
thrust::device_ptr<real> target_data(THCTensor_(data)(state, target));
2728
accreal sum = thrust::inner_product(
2829
#if CUDA_VERSION >= 7000
29-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
30+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
3031
#endif
3132
input_data, input_data+size, target_data, (accreal) 0,
3233
thrust::plus<accreal>(), smoothl1_functor<real, accreal>()
@@ -63,13 +64,14 @@ void THNN_(SmoothL1Criterion_updateGradInput)(
6364

6465
THCTensor_(resizeAs)(state, gradInput, input);
6566

67+
THCThrustAllocator thrustAlloc(state);
6668
thrust::device_ptr<real> input_data(THCTensor_(data)(state, input));
6769
thrust::device_ptr<real> target_data(THCTensor_(data)(state, target));
6870
thrust::device_ptr<real> gradInput_data(THCTensor_(data)(state, gradInput));
6971

7072
thrust::transform(
7173
#if CUDA_VERSION >= 7000
72-
thrust::cuda::par.on(THCState_getCurrentStream(state)),
74+
thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)),
7375
#endif
7476
input_data, input_data+size, target_data, gradInput_data,
7577
smoothl1_updateGradInput_functor<real>(norm)

0 commit comments

Comments
 (0)