Skip to content

Commit 45191dd

Browse files
author
marina.kolpakova
committed
merge CUDA dev branch
2 parents cc21104 + b1aa7ae commit 45191dd

File tree

8 files changed

+68
-53
lines changed

8 files changed

+68
-53
lines changed

cmake/OpenCVDetectCUDA.cmake

+1-1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ if(CUDA_FOUND)
3434
message(STATUS "CUDA detected: " ${CUDA_VERSION})
3535

3636
if (CARMA)
37-
set(CUDA_ARCH_BIN "3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
37+
set(CUDA_ARCH_BIN "2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
3838
set(CUDA_ARCH_PTX "3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
3939
else()
4040
set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")

modules/gpu/app/nv_perf_test/main.cpp

+32-33
Original file line number | Diff line number | Diff line change
@@ -75,15 +75,14 @@ int main(int argc, char* argv[])
7575

7676
DEF_PARAM_TEST_1(Image, std::string);
7777

78-
PERF_TEST_P(Image, HoughLinesP,
79-
testing::Values(std::string("im1_1280x800.jpg")))
78+
GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg")))
8079
{
8180
declare.time(30.0);
8281

8382
std::string fileName = GetParam();
8483

85-
const double rho = 1.0;
86-
const double theta = 1.0;
84+
const float rho = 1.f;
85+
const float theta = 1.f;
8786
const int threshold = 40;
8887
const int minLineLenght = 20;
8988
const int maxLineGap = 5;
@@ -125,8 +124,8 @@ PERF_TEST_P(Image, HoughLinesP,
125124

126125
DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);
127126

128-
PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
129-
testing::Combine(
127+
GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
128+
testing::Combine(
130129
testing::Values(std::string("im1_1280x800.jpg")),
131130
testing::Values(CV_8U, CV_16U)
132131
))
@@ -193,12 +192,12 @@ typedef std::pair<std::string, std::string> string_pair;
193192

194193
DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);
195194

196-
PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
197-
testing::Combine(
198-
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
199-
testing::Values(CV_8U, CV_16U),
200-
testing::Bool()
201-
))
195+
GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
196+
testing::Combine(
197+
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
198+
testing::Values(CV_8U, CV_16U),
199+
testing::Bool()
200+
))
202201
{
203202
declare.time(60);
204203

@@ -287,11 +286,11 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
287286

288287
DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);
289288

290-
PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
291-
testing::Combine(
292-
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
293-
testing::Values(CV_8U, CV_16U)
294-
))
289+
GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
290+
testing::Combine(
291+
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
292+
testing::Values(CV_8U, CV_16U)
293+
))
295294
{
296295
declare.time(500);
297296

@@ -384,15 +383,15 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
384383

385384
DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);
386385

387-
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
388-
testing::Combine(
389-
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
390-
testing::Values(cv::Size(16, 16)),
391-
testing::Values(cv::Size(2, 2)),
392-
testing::Values(cv::Size(16, 16))
393-
))
386+
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
387+
testing::Combine(
388+
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
389+
testing::Values(cv::Size(16, 16)),
390+
testing::Values(cv::Size(2, 2)),
391+
testing::Values(cv::Size(16, 16))
392+
))
394393
{
395-
declare.time(1000);
394+
declare.time(3000);
396395

397396
const string_pair fileNames = std::tr1::get<0>(GetParam());
398397
const cv::Size block_size = std::tr1::get<1>(GetParam());
@@ -435,15 +434,15 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
435434
SANITY_CHECK(0);
436435
}
437436

438-
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
439-
testing::Combine(
440-
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
441-
testing::Values(cv::Size(16, 16)),
442-
testing::Values(cv::Size(1, 1)),
443-
testing::Values(cv::Size(16, 16))
444-
))
437+
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
438+
testing::Combine(
439+
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
440+
testing::Values(cv::Size(16, 16)),
441+
testing::Values(cv::Size(1, 1)),
442+
testing::Values(cv::Size(16, 16))
443+
))
445444
{
446-
declare.time(1000);
445+
declare.time(3000);
447446

448447
const string_pair fileNames = std::tr1::get<0>(GetParam());
449448
const cv::Size block_size = std::tr1::get<1>(GetParam());

modules/gpu/perf/perf_imgproc.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -1805,8 +1805,8 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
18051805

18061806
std::string fileName = getDataPath(GetParam());
18071807

1808-
const float rho = 1.f;
1809-
const float theta = float(CV_PI) / 180.f;
1808+
const float rho = 1.0f;
1809+
const float theta = static_cast<float>(CV_PI / 180.0);
18101810
const int threshold = 100;
18111811
const int minLineLenght = 50;
18121812
const int maxLineGap = 5;

modules/gpu/perf/perf_softcascade.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#include "perf_precomp.hpp"
22

3-
#define GPU_PERF_TEST_P(fixture, name, params) \
3+
#define PERF_TEST_P1(fixture, name, params) \
44
class fixture##_##name : public fixture {\
55
public:\
66
fixture##_##name() {}\
@@ -52,7 +52,7 @@ namespace {
5252
typedef std::tr1::tuple<std::string, std::string> fixture_t;
5353
typedef perf::TestBaseWithParam<fixture_t> SCascadeTest;
5454

55-
GPU_PERF_TEST_P(SCascadeTest, detect,
55+
PERF_TEST_P1(SCascadeTest, detect,
5656
testing::Combine(
5757
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
5858
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))
@@ -108,7 +108,7 @@ static cv::Rect getFromTable(int idx)
108108
typedef std::tr1::tuple<std::string, std::string, int> roi_fixture_t;
109109
typedef perf::TestBaseWithParam<roi_fixture_t> SCascadeTestRoi;
110110

111-
GPU_PERF_TEST_P(SCascadeTestRoi, detectInRoi,
111+
PERF_TEST_P1(SCascadeTestRoi, detectInRoi,
112112
testing::Combine(
113113
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
114114
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@@ -152,7 +152,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
152152
NO_CPU(SCascadeTestRoi, detectInRoi)
153153

154154

155-
GPU_PERF_TEST_P(SCascadeTestRoi, detectEachRoi,
155+
PERF_TEST_P1(SCascadeTestRoi, detectEachRoi,
156156
testing::Combine(
157157
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
158158
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@@ -191,7 +191,7 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi)
191191

192192
NO_CPU(SCascadeTestRoi, detectEachRoi)
193193

194-
GPU_PERF_TEST_P(SCascadeTest, detectOnIntegral,
194+
PERF_TEST_P1(SCascadeTest, detectOnIntegral,
195195
testing::Combine(
196196
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
197197
testing::Values(std::string("cv/cascadeandhog/integrals.xml"))))
@@ -239,7 +239,7 @@ RUN_GPU(SCascadeTest, detectOnIntegral)
239239

240240
NO_CPU(SCascadeTest, detectOnIntegral)
241241

242-
GPU_PERF_TEST_P(SCascadeTest, detectStream,
242+
PERF_TEST_P1(SCascadeTest, detectStream,
243243
testing::Combine(
244244
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
245245
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))

modules/gpu/src/cuda/optflowbm.cu

+5-5
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ namespace optflowbm_fast
210210
{
211211
}
212212

213-
__device__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
213+
__device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
214214
{
215215
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
216216
{
@@ -246,7 +246,7 @@ namespace optflowbm_fast
246246
}
247247
}
248248

249-
__device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
249+
__device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
250250
{
251251
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
252252
{
@@ -271,7 +271,7 @@ namespace optflowbm_fast
271271
}
272272
}
273273

274-
__device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
274+
__device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
275275
{
276276
int ay = i;
277277
int ax = j + block_radius;
@@ -298,7 +298,7 @@ namespace optflowbm_fast
298298
}
299299
}
300300

301-
__device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
301+
__device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
302302
{
303303
int bestDist = numeric_limits<int>::max();
304304
int bestInd = -1;
@@ -328,7 +328,7 @@ namespace optflowbm_fast
328328
}
329329
}
330330

331-
__device__ void operator()(PtrStepf velx, PtrStepf vely) const
331+
__device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const
332332
{
333333
int tbx = blockIdx.x * TILE_COLS;
334334
int tby = blockIdx.y * TILE_ROWS;

modules/gpu/src/softcascade.cpp

+5-5
Original file line number | Diff line number | Diff line change
@@ -276,8 +276,8 @@ struct cv::gpu::SCascade::Fields
276276
int dcs = 0;
277277
for (int sc = 0; sc < totals; ++sc)
278278
{
279-
int width = ::std::max(0.0f, fw - (origObjWidth * scale));
280-
int height = ::std::max(0.0f, fh - (origObjHeight * scale));
279+
int width = (int)::std::max(0.0f, fw - (origObjWidth * scale));
280+
int height = (int)::std::max(0.0f, fh - (origObjHeight * scale));
281281

282282
float logScale = ::log(scale);
283283
int fit = fitOctave(voctaves, logScale);
@@ -457,7 +457,7 @@ cv::gpu::SCascade::~SCascade() { delete fields; }
457457
bool cv::gpu::SCascade::load(const FileNode& fn)
458458
{
459459
if (fields) delete fields;
460-
fields = Fields::parseCascade(fn, minScale, maxScale, scales, flags);
460+
fields = Fields::parseCascade(fn, (float)minScale, (float)maxScale, scales, flags);
461461
return fields != 0;
462462
}
463463

@@ -488,7 +488,7 @@ void cv::gpu::SCascade::detect(InputArray _image, InputArray _rois, OutputArray
488488
{
489489
flds.update(image.rows, image.cols, flds.shrinkage);
490490

491-
if (flds.check(minScale, maxScale, scales))
491+
if (flds.check((float)minScale, (float)maxScale, scales))
492492
flds.createLevels(image.rows, image.cols);
493493

494494
flds.preprocessor->apply(image, flds.shrunk);
@@ -672,4 +672,4 @@ cv::Ptr<cv::gpu::ChannelsProcessor> cv::gpu::ChannelsProcessor::create(const int
672672
cv::gpu::ChannelsProcessor::ChannelsProcessor() { }
673673
cv::gpu::ChannelsProcessor::~ChannelsProcessor() { }
674674

675-
#endif
675+
#endif

modules/ts/include/opencv2/ts/ts_perf.hpp

+15
Original file line number | Diff line number | Diff line change
@@ -474,6 +474,21 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
474474
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
475475
void fixture##_##name::PerfTestBody()
476476

477+
#define GPU_PERF_TEST_P(fixture, name, params) \
478+
class fixture##_##name : public fixture {\
479+
public:\
480+
fixture##_##name() {}\
481+
protected:\
482+
virtual void PerfTestBody();\
483+
};\
484+
TEST_P(fixture##_##name, name /*perf*/) \
485+
{ \
486+
try { RunPerfTestBody(); } \
487+
catch (...) { cv::gpu::resetDevice(); throw; } \
488+
} \
489+
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
490+
void fixture##_##name::PerfTestBody()
491+
477492

478493
#define CV_PERF_TEST_MAIN(testsuitname, ...) \
479494
int main(int argc, char **argv)\

samples/gpu/softcascade.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,8 @@ int main(int argc, char** argv)
9898

9999
std::cout << "working..." << std::endl;
100100
cv::imshow("Soft Cascade demo", result);
101-
cv::waitKey(10);
101+
if (27 == cv::waitKey(10))
102+
break;
102103
}
103104

104105
return 0;

0 commit comments

Comments
 (0)