@@ -57,7 +57,7 @@ void cv::gpu::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
57
57
58
58
void cv::gpu::flip (InputArray, OutputArray, int , Stream&) { throw_no_cuda (); }
59
59
60
- void cv::gpu::LUT ( const GpuMat&, const Mat&, GpuMat&, Stream& ) { throw_no_cuda (); }
60
+ Ptr <LookUpTable> cv::gpu::createLookUpTable (InputArray ) { throw_no_cuda (); return Ptr <LookUpTable> (); }
61
61
62
62
void cv::gpu::copyMakeBorder (const GpuMat&, GpuMat&, int , int , int , int , int , const Scalar&, Stream&) { throw_no_cuda (); }
63
63
@@ -290,93 +290,214 @@ void cv::gpu::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stre
290
290
// //////////////////////////////////////////////////////////////////////
291
291
// LUT
292
292
293
- void cv::gpu::LUT (const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
293
+ #if (CUDA_VERSION >= 5000)
294
+
295
+ namespace
294
296
{
295
- const int cn = src.channels ();
297
+ class LookUpTableImpl : public LookUpTable
298
+ {
299
+ public:
300
+ LookUpTableImpl (InputArray lut);
296
301
297
- CV_Assert ( src.type () == CV_8UC1 || src.type () == CV_8UC3 );
298
- CV_Assert ( lut.depth () == CV_8U );
299
- CV_Assert ( lut.channels () == 1 || lut.channels () == cn );
300
- CV_Assert ( lut.rows * lut.cols == 256 && lut.isContinuous () );
302
+ void transform (InputArray src, OutputArray dst, Stream& stream = Stream::Null());
301
303
302
- dst.create (src.size (), CV_MAKE_TYPE (lut.depth (), cn));
304
+ private:
305
+ int lut_cn;
303
306
304
- NppiSize sz ;
305
- sz. height = src. rows ;
306
- sz. width = src. cols ;
307
+ int nValues3[ 3 ] ;
308
+ const Npp32s* pValues3[ 3 ] ;
309
+ const Npp32s* pLevels3[ 3 ] ;
307
310
308
- Mat nppLut;
309
- lut.convertTo (nppLut, CV_32S);
311
+ GpuMat d_pLevels;
312
+ GpuMat d_nppLut;
313
+ GpuMat d_nppLut3[3 ];
314
+ };
310
315
311
- int nValues3[] = {256 , 256 , 256 };
316
+ LookUpTableImpl::LookUpTableImpl (InputArray _lut)
317
+ {
318
+ nValues3[0 ] = nValues3[1 ] = nValues3[2 ] = 256 ;
312
319
313
- Npp32s pLevels[256 ];
314
- for (int i = 0 ; i < 256 ; ++i)
315
- pLevels[i] = i;
320
+ Npp32s pLevels[256 ];
321
+ for (int i = 0 ; i < 256 ; ++i)
322
+ pLevels[i] = i;
316
323
317
- const Npp32s* pLevels3[3 ];
324
+ d_pLevels.upload (Mat (1 , 256 , CV_32S, pLevels));
325
+ pLevels3[0 ] = pLevels3[1 ] = pLevels3[2 ] = d_pLevels.ptr <Npp32s>();
318
326
319
- #if (CUDA_VERSION <= 4020)
320
- pLevels3[0 ] = pLevels3[1 ] = pLevels3[2 ] = pLevels;
321
- #else
322
- GpuMat d_pLevels;
323
- d_pLevels.upload (Mat (1 , 256 , CV_32S, pLevels));
324
- pLevels3[0 ] = pLevels3[1 ] = pLevels3[2 ] = d_pLevels.ptr <Npp32s>();
325
- #endif
327
+ GpuMat lut;
328
+ if (_lut.kind () == _InputArray::GPU_MAT)
329
+ {
330
+ lut = _lut.getGpuMat ();
331
+ }
332
+ else
333
+ {
334
+ Mat hLut = _lut.getMat ();
335
+ CV_Assert ( hLut.total () == 256 && hLut.isContinuous () );
336
+ lut.upload (Mat (1 , 256 , hLut.type (), hLut.data ));
337
+ }
326
338
327
- cudaStream_t stream = StreamAccessor::getStream (s);
328
- NppStreamHandler h (stream);
339
+ lut_cn = lut.channels ();
340
+
341
+ CV_Assert ( lut.depth () == CV_8U );
342
+ CV_Assert ( lut.rows == 1 && lut.cols == 256 );
343
+
344
+ lut.convertTo (d_nppLut, CV_32S);
345
+
346
+ if (lut_cn == 1 )
347
+ {
348
+ pValues3[0 ] = pValues3[1 ] = pValues3[2 ] = d_nppLut.ptr <Npp32s>();
349
+ }
350
+ else
351
+ {
352
+ gpu::split (d_nppLut, d_nppLut3);
353
+
354
+ pValues3[0 ] = d_nppLut3[0 ].ptr <Npp32s>();
355
+ pValues3[1 ] = d_nppLut3[1 ].ptr <Npp32s>();
356
+ pValues3[2 ] = d_nppLut3[2 ].ptr <Npp32s>();
357
+ }
358
+ }
329
359
330
- if (src. type () == CV_8UC1 )
360
+ void LookUpTableImpl::transform (InputArray _src, OutputArray _dst, Stream& _stream )
331
361
{
332
- #if (CUDA_VERSION <= 4020)
333
- nppSafeCall ( nppiLUT_Linear_8u_C1R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
334
- dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, nppLut.ptr <Npp32s>(), pLevels, 256 ) );
335
- #else
336
- GpuMat d_nppLut (Mat (1 , 256 , CV_32S, nppLut.data ));
337
- nppSafeCall ( nppiLUT_Linear_8u_C1R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
338
- dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, d_nppLut.ptr <Npp32s>(), d_pLevels.ptr <Npp32s>(), 256 ) );
339
- #endif
362
+ GpuMat src = _src.getGpuMat ();
363
+
364
+ const int cn = src.channels ();
365
+
366
+ CV_Assert ( src.type () == CV_8UC1 || src.type () == CV_8UC3 );
367
+ CV_Assert ( lut_cn == 1 || lut_cn == cn );
368
+
369
+ _dst.create (src.size (), src.type ());
370
+ GpuMat dst = _dst.getGpuMat ();
371
+
372
+ cudaStream_t stream = StreamAccessor::getStream (_stream);
373
+
374
+ NppStreamHandler h (stream);
375
+
376
+ NppiSize sz;
377
+ sz.height = src.rows ;
378
+ sz.width = src.cols ;
379
+
380
+ if (src.type () == CV_8UC1)
381
+ {
382
+ nppSafeCall ( nppiLUT_Linear_8u_C1R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
383
+ dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, d_nppLut.ptr <Npp32s>(), d_pLevels.ptr <Npp32s>(), 256 ) );
384
+ }
385
+ else
386
+ {
387
+ nppSafeCall ( nppiLUT_Linear_8u_C3R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
388
+ dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, pValues3, pLevels3, nValues3) );
389
+ }
390
+
391
+ if (stream == 0 )
392
+ cudaSafeCall ( cudaDeviceSynchronize () );
340
393
}
341
- else
394
+ }
395
+
396
+ #else // (CUDA_VERSION >= 5000)
397
+
398
+ namespace
399
+ {
400
+ class LookUpTableImpl : public LookUpTable
342
401
{
402
+ public:
403
+ LookUpTableImpl (InputArray lut);
404
+
405
+ void transform (InputArray src, OutputArray dst, Stream& stream = Stream::Null());
406
+
407
+ private:
408
+ int lut_cn;
409
+
410
+ Npp32s pLevels[256 ];
411
+ int nValues3[3 ];
343
412
const Npp32s* pValues3[3 ];
413
+ const Npp32s* pLevels3[3 ];
344
414
415
+ Mat nppLut;
345
416
Mat nppLut3[3 ];
346
- if (nppLut.channels () == 1 )
417
+ };
418
+
419
+ LookUpTableImpl::LookUpTableImpl (InputArray _lut)
420
+ {
421
+ nValues3[0 ] = nValues3[1 ] = nValues3[2 ] = 256 ;
422
+
423
+ for (int i = 0 ; i < 256 ; ++i)
424
+ pLevels[i] = i;
425
+ pLevels3[0 ] = pLevels3[1 ] = pLevels3[2 ] = pLevels;
426
+
427
+ Mat lut;
428
+ if (_lut.kind () == _InputArray::GPU_MAT)
429
+ {
430
+ lut = Mat (_lut.getGpuMat ());
431
+ }
432
+ else
433
+ {
434
+ Mat hLut = _lut.getMat ();
435
+ CV_Assert ( hLut.total () == 256 && hLut.isContinuous () );
436
+ lut = hLut;
437
+ }
438
+
439
+ lut_cn = lut.channels ();
440
+
441
+ CV_Assert ( lut.depth () == CV_8U );
442
+ CV_Assert ( lut.rows == 1 && lut.cols == 256 );
443
+
444
+ lut.convertTo (nppLut, CV_32S);
445
+
446
+ if (lut_cn == 1 )
347
447
{
348
- #if (CUDA_VERSION <= 4020)
349
448
pValues3[0 ] = pValues3[1 ] = pValues3[2 ] = nppLut.ptr <Npp32s>();
350
- #else
351
- GpuMat d_nppLut (Mat (1 , 256 , CV_32S, nppLut.data ));
352
- pValues3[0 ] = pValues3[1 ] = pValues3[2 ] = d_nppLut.ptr <Npp32s>();
353
- #endif
354
449
}
355
450
else
356
451
{
357
452
cv::split (nppLut, nppLut3);
358
453
359
- #if (CUDA_VERSION <= 4020)
360
454
pValues3[0 ] = nppLut3[0 ].ptr <Npp32s>();
361
455
pValues3[1 ] = nppLut3[1 ].ptr <Npp32s>();
362
456
pValues3[2 ] = nppLut3[2 ].ptr <Npp32s>();
363
- #else
364
- GpuMat d_nppLut0 (Mat (1 , 256 , CV_32S, nppLut3[0 ].data ));
365
- GpuMat d_nppLut1 (Mat (1 , 256 , CV_32S, nppLut3[1 ].data ));
366
- GpuMat d_nppLut2 (Mat (1 , 256 , CV_32S, nppLut3[2 ].data ));
457
+ }
458
+ }
367
459
368
- pValues3[0 ] = d_nppLut0.ptr <Npp32s>();
369
- pValues3[1 ] = d_nppLut1.ptr <Npp32s>();
370
- pValues3[2 ] = d_nppLut2.ptr <Npp32s>();
371
- #endif
460
+ void LookUpTableImpl::transform (InputArray _src, OutputArray _dst, Stream& _stream)
461
+ {
462
+ GpuMat src = _src.getGpuMat ();
463
+
464
+ const int cn = src.channels ();
465
+
466
+ CV_Assert ( src.type () == CV_8UC1 || src.type () == CV_8UC3 );
467
+ CV_Assert ( lut_cn == 1 || lut_cn == cn );
468
+
469
+ _dst.create (src.size (), src.type ());
470
+ GpuMat dst = _dst.getGpuMat ();
471
+
472
+ cudaStream_t stream = StreamAccessor::getStream (_stream);
473
+
474
+ NppStreamHandler h (stream);
475
+
476
+ NppiSize sz;
477
+ sz.height = src.rows ;
478
+ sz.width = src.cols ;
479
+
480
+ if (src.type () == CV_8UC1)
481
+ {
482
+ nppSafeCall ( nppiLUT_Linear_8u_C1R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
483
+ dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, nppLut.ptr <Npp32s>(), pLevels, 256 ) );
484
+ }
485
+ else
486
+ {
487
+ nppSafeCall ( nppiLUT_Linear_8u_C3R (src.ptr <Npp8u>(), static_cast <int >(src.step ),
488
+ dst.ptr <Npp8u>(), static_cast <int >(dst.step ), sz, pValues3, pLevels3, nValues3) );
372
489
}
373
490
374
- nppSafeCall ( nppiLUT_Linear_8u_C3R (src. ptr <Npp8u>(), static_cast < int >(src. step ),
375
- dst. ptr <Npp8u>(), static_cast < int >(dst. step ), sz, pValues3, pLevels3, nValues3 ) );
491
+ if (stream == 0 )
492
+ cudaSafeCall ( cudaDeviceSynchronize ( ) );
376
493
}
494
+ }
377
495
378
- if (stream == 0 )
379
- cudaSafeCall ( cudaDeviceSynchronize () );
496
+ #endif // (CUDA_VERSION >= 5000)
497
+
498
+ Ptr <LookUpTable> cv::gpu::createLookUpTable (InputArray lut)
499
+ {
500
+ return new LookUpTableImpl (lut);
380
501
}
381
502
382
503
// //////////////////////////////////////////////////////////////////////
0 commit comments