Skip to content

Commit 785acc1

Browse files
committed
Fixes global size issue, adds #pragma unroll to loops
1 parent bea2515 commit 785acc1

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

modules/imgproc/src/color.cpp

+3 -1
Original file line number | Diff line number | Diff line change
@@ -2690,6 +2690,8 @@ struct mRGBA2RGBA
26902690

26912691
#ifdef HAVE_OPENCL
26922692

2693+
#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain))
2694+
26932695
static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
26942696
{
26952697
bool ok = false;
@@ -2711,7 +2713,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
27112713
{
27122714
pxPerWIy = 4;
27132715
}
2714-
globalsize[1] /= pxPerWIy;
2716+
globalsize[1] = DIVUP(globalsize[1], pxPerWIy);
27152717
opts += format("-D PIX_PER_WI_Y=%d ", pxPerWIy);
27162718

27172719
switch (code)

modules/imgproc/src/opencl/cvtcolor.cl

+28
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,7 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
131131

132132
if (x < cols)
133133
{
134+
#pragma unroll
134135
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
135136
{
136137
if (y < rows)
@@ -158,6 +159,7 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
158159

159160
if (x < cols)
160161
{
162+
#pragma unroll
161163
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
162164
{
163165
if (y < rows)
@@ -189,6 +191,7 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
189191

190192
if (x < cols)
191193
{
194+
#pragma unroll
192195
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
193196
{
194197
if (y < rows)
@@ -232,6 +235,7 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
232235

233236
if (x < cols)
234237
{
238+
#pragma unroll
235239
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
236240
{
237241
if (y < rows)
@@ -281,6 +285,7 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
281285

282286
if (x < cols / 2)
283287
{
288+
#pragma unroll
284289
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
285290
{
286291
if (y < rows / 2 )
@@ -353,6 +358,7 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
353358

354359
if (x < cols)
355360
{
361+
#pragma unroll
356362
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
357363
{
358364
if (y < rows)
@@ -396,6 +402,7 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
396402

397403
if (x < cols)
398404
{
405+
#pragma unroll
399406
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
400407
{
401408
if (y < rows)
@@ -443,6 +450,7 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
443450

444451
if (dx < cols)
445452
{
453+
#pragma unroll
446454
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
447455
{
448456
if (dy < rows)
@@ -483,6 +491,7 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
483491

484492
if (dx < cols)
485493
{
494+
#pragma unroll
486495
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
487496
{
488497
if (dy < rows)
@@ -528,6 +537,7 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
528537

529538
if (x < cols)
530539
{
540+
#pragma unroll
531541
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
532542
{
533543
if (y < rows)
@@ -573,6 +583,7 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
573583

574584
if (x < cols)
575585
{
586+
#pragma unroll
576587
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
577588
{
578589
if (y < rows)
@@ -613,6 +624,7 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
613624

614625
if (x < cols)
615626
{
627+
#pragma unroll
616628
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
617629
{
618630
if (y < rows)
@@ -646,6 +658,7 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
646658

647659
if (x < cols)
648660
{
661+
#pragma unroll
649662
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
650663
{
651664
if (y < rows)
@@ -678,6 +691,7 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
678691

679692
if (x < cols)
680693
{
694+
#pragma unroll
681695
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
682696
{
683697
if (y < rows)
@@ -719,6 +733,7 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
719733

720734
if (x < cols)
721735
{
736+
#pragma unroll
722737
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
723738
{
724739
if (y < rows)
@@ -765,6 +780,7 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
765780

766781
if (x < cols)
767782
{
783+
#pragma unroll
768784
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
769785
{
770786
if (y < rows)
@@ -828,6 +844,7 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
828844

829845
if (x < cols)
830846
{
847+
#pragma unroll
831848
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
832849
{
833850
if (y < rows)
@@ -880,6 +897,7 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
880897

881898
if (x < cols)
882899
{
900+
#pragma unroll
883901
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
884902
{
885903
if (y < rows)
@@ -950,6 +968,7 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
950968

951969
if (x < cols)
952970
{
971+
#pragma unroll
953972
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
954973
{
955974
if (y < rows)
@@ -1004,6 +1023,7 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
10041023

10051024
if (x < cols)
10061025
{
1026+
#pragma unroll
10071027
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
10081028
{
10091029
if (y < rows)
@@ -1066,6 +1086,7 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
10661086

10671087
if (x < cols)
10681088
{
1089+
#pragma unroll
10691090
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
10701091
{
10711092
if (y < rows)
@@ -1123,6 +1144,7 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
11231144

11241145
if (x < cols)
11251146
{
1147+
#pragma unroll
11261148
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
11271149
{
11281150
if (y < rows)
@@ -1193,6 +1215,7 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
11931215

11941216
if (x < cols)
11951217
{
1218+
#pragma unroll
11961219
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
11971220
{
11981221
if (y < rows)
@@ -1223,6 +1246,7 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
12231246

12241247
if (x < cols)
12251248
{
1249+
#pragma unroll
12261250
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
12271251
{
12281252
if (y < rows)
@@ -1275,6 +1299,7 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
12751299

12761300
if (x < cols)
12771301
{
1302+
#pragma unroll
12781303
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
12791304
{
12801305
if (y < rows)
@@ -1322,6 +1347,7 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
13221347

13231348
if (x < cols)
13241349
{
1350+
#pragma unroll
13251351
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
13261352
{
13271353
if (y < rows)
@@ -1430,6 +1456,7 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
14301456

14311457
if (x < cols)
14321458
{
1459+
#pragma unroll
14331460
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
14341461
{
14351462
if (y < rows)
@@ -1478,6 +1505,7 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
14781505

14791506
if (x < cols)
14801507
{
1508+
#pragma unroll
14811509
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
14821510
{
14831511
if (y < rows)

0 commit comments

Comments
 (0)