Skip to content

Commit 12286c5

Browse files
committed
Add missing file.
1 parent cb95e7c commit 12286c5

File tree

2 files changed

+229
-0
lines changed

2 files changed

+229
-0
lines changed

lib/nnc/cmd/nms/ccv_nnc_nms.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include <ccv.h>
2+
#include <nnc/ccv_nnc.h>
3+
#include <nnc/ccv_nnc_easy.h>
4+
#include <nnc/ccv_nnc_internal.h>
5+
6+
// Availability check for forward NMS: bit 0 of the input bitmask (the box /
// score tensor) must be present, and both outputs (the sorted copy and the
// index permutation) must be requested.
static int _ccv_nnc_nms_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	const int has_input = (input_bitmasks[0] & 1u) == 1u;
	const int has_both_outputs = output_bitmasks[0] == 3u;
	return (has_input && has_both_outputs) ? 1 : 0;
}
12+
13+
// Availability check for backward NMS. Input order: gradient of sorted output
// (bit 0), gradient of sorting index (bit 1), original input (bit 2), sorted
// output (bit 3), sorting index (bit 4). Only bits 0 and 4 are required — the
// & 17u mask deliberately ignores bits 1..3 — and exactly output 0 (gradient
// w.r.t. the input) must be requested.
static int _ccv_nnc_nms_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	const uint64_t required = (1u << 0) | (1u << 4); // == 17u
	if ((input_bitmasks[0] & 17u) != required)
		return 0;
	return (output_bitmasks[0] == 1u) ? 1 : 0;
}
20+
21+
// Shape inference for forward NMS: output 0 mirrors the input (the sorted /
// suppressed copy); output 1 is a CCV_32S index tensor holding one entry per
// row that is ranked.
static void _ccv_nnc_nms_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size)
{
	assert(output_size == 2);
	const int input_nd = ccv_nnc_tensor_nd(inputs[0].dim);
	assert(input_nd >= 1);
	// Sorted output: identical parameters to the input.
	outputs[0] = inputs[0];
	// Index output: inherit format/memory from the input, then override the
	// datatype and collapse the dimensions down to (batch[, rank count]).
	outputs[1] = inputs[0];
	outputs[1].datatype = CCV_32S;
	memset(outputs[1].dim, 0, sizeof(outputs[1].dim));
	outputs[1].dim[0] = inputs[0].dim[0]; // How many to rank (or batch size).
	outputs[1].dim[1] = (input_nd <= 2) ? 0 : inputs[0].dim[1]; // How many to rank per batch item.
}
33+
34+
// Shape inference for backward NMS: the gradient w.r.t. the input has the
// same tensor parameters as the forward input, which sits at inputs[2] per
// the backward input ordering (grad of sorted, grad of index, input, ...).
static void _ccv_nnc_nms_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size)
{
	assert(output_size == 1);
	outputs[0] = inputs[2]; // inputs[2] is the forward op's input tensor.
}
39+
40+
// Register the forward NMS command; FIND_BACKEND points the build at the CPU
// reference implementation.
REGISTER_COMMAND(CCV_NNC_NMS_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_nms_cpu_ref.c)
{
	// Shape inference and input/output availability check.
	registry->tensor_auto = _ccv_nnc_nms_tensor_auto_forw;
	registry->bitmask = _ccv_nnc_nms_forw_bitmask;
}
46+
47+
// Register the backward NMS command against the same CPU reference backend.
REGISTER_COMMAND(CCV_NNC_NMS_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_nms_cpu_ref.c)
{
	// Shape inference and input/output availability check.
	registry->tensor_auto = _ccv_nnc_nms_tensor_auto_back;
	registry->bitmask = _ccv_nnc_nms_back_bitmask;
}
53+
54+
// Convenience constructor for a forward NMS command. _iou_threshold is the
// intersection-over-union overlap at or above which a lower-scoring box is
// suppressed by the executor.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_NMS_FORWARD)
#define CMD_NMS_FORWARD(_iou_threshold) ccv_nnc_cmd(CCV_NNC_NMS_FORWARD, 0, ((ccv_nnc_cmd_param_t){.nms={.iou_threshold=_iou_threshold}}), 0)
// Convenience constructor for the corresponding backward command.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_NMS_BACKWARD)
#define CMD_NMS_BACKWARD(_iou_threshold) ccv_nnc_cmd(CCV_NNC_NMS_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.nms={.iou_threshold=_iou_threshold}}), 0)
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#include <ccv.h>
2+
#include <ccv_internal.h>
3+
#include <nnc/ccv_nnc.h>
4+
#include <nnc/ccv_nnc_easy.h>
5+
#include <nnc/ccv_nnc_internal.h>
6+
#ifdef USE_OPENMP
7+
#include <omp.h>
8+
#endif
9+
#ifdef USE_DISPATCH
10+
#include <dispatch/dispatch.h>
11+
#endif
12+
13+
// A densely-packed 5-float row: v[0] is the score; v[1..4] are used by the
// suppression loop as the box rectangle (x, y, width, height — see the IoU
// computation in _ccv_nnc_nms_forw). Matches the fast path's stride of 5.
typedef struct {
	float v[5];
} float5;
// '>' makes the generated qsort order rows by descending score.
#define less_than(a, b, aux) ((a).v[0] > (b).v[0])
// Swap two float5 rows and keep the auxiliary int index array (the rows'
// original positions) in sync; &(a) - array recovers the element index.
#define swap_func(a, b, array, aux, t) do { \
	(t) = (a); \
	(a) = (b); \
	(b) = (t); \
	int _t = aux[&(a) - array]; \
	aux[&(a) - array] = aux[&(b) - array]; \
	aux[&(b) - array] = _t; \
} while (0)
// Instantiates _ccv_nnc_nms_sortby_f5_32f(float5*, size, int* aux): sorts
// rows by score while permuting the aux index array identically.
CCV_IMPLEMENT_QSORT_EX(_ccv_nnc_nms_sortby_f5_32f, float5, less_than, swap_func, int*)
#undef less_than
#undef swap_func
28+
29+
// Forward non-maximum suppression. Rows of inputs[0] each hold a score
// followed by a rectangle; per batch item, rows are sorted by descending
// score into outputs[0] with the permutation recorded in outputs[1], then any
// row whose IoU with a higher-scoring surviving row reaches
// cmd.info.nms.iou_threshold has its score overwritten with -FLT_MAX to mark
// it suppressed.
//
// Fixes over the previous revision:
// 1. The per-batch base pointers were computed with `n * inc` (the batch
//    count) instead of `i * inc` (the batch index), so every parallel
//    iteration addressed one-past-the-last batch item — out of bounds.
// 2. In the strided fallback, the cp[k]/cp[x] index swap sat inside the
//    per-float `y` loop, executing d times and cancelling out for even d;
//    it now runs exactly once per selection-sort swap.
static int _ccv_nnc_nms_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 1);
	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
	assert(output_size == 2);
	ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0]; // Sorted / suppressed copy of a.
	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[1]; // 32-bit original-row indices.
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
	const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
	assert(a_nd == b_nd);
	int i;
	for (i = 0; i < a_nd; i++)
		{ assert(a->info.dim[i] == b->info.dim[i]); }
	const int* ainc = CCV_IS_TENSOR_VIEW(a) ? a->inc : a->info.dim;
	const int* binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
	const int* cinc = CCV_IS_TENSOR_VIEW(c) ? c->inc : c->info.dim;
	const int n = a_nd >= 3 ? a->info.dim[0] : 1; // Batch size.
	const int aninc = a_nd >= 3 ? ainc[1] * ainc[2] : 0; // Stride between batch items.
	const int bninc = b_nd >= 3 ? binc[1] * binc[2] : 0;
	const int cninc = c_nd >= 2 ? cinc[1] : 0;
	const int m = a_nd >= 3 ? a->info.dim[1] : a->info.dim[0]; // Rows per batch item.
	if (c_nd == 1)
		{ assert(m == c->info.dim[0]); }
	else
		{ assert(c_nd == 2 && n == c->info.dim[0] && m == c->info.dim[1]); }
	const int aminc = ainc[a_nd - 1]; // Stride between rows.
	const int bminc = binc[b_nd - 1];
	const int d = a_nd <= 1 ? 1 : a->info.dim[a_nd - 1]; // Floats per row.
	const float iou_threshold = cmd.info.nms.iou_threshold;
	if (d == 5 && aminc == 5 && aminc == bminc) // If it is 5, we can use our quick sort implementation.
	{
		parallel_for(i, n)
		{
			int x, y;
			// Offset by i (the batch index), not n (the batch count).
			const float* const ap = a->data.f32 + i * aninc;
			float* const bp = b->data.f32 + i * bninc;
			int* const cp = c->data.i32 + i * cninc;
			for (x = 0; x < m; x++)
				cp[x] = x; // Identity permutation; rearranged by the sort.
			for (x = 0; x < m * d; x++)
				bp[x] = ap[x]; // Dense copy — rows are contiguous here.
			_ccv_nnc_nms_sortby_f5_32f((float5*)bp, m, cp);
			for (x = 0; x < m; x++)
			{
				const float v = bp[x * 5];
				if (v == -FLT_MAX) // Suppressed.
					continue;
				const float area1 = bp[x * 5 + 3] * bp[x * 5 + 4];
				for (y = x + 1; y < m; y++)
				{
					const float u = bp[y * 5];
					if (u == -FLT_MAX) // Suppressed.
						continue;
					const float area2 = bp[y * 5 + 3] * bp[y * 5 + 4];
					const float xdiff = ccv_max(0, ccv_min(bp[x * 5 + 1] + bp[x * 5 + 3], bp[y * 5 + 1] + bp[y * 5 + 3]) - ccv_max(bp[x * 5 + 1], bp[y * 5 + 1]));
					const float ydiff = ccv_max(0, ccv_min(bp[x * 5 + 2] + bp[x * 5 + 4], bp[y * 5 + 2] + bp[y * 5 + 4]) - ccv_max(bp[x * 5 + 2], bp[y * 5 + 2]));
					const float intersection = xdiff * ydiff;
					const float iou = intersection / (area1 + area2 - intersection);
					if (iou >= iou_threshold)
						bp[y * 5] = -FLT_MAX; // Suppress the lower-scoring overlap.
				}
			}
		} parallel_endfor
	} else {
		// Otherwise, fall back to selection sort over strided rows.
		parallel_for(i, n)
		{
			int x, y;
			const float* const ap = a->data.f32 + i * aninc;
			float* const bp = b->data.f32 + i * bninc;
			int* const cp = c->data.i32 + i * cninc;
			for (x = 0; x < m; x++)
				cp[x] = x;
			for (x = 0; x < m; x++)
				for (y = 0; y < d; y++)
					bp[x * bminc + y] = ap[x * aminc + y];
			for (x = 0; x < m; x++)
			{
				// Find the highest remaining score in rows [x, m).
				float v = bp[x * bminc];
				int k = x;
				for (y = x + 1; y < m; y++)
				{
					const float u = bp[y * bminc];
					if (u > v)
						k = y, v = u;
				}
				if (k != x)
				{
					for (y = 0; y < d; y++)
					{
						const float t = bp[k * bminc + y];
						bp[k * bminc + y] = bp[x * bminc + y];
						bp[x * bminc + y] = t;
					}
					// Swap the index entries exactly once per row swap.
					const int t = cp[k];
					cp[k] = cp[x];
					cp[x] = t;
				}
			}
			for (x = 0; x < m; x++)
			{
				const float v = bp[x * bminc];
				if (v == -FLT_MAX) // Suppressed.
					continue;
				const float area1 = bp[x * bminc + 3] * bp[x * bminc + 4];
				for (y = x + 1; y < m; y++)
				{
					const float u = bp[y * bminc];
					if (u == -FLT_MAX) // Suppressed.
						continue;
					const float area2 = bp[y * bminc + 3] * bp[y * bminc + 4];
					const float xdiff = ccv_max(0, ccv_min(bp[x * bminc + 1] + bp[x * bminc + 3], bp[y * bminc + 1] + bp[y * bminc + 3]) - ccv_max(bp[x * bminc + 1], bp[y * bminc + 1]));
					const float ydiff = ccv_max(0, ccv_min(bp[x * bminc + 2] + bp[x * bminc + 4], bp[y * bminc + 2] + bp[y * bminc + 4]) - ccv_max(bp[x * bminc + 2], bp[y * bminc + 2]));
					const float intersection = xdiff * ydiff;
					const float iou = intersection / (area1 + area2 - intersection);
					if (iou >= iou_threshold)
						bp[y * bminc] = -FLT_MAX; // Suppress the lower-scoring overlap.
				}
			}
		} parallel_endfor
	}
	return CCV_NNC_EXEC_SUCCESS;
}
150+
151+
// Backward pass executor. Currently a stub: it performs no computation and
// reports success. NOTE(review): the registered backward bitmask promises a
// gradient in outputs[0] — confirm whether leaving it unwritten is intentional
// (e.g. pending implementation) before relying on this path.
static int _ccv_nnc_nms_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	return CCV_NNC_EXEC_SUCCESS;
}
155+
156+
// CPU reference backend for forward NMS.
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F | CCV_32S; // f32 boxes, i32 indices.
	registry->algorithms = 1; // Single algorithm, no autotuning choices.
	registry->exec = _ccv_nnc_nms_forw;
}
164+
165+
// CPU reference backend for backward NMS.
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F | CCV_32S; // f32 boxes, i32 indices.
	registry->algorithms = 1; // Single algorithm, no autotuning choices.
	registry->exec = _ccv_nnc_nms_back;
}

0 commit comments

Comments
 (0)