Skip to content

Commit 515c4ff

Browse files
committed
Improve the Tracker effect to better track occluded objects and to follow objects offscreen and back onscreen without getting lost; also improve the unit tests.
1 parent c6720bb commit 515c4ff

File tree

3 files changed

+257
-110
lines changed

3 files changed

+257
-110
lines changed

src/CVTracker.cpp

Lines changed: 196 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <fstream>
1515
#include <iomanip>
1616
#include <iostream>
17+
#include <cmath>
1718

1819
#include <google/protobuf/util/time_util.h>
1920

@@ -31,6 +32,7 @@ CVTracker::CVTracker(std::string processInfoJson, ProcessingController &processi
3132
SetJson(processInfoJson);
3233
start = 1;
3334
end = 1;
35+
lostCount = 0;
3436
}
3537

3638
// Select the desired tracker method
@@ -54,152 +56,243 @@ cv::Ptr<OPENCV_TRACKER_TYPE> CVTracker::selectTracker(std::string trackerType){
5456
return nullptr;
5557
}
5658

57-
// Track object in the hole clip or in a given interval
58-
void CVTracker::trackClip(openshot::Clip& video, size_t _start, size_t _end, bool process_interval){
59-
59+
// Track object in the whole clip or in a given interval
60+
void CVTracker::trackClip(openshot::Clip& video,
61+
size_t _start,
62+
size_t _end,
63+
bool process_interval)
64+
{
6065
video.Open();
61-
if(!json_interval){
66+
if (!json_interval) {
6267
start = _start; end = _end;
63-
64-
if(!process_interval || end <= 1 || end-start == 0){
65-
// Get total number of frames in video
66-
start = (int)(video.Start() * video.Reader()->info.fps.ToFloat()) + 1;
67-
end = (int)(video.End() * video.Reader()->info.fps.ToFloat()) + 1;
68+
if (!process_interval || end <= 1 || end - start == 0) {
69+
start = int(video.Start() * video.Reader()->info.fps.ToFloat()) + 1;
70+
end = int(video.End() * video.Reader()->info.fps.ToFloat()) + 1;
6871
}
72+
} else {
73+
start = int(start + video.Start() * video.Reader()->info.fps.ToFloat()) + 1;
74+
end = int(video.End() * video.Reader()->info.fps.ToFloat()) + 1;
6975
}
70-
else{
71-
start = (int)(start + video.Start() * video.Reader()->info.fps.ToFloat()) + 1;
72-
end = (int)(video.End() * video.Reader()->info.fps.ToFloat()) + 1;
73-
}
74-
75-
if(error){
76-
return;
77-
}
78-
76+
if (error) return;
7977
processingController->SetError(false, "");
80-
bool trackerInit = false;
8178

82-
size_t frame;
83-
// Loop through video
84-
for (frame = start; frame <= end; frame++)
85-
{
86-
87-
// Stop the feature tracker process
88-
if(processingController->ShouldStop()){
89-
return;
90-
}
79+
bool trackerInit = false;
80+
lostCount = 0; // reset lost counter once at the start
9181

92-
size_t frame_number = frame;
93-
// Get current frame
94-
std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);
82+
for (size_t frame = start; frame <= end; ++frame) {
83+
if (processingController->ShouldStop()) return;
9584

96-
// Grab OpenCV Mat image
97-
cv::Mat cvimage = f->GetImageCV();
85+
auto f = video.GetFrame(frame);
86+
cv::Mat img = f->GetImageCV();
9887

99-
if(frame == start){
100-
// Take the normalized inital bounding box and multiply to the current video shape
101-
bbox = cv::Rect2d(int(bbox.x*cvimage.cols), int(bbox.y*cvimage.rows),
102-
int(bbox.width*cvimage.cols), int(bbox.height*cvimage.rows));
88+
if (frame == start) {
89+
bbox = cv::Rect2d(
90+
int(bbox.x * img.cols),
91+
int(bbox.y * img.rows),
92+
int(bbox.width * img.cols),
93+
int(bbox.height * img.rows)
94+
);
10395
}
10496

105-
// Pass the first frame to initialize the tracker
106-
if(!trackerInit){
107-
108-
// Initialize the tracker
109-
initTracker(cvimage, frame_number);
110-
97+
if (!trackerInit) {
98+
initTracker(img, frame);
11199
trackerInit = true;
100+
lostCount = 0;
112101
}
113-
else{
114-
// Update the object tracker according to frame
115-
trackerInit = trackFrame(cvimage, frame_number);
116-
117-
// Draw box on image
118-
FrameData fd = GetTrackedData(frame_number);
102+
else {
103+
// trackFrame now manages lostCount and will re-init internally
104+
trackFrame(img, frame);
119105

106+
// record whatever bbox we have now
107+
FrameData fd = GetTrackedData(frame);
120108
}
121-
// Update progress
122-
processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
109+
110+
processingController->SetProgress(
111+
uint(100 * (frame - start) / (end - start))
112+
);
123113
}
124114
}
125115

126116
// Initialize the tracker
127-
bool CVTracker::initTracker(cv::Mat &frame, size_t frameId){
128-
117+
bool CVTracker::initTracker(cv::Mat &frame, size_t frameId)
118+
{
129119
// Create new tracker object
130120
tracker = selectTracker(trackerType);
131121

132-
// Correct if bounding box contains negative proportions (width and/or height < 0)
133-
if(bbox.width < 0){
134-
bbox.x = bbox.x - abs(bbox.width);
135-
bbox.width = abs(bbox.width);
122+
// Correct negative width/height
123+
if (bbox.width < 0) {
124+
bbox.x -= bbox.width;
125+
bbox.width = -bbox.width;
136126
}
137-
if(bbox.height < 0){
138-
bbox.y = bbox.y - abs(bbox.height);
139-
bbox.height = abs(bbox.height);
127+
if (bbox.height < 0) {
128+
bbox.y -= bbox.height;
129+
bbox.height = -bbox.height;
140130
}
141131

132+
// Clamp to frame bounds
133+
bbox &= cv::Rect2d(0, 0, frame.cols, frame.rows);
134+
if (bbox.width <= 0) bbox.width = 1;
135+
if (bbox.height <= 0) bbox.height = 1;
136+
142137
// Initialize tracker
143138
tracker->init(frame, bbox);
144139

145-
float fw = frame.size().width;
146-
float fh = frame.size().height;
140+
float fw = float(frame.cols), fh = float(frame.rows);
141+
142+
// record original pixel size
143+
origWidth = bbox.width;
144+
origHeight = bbox.height;
145+
146+
// initialize sub-pixel smoother at true center
147+
smoothC_x = bbox.x + bbox.width * 0.5;
148+
smoothC_y = bbox.y + bbox.height * 0.5;
147149

148150
// Add new frame data
149-
trackedDataById[frameId] = FrameData(frameId, 0, (bbox.x)/fw,
150-
(bbox.y)/fh,
151-
(bbox.x+bbox.width)/fw,
152-
(bbox.y+bbox.height)/fh);
151+
trackedDataById[frameId] = FrameData(
152+
frameId, 0,
153+
bbox.x / fw,
154+
bbox.y / fh,
155+
(bbox.x + bbox.width) / fw,
156+
(bbox.y + bbox.height) / fh
157+
);
153158

154159
return true;
155160
}
156161

157162
// Update the object tracker according to frame
158-
bool CVTracker::trackFrame(cv::Mat &frame, size_t frameId){
159-
// Update the tracking result
160-
bool ok = tracker->update(frame, bbox);
163+
// Returns true if the KLT step produced this frame's box, false if the fallback path was used
164+
bool CVTracker::trackFrame(cv::Mat &frame, size_t frameId)
165+
{
166+
const int W = frame.cols, H = frame.rows;
167+
const auto& prev = trackedDataById[frameId - 1];
168+
169+
// Reconstruct last-known box in pixel coords
170+
cv::Rect2d lastBox(
171+
prev.x1 * W, prev.y1 * H,
172+
(prev.x2 - prev.x1) * W,
173+
(prev.y2 - prev.y1) * H
174+
);
175+
176+
// Convert to grayscale
177+
cv::Mat gray;
178+
cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
179+
180+
cv::Rect2d cand;
181+
bool didKLT = false;
182+
183+
// Try KLT-based drift
184+
if (!prevGray.empty() && !prevPts.empty()) {
185+
std::vector<cv::Point2f> currPts;
186+
std::vector<uchar> status;
187+
std::vector<float> err;
188+
cv::calcOpticalFlowPyrLK(
189+
prevGray, gray,
190+
prevPts, currPts,
191+
status, err,
192+
cv::Size(21,21), 3,
193+
cv::TermCriteria{cv::TermCriteria::COUNT|cv::TermCriteria::EPS,30,0.01},
194+
cv::OPTFLOW_LK_GET_MIN_EIGENVALS, 1e-4
195+
);
196+
197+
// collect per-point displacements
198+
std::vector<double> dx, dy;
199+
for (size_t i = 0; i < status.size(); ++i) {
200+
if (status[i] && err[i] < 12.0) {
201+
dx.push_back(currPts[i].x - prevPts[i].x);
202+
dy.push_back(currPts[i].y - prevPts[i].y);
203+
}
204+
}
161205

162-
// Add frame number and box coords if tracker finds the object
163-
// Otherwise add only frame number
164-
if (ok)
165-
{
166-
float fw = frame.size().width;
167-
float fh = frame.size().height;
168-
169-
cv::Rect2d filtered_box = filter_box_jitter(frameId);
170-
// Add new frame data
171-
trackedDataById[frameId] = FrameData(frameId, 0, (filtered_box.x)/fw,
172-
(filtered_box.y)/fh,
173-
(filtered_box.x+filtered_box.width)/fw,
174-
(filtered_box.y+filtered_box.height)/fh);
175-
}
176-
else
177-
{
178-
// Copy the last frame data if the tracker get lost
179-
trackedDataById[frameId] = trackedDataById[frameId-1];
206+
if ((int)dx.size() >= minKltPts) {
207+
auto median = [&](auto &v){
208+
std::nth_element(v.begin(), v.begin()+v.size()/2, v.end());
209+
return v[v.size()/2];
210+
};
211+
double mdx = median(dx), mdy = median(dy);
212+
213+
cand = lastBox;
214+
cand.x += mdx;
215+
cand.y += mdy;
216+
cand.width = origWidth;
217+
cand.height = origHeight;
218+
219+
lostCount = 0;
220+
didKLT = true;
221+
}
180222
}
181223

182-
return ok;
183-
}
224+
// Fallback to whole-frame flow if KLT failed
225+
if (!didKLT) {
226+
++lostCount;
227+
cand = lastBox;
228+
if (!fullPrevGray.empty()) {
229+
cv::Mat flow;
230+
cv::calcOpticalFlowFarneback(
231+
fullPrevGray, gray, flow,
232+
0.5,3,15,3,5,1.2,0
233+
);
234+
cv::Scalar avg = cv::mean(flow);
235+
cand.x += avg[0];
236+
cand.y += avg[1];
237+
}
238+
cand.width = origWidth;
239+
cand.height = origHeight;
184240

185-
cv::Rect2d CVTracker::filter_box_jitter(size_t frameId){
186-
// get tracked data for the previous frame
187-
float last_box_width = trackedDataById[frameId-1].x2 - trackedDataById[frameId-1].x1;
188-
float last_box_height = trackedDataById[frameId-1].y2 - trackedDataById[frameId-1].y1;
241+
if (lostCount >= 10) {
242+
initTracker(frame, frameId);
243+
cand = bbox;
244+
lostCount = 0;
245+
}
246+
}
189247

190-
float curr_box_width = bbox.width;
191-
float curr_box_height = bbox.height;
192-
// keep the last width and height if the difference is less than 1%
193-
float threshold = 0.01;
248+
// Dead-zone sub-pixel smoothing
249+
{
250+
constexpr double JITTER_THRESH = 1.0;
251+
double measCx = cand.x + cand.width * 0.5;
252+
double measCy = cand.y + cand.height * 0.5;
253+
double dx = measCx - smoothC_x;
254+
double dy = measCy - smoothC_y;
255+
256+
if (std::abs(dx) > JITTER_THRESH || std::abs(dy) > JITTER_THRESH) {
257+
smoothC_x = measCx;
258+
smoothC_y = measCy;
259+
}
194260

195-
cv::Rect2d filtered_box = bbox;
196-
if(std::abs(1-(curr_box_width/last_box_width)) <= threshold){
197-
filtered_box.width = last_box_width;
261+
cand.x = smoothC_x - cand.width * 0.5;
262+
cand.y = smoothC_y - cand.height * 0.5;
198263
}
199-
if(std::abs(1-(curr_box_height/last_box_height)) <= threshold){
200-
filtered_box.height = last_box_height;
264+
265+
// Re-seed KLT features
266+
{
267+
cv::Rect roi(
268+
int(std::max(0., cand.x)),
269+
int(std::max(0., cand.y)),
270+
int(std::min(cand.width, double(W - cand.x))),
271+
int(std::min(cand.height, double(H - cand.y)))
272+
);
273+
cv::goodFeaturesToTrack(
274+
gray(roi), prevPts,
275+
kltMaxCorners, kltQualityLevel,
276+
kltMinDist, cv::Mat(), kltBlockSize
277+
);
278+
for (auto &pt : prevPts)
279+
pt += cv::Point2f(float(roi.x), float(roi.y));
201280
}
202-
return filtered_box;
281+
282+
// Commit state
283+
fullPrevGray = gray.clone();
284+
prevGray = gray.clone();
285+
bbox = cand;
286+
float fw = float(W), fh = float(H);
287+
trackedDataById[frameId] = FrameData(
288+
frameId, 0,
289+
cand.x / fw,
290+
cand.y / fh,
291+
(cand.x + cand.width) / fw,
292+
(cand.y + cand.height) / fh
293+
);
294+
295+
return didKLT;
203296
}
204297

205298
bool CVTracker::SaveTrackedData(){

0 commit comments

Comments
 (0)