
Commit dffaee2

add test scripts

2 parents: 26da8bd + f388aed

File tree

9 files changed: +26772 −26687 lines

README.md

Lines changed: 58 additions & 1 deletion
@@ -1 +1,58 @@
-# two-stream-pytorch
+PyTorch implementation of popular two-stream frameworks for video action recognition
+============================
+
+The current release is a PyTorch implementation of "Towards Good Practices for Very Deep Two-Stream ConvNets"; you can refer to the paper for more details on [arXiv](https://arxiv.org/abs/1507.02159).
+
+In the future, I will add PyTorch implementations of the following papers:
+
+```
+Temporal Segment Networks: Towards Good Practices for Deep Action Recognition,
+Limin Wang, Yuanjun Xiong, Zhe Wang, Yu Qiao, Dahua Lin, Xiaoou Tang, Luc Van Gool
+ECCV 2016
+
+Deep Temporal Linear Encoding Networks
+Ali Diba, Vivek Sharma, Luc Van Gool
+https://arxiv.org/abs/1611.06678
+
+Hidden Two-Stream Convolutional Networks for Action Recognition
+Yi Zhu, Zhenzhong Lan, Shawn Newsam, Alexander G. Hauptmann
+https://arxiv.org/abs/1704.00389
+```
+
+Install
+=========
+
+Tested with PyTorch on:
+
+```
+OS: Ubuntu 16.04
+Package manager: Conda
+Python: 3.5
+CUDA: 8.0
+```
+
+The code also works with Python 2.7.
+
+Training
+========
+
+Simply run:
+
+`python main_single_gpu.py DATA_PATH`
+
+`DATA_PATH` is where you store RGB frames or optical flow images. Change the parameters passed to argparse as needed.
+
+Testing
+========
+
+Will be released soon.
+
+Related Projects
+====================
+
+[TSN](https://github.com/yjxiong/temporal-segment-networks): Temporal Segment Networks: Towards Good Practices for Deep Action Recognition
+
+[Hidden Two-Stream](https://github.com/bryanyzhu/Hidden-Two-Stream): Hidden Two-Stream Convolutional Networks for Action Recognition
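For reference, a fuller training invocation can be assembled from the argparse flags added in `main_single_gpu.py` below. This is only a sketch: the data path is a placeholder, and the values shown are the defaults from the diff, not tuned settings:

```
python main_single_gpu.py ~/ucf101_frames \
    --settings ./settings --dataset ucf101 --modality rgb \
    --epochs 250 --batch-size 25 --lr 0.001 --lr_steps 100 200 \
    --new_width 340 --new_height 256 --workers 4
```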

datasets/ucf101.py

Lines changed: 29 additions & 29 deletions
All changes in this file are whitespace-only: each −/+ pair below differs only in indentation or trailing whitespace characters.

@@ -19,7 +19,7 @@ def make_dataset(root, source):
         sys.exit()
     else:
         clips = []
-        with open(source) as split_f:
+        with open(source) as split_f:
             data = split_f.readlines()
             for line in data:
                 line_info = line.split()
@@ -46,14 +46,14 @@ def ReadSegmentRGB(path, offsets, new_height, new_width, new_length, is_color, n
             cv_img_origin = cv2.imread(frame_path, cv_read_flag)
             if cv_img_origin is None:
                 print("Could not load file %s" % (frame_path))
-                sys.exit()
+                sys.exit()
                 # TODO: error handling here
             if new_width > 0 and new_height > 0:
                 cv_img = cv2.resize(cv_img_origin, (new_width, new_height), interpolation)
             else:
                 cv_img = cv_img_origin
             cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
-            sampled_list.append(cv_img)
+            sampled_list.append(cv_img)
     clip_input = np.concatenate(sampled_list, axis=2)
     return clip_input
@@ -62,7 +62,7 @@ def ReadSegmentFlow(path, offsets, new_height, new_width, new_length, is_color,
         cv_read_flag = cv2.IMREAD_COLOR  # > 0
     else:
         cv_read_flag = cv2.IMREAD_GRAYSCALE  # = 0
-    interpolation = cv2.INTER_LINEAR
+    interpolation = cv2.INTER_LINEAR
 
     sampled_list = []
     for offset_id in range(len(offsets)):
@@ -76,50 +76,50 @@ def ReadSegmentFlow(path, offsets, new_height, new_width, new_length, is_color,
             cv_img_origin_y = cv2.imread(frame_path_y, cv_read_flag)
             if cv_img_origin_x is None or cv_img_origin_y is None:
                 print("Could not load file %s or %s" % (frame_path_x, frame_path_y))
-                sys.exit()
+                sys.exit()
                 # TODO: error handling here
             if new_width > 0 and new_height > 0:
                 cv_img_x = cv2.resize(cv_img_origin_x, (new_width, new_height), interpolation)
                 cv_img_y = cv2.resize(cv_img_origin_y, (new_width, new_height), interpolation)
             else:
                 cv_img_x = cv_img_origin_x
                 cv_img_y = cv_img_origin_y
-            sampled_list.append(np.expand_dims(cv_img_x, 2))
-            sampled_list.append(np.expand_dims(cv_img_y, 2))
+            sampled_list.append(np.expand_dims(cv_img_x, 2))
+            sampled_list.append(np.expand_dims(cv_img_y, 2))
 
     clip_input = np.concatenate(sampled_list, axis=2)
     return clip_input
 
 
 class ucf101(data.Dataset):
 
-    def __init__(self,
-                 root,
-                 source,
-                 phase,
+    def __init__(self,
+                 root,
+                 source,
+                 phase,
                  modality,
                  name_pattern=None,
-                 is_color=True,
+                 is_color=True,
                  num_segments=1,
-                 new_length=1,
+                 new_length=1,
                  new_width=0,
                  new_height=0,
-                 transform=None,
-                 target_transform=None,
+                 transform=None,
+                 target_transform=None,
                  video_transform=None):
 
         classes, class_to_idx = find_classes(root)
         clips = make_dataset(root, source)
-
+
         if len(clips) == 0:
             raise(RuntimeError("Found 0 video clips in subfolders of: " + root + "\n"
                                "Check your data directory."))
-
+
         self.root = root
         self.source = source
         self.phase = phase
         self.modality = modality
-
+
         self.classes = classes
         self.class_to_idx = class_to_idx
         self.clips = clips
@@ -131,13 +131,13 @@ def __init__(self,
             self.name_pattern = "image_%04d.jpg"
         elif self.modality == "flow":
             self.name_pattern = "flow_%s_%04d.jpg"
-
+
         self.is_color = is_color
         self.num_segments = num_segments
         self.new_length = new_length
         self.new_width = new_width
         self.new_height = new_height
-
+
         self.transform = transform
         self.target_transform = target_transform
         self.video_transform = video_transform
@@ -165,24 +165,24 @@ def __getitem__(self, index):
 
         if self.modality == "rgb":
             clip_input = ReadSegmentRGB(path,
-                                        offsets,
+                                        offsets,
                                         self.new_height,
-                                        self.new_width,
-                                        self.new_length,
-                                        self.is_color,
+                                        self.new_width,
+                                        self.new_length,
+                                        self.is_color,
                                         self.name_pattern
                                         )
         elif self.modality == "flow":
             clip_input = ReadSegmentFlow(path,
-                                         offsets,
+                                         offsets,
                                          self.new_height,
-                                         self.new_width,
-                                         self.new_length,
-                                         self.is_color,
+                                         self.new_width,
+                                         self.new_length,
+                                         self.is_color,
                                          self.name_pattern
                                          )
         else:
-            print("No such modality %s" % (self.modality))
+            print("No such modality %s" % (self.modality))
 
         if self.transform is not None:
             clip_input = self.transform(clip_input)
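Although the hunks above only normalize whitespace, they show the full constructor surface of the dataset. A minimal, hypothetical usage sketch based on that signature (the paths and the split-file name are placeholders, not from this commit):

```python
import torch.utils.data

from datasets.ucf101 import ucf101

train_dataset = ucf101(
    root="/path/to/ucf101_frames",            # placeholder: folders of extracted frames
    source="./settings/ucf101/train_split1.txt",  # placeholder: split file listing clips
    phase="train",
    modality="rgb",        # "rgb" uses image_%04d.jpg; "flow" uses flow_%s_%04d.jpg
    is_color=True,
    num_segments=1,
    new_length=1,
    new_width=340,         # matches the --new_width default in main_single_gpu.py
    new_height=256,
    video_transform=None,  # e.g. a video_transforms.Compose(...) pipeline
)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=25, shuffle=True, num_workers=4)
```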

main_single_gpu.py

Lines changed: 35 additions & 7 deletions
@@ -28,7 +28,7 @@
                     help='path to dataset')
 parser.add_argument('--settings', metavar='DIR', default='./settings',
                     help='path to datset setting files')
-parser.add_argument('--modality', '-m', metavar='MODALITY', default='rgb',
+parser.add_argument('--modality', '-m', metavar='MODALITY', default='flow',
                     choices=["rgb", "flow"],
                     help='modality: rgb | flow')
 parser.add_argument('--dataset', '-d', default='ucf101',
@@ -43,21 +43,25 @@
                     help='which split of data to work on (default: 1)')
 parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                     help='number of data loading workers (default: 4)')
+<<<<<<< HEAD
 parser.add_argument('--epochs', default=250, type=int, metavar='N',
+=======
+parser.add_argument('--epochs', default=750, type=int, metavar='N',
+>>>>>>> f388aed3118e80cf51805afefbc78da8405f9e6a
                     help='number of total epochs to run')
 parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                     help='manual epoch number (useful on restarts)')
 parser.add_argument('-b', '--batch-size', default=25, type=int,
                     metavar='N', help='mini-batch size (default: 50)')
 parser.add_argument('--iter-size', default=5, type=int,
                     metavar='I', help='iter size as in Caffe to reduce memory usage (default: 5)')
-parser.add_argument('--new_length', default=1, type=int,
+parser.add_argument('--new_length', default=10, type=int,
                     metavar='N', help='length of sampled video frames (default: 1)')
 parser.add_argument('--new_width', default=340, type=int,
                     metavar='N', help='resize width (default: 340)')
 parser.add_argument('--new_height', default=256, type=int,
                     metavar='N', help='resize height (default: 256)')
-parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
+parser.add_argument('--lr', '--learning-rate', default=0.005, type=float,
                     metavar='LR', help='initial learning rate')
 parser.add_argument('--lr_steps', default=[100, 200], type=float, nargs="+",
                     metavar='LRSteps', help='epochs to decay learning rate by 10')
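The `--iter-size` flag above refers to Caffe-style gradient accumulation: the optimizer steps once every `iter_size` mini-batches, so a small per-batch memory footprint emulates a larger effective batch. A self-contained sketch of the idea (dummy model and data, not code from this commit):

```python
import torch
import torch.nn as nn

model = nn.Linear(8, 2)                  # stand-in for the real network
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
iter_size = 5                            # matches the flag's default

optimizer.zero_grad()
for i in range(100):
    inp, target = torch.randn(4, 8), torch.randint(0, 2, (4,))
    loss = criterion(model(inp), target) / iter_size  # scale so accumulated grads average out
    loss.backward()                      # gradients accumulate across mini-batches
    if (i + 1) % iter_size == 0:
        optimizer.step()                 # one update per iter_size mini-batches
        optimizer.zero_grad()
```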
@@ -87,23 +91,24 @@ def main():
     model = build_model()
     print("Model %s is loaded. " % (args.arch))
 
-    if not os.path.exists(args.resume):
-        os.makedirs(args.resume)
-        print("Saving everything to directory %s." % (args.resume))
-
     # define loss function (criterion) and optimizer
     criterion = nn.CrossEntropyLoss().cuda()
 
     optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)
 
+    if not os.path.exists(args.resume):
+        os.makedirs(args.resume)
+        print("Saving everything to directory %s." % (args.resume))
+
     cudnn.benchmark = True
 
     # Data transforming
     if args.modality == "rgb":
         is_color = True
         scale_ratios = [1.0, 0.875, 0.75, 0.66]
+<<<<<<< HEAD
         clip_mean = [0.485, 0.456, 0.406] * args.new_length
         clip_std = [0.229, 0.224, 0.225] * args.new_length
     elif args.modality == "flow":
@@ -116,7 +121,21 @@ def main():
 
     normalize = video_transforms.Normalize(mean=clip_mean,
                                            std=clip_std)
+=======
+        is_color = True
+        clip_mean = [0.485, 0.456, 0.406] * args.new_length
+        clip_std = [0.229, 0.224, 0.225] * args.new_length
+    elif args.modality == "flow":
+        scale_ratios = [1.0, 0.875, 0.75]
+        is_color = False
+        clip_mean = [0.5, 0.5] * args.new_length
+        clip_std = [0.5, 0.5] * args.new_length
+    else:
+        print("No such modality. Only rgb and flow supported.")
+>>>>>>> f388aed3118e80cf51805afefbc78da8405f9e6a
 
+    normalize = video_transforms.Normalize(mean=clip_mean,
+                                           std=clip_std)
     train_transform = video_transforms.Compose([
             # video_transforms.Scale((256)),
             video_transforms.MultiScaleCrop((224, 224), scale_ratios),
@@ -258,6 +277,10 @@ def train(train_loader, model, criterion, optimizer, epoch):
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                    epoch, i+1, len(train_loader)+1, batch_time=batch_time, loss=losses, top1=top1))
+<<<<<<< HEAD
+=======
+
+>>>>>>> f388aed3118e80cf51805afefbc78da8405f9e6a
 
 def validate(val_loader, model, criterion):
     batch_time = AverageMeter()
@@ -329,9 +352,14 @@ def update(self, val, n=1):
 
 def adjust_learning_rate(optimizer, epoch):
     """Sets the learning rate to the initial LR decayed by 10 every 150 epochs"""
+<<<<<<< HEAD
     decay = 0.1 ** (sum(epoch >= np.array(args.lr_steps)))
     lr = args.lr * decay
     print("Current learning rate is %4.6f:" % lr)
+=======
+    lr = args.lr * (0.1 ** (epoch // 250))
+    print(lr)
+>>>>>>> f388aed3118e80cf51805afefbc78da8405f9e6a
     for param_group in optimizer.param_groups:
         param_group['lr'] = lr
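Both sides of the conflict in `adjust_learning_rate` implement a step decay; the HEAD branch divides the learning rate by 10 each time the epoch passes a milestone in `lr_steps`. A standalone sketch of that schedule (the function name is mine):

```python
import numpy as np

def step_decay(base_lr, epoch, lr_steps):
    # Count how many milestones the current epoch has passed,
    # and decay by a factor of 0.1 for each one.
    decay = 0.1 ** sum(epoch >= np.array(lr_steps))
    return base_lr * decay

# With the diff's defaults lr=0.001 and lr_steps=[100, 200]:
#   epoch  50 -> 0.001
#   epoch 150 -> 0.0001
#   epoch 250 -> 0.00001
```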

models/flow_vgg16.py

Lines changed: 2 additions & 2 deletions
@@ -91,7 +91,7 @@ def change_key_names(old_params, in_channels):
             new_params[layer_key] = old_params[layer_key]
             layer_count += 1
             # print(layer_key, new_params[layer_key].size())
-
+
     return new_params
 
 def flow_vgg16(pretrained=False, **kwargs):
@@ -112,7 +112,7 @@ def flow_vgg16(pretrained=False, **kwargs):
         # 1. filter out unnecessary keys
         new_pretrained_dict = {k: v for k, v in new_pretrained_dict.items() if k in model_dict}
         # 2. overwrite entries in the existing state dict
-        model_dict.update(new_pretrained_dict)
+        model_dict.update(new_pretrained_dict)
         # 3. load the new state dict
         model.load_state_dict(model_dict)
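The three numbered steps in `flow_vgg16` are a common pattern for loading pretrained weights into a model whose architecture only partially matches. A generic sketch of the same pattern (the helper name is mine; matching keys are assumed to have matching tensor shapes):

```python
import torch.nn as nn

def load_partial_state_dict(model: nn.Module, pretrained_dict: dict) -> None:
    model_dict = model.state_dict()
    # 1. filter out keys the target model does not have
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    # 2. overwrite the matching entries in the model's own state dict
    model_dict.update(pretrained_dict)
    # 3. load the merged state dict back into the model
    model.load_state_dict(model_dict)
```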
