
Commit 26da8bd

add test scripts
1 parent 5ca01a1 commit 26da8bd

File tree

10 files changed: +8103 -46 lines changed


main_single_gpu.py

Lines changed: 42 additions & 44 deletions
@@ -3,13 +3,15 @@
 import time
 import argparse
 import shutil
+import numpy as np

 import torch
 import torch.nn as nn
 import torch.nn.parallel
 import torch.backends.cudnn as cudnn
 import torch.optim
 import torch.utils.data
+import torchvision.models as basic_models

 import video_transforms
 import models
@@ -24,28 +26,28 @@
 parser = argparse.ArgumentParser(description='PyTorch Two-Stream Action Recognition')
 parser.add_argument('data', metavar='DIR',
                     help='path to dataset')
-parser.add_argument('--settings', metavar='DIR', default='./settings',
+parser.add_argument('--settings', metavar='DIR', default='./settings',
                     help='path to datset setting files')
 parser.add_argument('--modality', '-m', metavar='MODALITY', default='rgb',
                     choices=["rgb", "flow"],
                     help='modality: rgb | flow')
 parser.add_argument('--dataset', '-d', default='ucf101',
                     choices=["ucf101", "hmdb51"],
                     help='dataset: ucf101 | hmdb51')
-parser.add_argument('--arch', '-a', metavar='ARCH', default='vgg16',
+parser.add_argument('--arch', '-a', metavar='ARCH', default='rgb_vgg16',
                     choices=model_names,
                     help='model architecture: ' +
                         ' | '.join(model_names) +
-                        ' (default: vgg16)')
+                        ' (default: rgb_vgg16)')
 parser.add_argument('-s', '--split', default=1, type=int, metavar='S',
                     help='which split of data to work on (default: 1)')
 parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                     help='number of data loading workers (default: 4)')
-parser.add_argument('--epochs', default=400, type=int, metavar='N',
+parser.add_argument('--epochs', default=250, type=int, metavar='N',
                     help='number of total epochs to run')
 parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                     help='manual epoch number (useful on restarts)')
-parser.add_argument('-b', '--batch-size', default=50, type=int,
+parser.add_argument('-b', '--batch-size', default=25, type=int,
                     metavar='N', help='mini-batch size (default: 50)')
 parser.add_argument('--iter-size', default=5, type=int,
                     metavar='I', help='iter size as in Caffe to reduce memory usage (default: 5)')
@@ -57,20 +59,24 @@
                     metavar='N', help='resize height (default: 256)')
 parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                     metavar='LR', help='initial learning rate')
+parser.add_argument('--lr_steps', default=[100, 200], type=float, nargs="+",
+                    metavar='LRSteps', help='epochs to decay learning rate by 10')
 parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                     help='momentum')
-parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
+parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
                     metavar='W', help='weight decay (default: 1e-4)')
-parser.add_argument('--print-freq', default=45, type=int,
+parser.add_argument('--print-freq', default=50, type=int,
                     metavar='N', help='print frequency (default: 20)')
-parser.add_argument('--save-freq', default=40, type=int,
+parser.add_argument('--save-freq', default=25, type=int,
                     metavar='N', help='save frequency (default: 20)')
 parser.add_argument('--resume', default='./checkpoints', type=str, metavar='PATH',
                     help='path to latest checkpoint (default: none)')
 parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                     help='evaluate model on validation set')

 best_prec1 = 0
+os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]="0"

 def main():
     global args, best_prec1
@@ -79,7 +85,7 @@ def main():
     # create model
     print("Building model ... ")
     model = build_model()
-    print("Model %s is loaded. " % (args.modality + "_" + args.arch))
+    print("Model %s is loaded. " % (args.arch))

     if not os.path.exists(args.resume):
         os.makedirs(args.resume)
@@ -95,26 +101,21 @@ def main():
     cudnn.benchmark = True

     # Data transforming
-    # clip_mean = [0.485, 0.456, 0.406] * args.new_length
-    # clip_std = [0.229, 0.224, 0.225] * args.new_length
-    clip_mean = [0.5, 0.5, 0.5] * args.new_length
-    clip_std = [0.5, 0.5, 0.5] * args.new_length
-    normalize = video_transforms.Normalize(mean=clip_mean,
-                                           std=clip_std)
-
     if args.modality == "rgb":
+        is_color = True
         scale_ratios = [1.0, 0.875, 0.75, 0.66]
-    elif args.modality == "flow":
+        clip_mean = [0.485, 0.456, 0.406] * args.new_length
+        clip_std = [0.229, 0.224, 0.225] * args.new_length
+    elif args.modality == "flow":
+        is_color = False
         scale_ratios = [1.0, 0.875, 0.75]
+        clip_mean = [0.5, 0.5] * args.new_length
+        clip_std = [0.226, 0.226] * args.new_length
     else:
         print("No such modality. Only rgb and flow supported.")

-    if args.modality == "rgb":
-        is_color = True
-    elif args.modality == "flow":
-        is_color = False
-    else:
-        print("No such modality. Only rgb and flow supported.")
+    normalize = video_transforms.Normalize(mean=clip_mean,
+                                           std=clip_std)

     train_transform = video_transforms.Compose([
             # video_transforms.Scale((256)),
@@ -130,29 +131,29 @@ def main():
             video_transforms.ToTensor(),
             normalize,
         ])
-
-    # data loading
+
+    # data loading
     train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
     train_split_file = os.path.join(args.settings, args.dataset, train_setting_file)
     val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
     val_split_file = os.path.join(args.settings, args.dataset, val_setting_file)
     if not os.path.exists(train_split_file) or not os.path.exists(val_split_file):
         print("No split file exists in %s directory. Preprocess the dataset first" % (args.settings))

-    train_dataset = datasets.__dict__[args.dataset](root=args.data,
-                                                    source=train_split_file,
-                                                    phase="train",
+    train_dataset = datasets.__dict__[args.dataset](root=args.data,
+                                                    source=train_split_file,
+                                                    phase="train",
                                                     modality=args.modality,
-                                                    is_color=is_color,
+                                                    is_color=is_color,
                                                     new_length=args.new_length,
                                                     new_width=args.new_width,
                                                     new_height=args.new_height,
                                                     video_transform=train_transform)
-    val_dataset = datasets.__dict__[args.dataset](root=args.data,
-                                                  source=val_split_file,
-                                                  phase="val",
-                                                  modality=args.modality,
-                                                  is_color=is_color,
+    val_dataset = datasets.__dict__[args.dataset](root=args.data,
+                                                  source=val_split_file,
+                                                  phase="val",
+                                                  modality=args.modality,
+                                                  is_color=is_color,
                                                   new_length=args.new_length,
                                                   new_width=args.new_width,
                                                   new_height=args.new_height,
@@ -182,7 +183,9 @@ def main():
         train(train_loader, model, criterion, optimizer, epoch)

         # evaluate on validation set
-        prec1 = validate(val_loader, model, criterion)
+        prec1 = 0.0
+        if (epoch + 1) % args.save_freq == 0:
+            prec1 = validate(val_loader, model, criterion)

         # remember best prec@1 and save checkpoint
         is_best = prec1 > best_prec1
@@ -200,8 +203,7 @@ def main():

 def build_model():

-    model_name = args.modality + "_" + args.arch
-    model = models.__dict__[model_name](pretrained=True, num_classes=101)
+    model = models.__dict__[args.arch](pretrained=True, num_classes=101)
     model.cuda()
     return model

@@ -256,7 +258,6 @@ def train(train_loader, model, criterion, optimizer, epoch):
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                    epoch, i+1, len(train_loader)+1, batch_time=batch_time, loss=losses, top1=top1))
-

 def validate(val_loader, model, criterion):
     batch_time = AverageMeter()
@@ -302,15 +303,13 @@ def validate(val_loader, model, criterion):

     return top1.avg

-
 def save_checkpoint(state, is_best, filename, resume_path):
     cur_path = os.path.join(resume_path, filename)
     best_path = os.path.join(resume_path, 'model_best.pth.tar')
     torch.save(state, cur_path)
     if is_best:
         shutil.copyfile(cur_path, best_path)

-
 class AverageMeter(object):
     """Computes and stores the average and current value"""
     def __init__(self):
@@ -328,14 +327,13 @@ def update(self, val, n=1):
         self.count += n
         self.avg = self.sum / self.count

-
 def adjust_learning_rate(optimizer, epoch):
     """Sets the learning rate to the initial LR decayed by 10 every 150 epochs"""
-    lr = args.lr * (0.1 ** (epoch // 150))
-    print(lr)
+    decay = 0.1 ** (sum(epoch >= np.array(args.lr_steps)))
+    lr = args.lr * decay
+    print("Current learning rate is %4.6f:" % lr)
     for param_group in optimizer.param_groups:
         param_group['lr'] = lr
-        # param_group['lr'] = param_group['lr']/2

 def accuracy(output, target, topk=(1,)):
     """Computes the precision@k for the specified values of k"""

models/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,2 +1,4 @@
 from .rgb_vgg16 import *
 from .flow_vgg16 import *
+from .rgb_resnet import *
+from .flow_resnet import *

models/flow_vgg16.py

Lines changed: 2 additions & 1 deletion
@@ -101,7 +101,8 @@ def flow_vgg16(pretrained=False, **kwargs):
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
     model = VGG(make_layers(cfg['D']), **kwargs)
-    in_channels = 20
+    # TODO: hardcoded for now for 10 optical flow images, set it as an argument later
+    in_channels = 20
     if pretrained:
         # model.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
         pretrained_dict = model_zoo.load_url(model_urls['vgg16'])
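For context on the in_channels = 20 TODO: the stacked-flow input is 10 optical-flow frames x 2 channels (x and y displacement), so the first conv layer expects 20 channels while the ImageNet weights were trained on 3. Below is a hedged sketch of the common cross-modality trick, averaging the pretrained RGB filters and tiling the mean; the repository's actual weight-transfer code lies outside this hunk, and adapt_first_conv is an illustrative name:

import torch

def adapt_first_conv(rgb_weight, in_channels=20):
    # rgb_weight: (out_c, 3, kH, kW) tensor from the pretrained VGG16.
    # Average across the 3 RGB channels, then tile the mean filter so
    # the layer accepts a 20-channel stacked optical-flow input.
    mean_w = rgb_weight.mean(dim=1, keepdim=True)   # (out_c, 1, kH, kW)
    return mean_w.repeat(1, in_channels, 1, 1)      # (out_c, 20, kH, kW)

w = torch.randn(64, 3, 3, 3)       # stand-in for features.0.weight
print(adapt_first_conv(w).shape)   # torch.Size([64, 20, 3, 3])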
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
+'''
+A sample function for classification using spatial network
+Customize as needed:
+e.g. num_categories, layer for feature extraction, batch_size
+'''
+
+import os
+import sys
+import numpy as np
+import math
+import cv2
+import scipy.io as sio
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.backends.cudnn as cudnn
+import torch.optim
+import torch.utils.data
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models as models
+
+sys.path.insert(0, "../../")
+import video_transforms
+
+def VideoSpatialPrediction(
+        vid_name,
+        net,
+        num_categories,
+        start_frame=0,
+        num_frames=0,
+        num_samples=25
+        ):
+
+    if num_frames == 0:
+        imglist = os.listdir(vid_name)
+        duration = len(imglist)
+        # print(duration)
+    else:
+        duration = num_frames
+
+    clip_mean = [0.485, 0.456, 0.406]
+    clip_std = [0.229, 0.224, 0.225]
+    normalize = video_transforms.Normalize(mean=clip_mean,
+                                           std=clip_std)
+    val_transform = video_transforms.Compose([
+            video_transforms.ToTensor(),
+            normalize,
+        ])
+
+    # selection
+    step = int(math.floor((duration-1)/(num_samples-1)))
+    dims = (256,340,3,num_samples)
+    rgb = np.zeros(shape=dims, dtype=np.float64)
+    rgb_flip = np.zeros(shape=dims, dtype=np.float64)
+
+    for i in range(num_samples):
+        img_file = os.path.join(vid_name, 'image_{0:04d}.jpg'.format(i*step+1))
+        img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
+        img = cv2.resize(img, dims[1::-1])
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        rgb[:,:,:,i] = img
+        rgb_flip[:,:,:,i] = img[:,::-1,:]
+
+    # crop
+    rgb_1 = rgb[:224, :224, :,:]
+    rgb_2 = rgb[:224, -224:, :,:]
+    rgb_3 = rgb[16:240, 60:284, :,:]
+    rgb_4 = rgb[-224:, :224, :,:]
+    rgb_5 = rgb[-224:, -224:, :,:]
+    rgb_f_1 = rgb_flip[:224, :224, :,:]
+    rgb_f_2 = rgb_flip[:224, -224:, :,:]
+    rgb_f_3 = rgb_flip[16:240, 60:284, :,:]
+    rgb_f_4 = rgb_flip[-224:, :224, :,:]
+    rgb_f_5 = rgb_flip[-224:, -224:, :,:]
+
+    rgb = np.concatenate((rgb_1,rgb_2,rgb_3,rgb_4,rgb_5,rgb_f_1,rgb_f_2,rgb_f_3,rgb_f_4,rgb_f_5), axis=3)
+
+    _, _, _, c = rgb.shape
+    rgb_list = []
+    for c_index in range(c):
+        cur_img = rgb[:,:,:,c_index].squeeze()
+        cur_img_tensor = val_transform(cur_img)
+        rgb_list.append(np.expand_dims(cur_img_tensor.numpy(), 0))
+
+    rgb_np = np.concatenate(rgb_list,axis=0)
+    # print(rgb_np.shape)
+    batch_size = 25
+    prediction = np.zeros((num_categories,rgb.shape[3]))
+    num_batches = int(math.ceil(float(rgb.shape[3])/batch_size))
+
+    for bb in range(num_batches):
+        span = range(batch_size*bb, min(rgb.shape[3],batch_size*(bb+1)))
+        input_data = rgb_np[span,:,:,:]
+        imgDataTensor = torch.from_numpy(input_data).type(torch.FloatTensor).cuda()
+        imgDataVar = torch.autograd.Variable(imgDataTensor)
+        output = net(imgDataVar)
+        result = output.data.cpu().numpy()
+        prediction[:, span] = np.transpose(result)
+
+    return prediction
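A hedged usage sketch for the new test function: it assumes a trained rgb_vgg16 checkpoint saved by main_single_gpu.py under a 'state_dict' key and a directory of extracted frames; both paths and the key are assumptions, not shown in this commit. VideoSpatialPrediction returns a (num_categories, 250) score matrix (25 sampled frames x 10 crops), which is typically averaged into a single video-level prediction:

import torch
import models  # the repository's models package

net = models.rgb_vgg16(pretrained=False, num_classes=101)
checkpoint = torch.load("./checkpoints/model_best.pth.tar")  # assumed path
net.load_state_dict(checkpoint["state_dict"])                # assumed key
net.cuda().eval()

scores = VideoSpatialPrediction("frames/v_ApplyEyeMakeup_g01_c01",  # assumed frame dir
                                net, num_categories=101)
pred_class = scores.mean(axis=1).argmax()  # average 250 crop scores, take argmax
print(pred_class)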
