Skip to content

Commit efe37f9

Browse files
Aapo Kyrola, facebook-github-bot
authored and committed
support appending net and converting them
Summary: As per rushabhmshah99's request: he wants to append a pre-trained model (without training it) to the model. So I added data_parallel_model.ConvertNetForDevice() to enable that. The unit test shows an example of how to use this with AppendNet, and I also added a blurb to the function. Differential Revision: D5503335 fbshipit-source-id: b2a5db5c1739dc97f46dd0d7606ed555d99255b8
1 parent a61d04a commit efe37f9

File tree

2 files changed

+92
-1
lines changed

2 files changed

+92
-1
lines changed

caffe2/python/data_parallel_model.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,44 @@ def Synchronize(model, timeout_sec=30):
563563
workspace.RunNetOnce(barrier_net)
564564

565565

566+
def ConvertNetForDevice(net, device=None):
    '''
    Return a deep copy of `net` whose blobs are all prefixed with a
    per-device namescope ("gpu_X/" for CUDA devices, "cpu_X/" otherwise)
    and whose operators all carry `device` as their device option. The
    passed-in net itself is not modified.

    You can use this to enable AppendNet with a forward_pass_builder_fun:

        def builder_fun(model):
            ...
            model.net.AppendNet(
                data_parallel_model.ConvertNetForDevice(othermodel.net))
            model.param_init_net.AppendNet(
                data_parallel_model.ConvertNetForDevice(
                    othermodel.param_init_net))

    Args:
        net: the net to convert; it must support copy.deepcopy() and
            expose its protobuf through Proto().
        device: device option to convert for. When None, the value of
            scope.CurrentDeviceScope() is used.

    Returns:
        The converted copy of `net`.

    Raises:
        ValueError: if no device is given and none is found in scope.
        NotImplementedError: if the net contains an operator whose type
            contains "RecurrentNetwork".
    '''
    mnet = copy.deepcopy(net)

    if device is None:
        device = scope.CurrentDeviceScope()
    if device is None:
        # NOTE(review): presumably CurrentDeviceScope() yields None when no
        # device scope is active -- confirm. Without this guard the failure
        # would be an opaque AttributeError on `device.device_type` below.
        raise ValueError(
            "ConvertNetForDevice requires a device: pass one explicitly "
            "or call it inside a device scope")

    device_prefix = "gpu" if device.device_type == caffe2_pb2.CUDA else "cpu"

    # The index is read from cuda_gpu_id for both CUDA and non-CUDA devices.
    namescope = "{}_{}/".format(device_prefix, device.cuda_gpu_id)

    def _add_namescope(blob_names):
        # Rewrite every entry of a repeated string field in place.
        for i, blob_name in enumerate(blob_names):
            blob_names[i] = namescope + blob_name

    proto = mnet.Proto()
    for op in proto.op:
        if "RecurrentNetwork" in op.type:
            # Raise a real exception: `raise "some string"` is itself a
            # TypeError and would hide this message from the caller.
            raise NotImplementedError(
                "RecurrentNetwork conversion not yet supported")
        _add_namescope(op.input)
        _add_namescope(op.output)
        _add_namescope(op.control_input)
        op.device_option.CopyFrom(device)

    # The net-level blob lists must agree with the renamed operator blobs.
    _add_namescope(proto.external_input)
    _add_namescope(proto.external_output)
    return mnet
602+
603+
566604
def _ForEachGPU(gpu_ids, f, scoped=False, *args, **kwargs):
567605
for gpu_id in gpu_ids:
568606
device_opt = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)

caffe2/python/data_parallel_model_test.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from multiprocessing import Process, Queue
1010
from caffe2.proto import caffe2_pb2
1111
from caffe2.python import core, cnn, data_parallel_model, dyndep, optimizer, \
12-
rnn_cell, workspace
12+
rnn_cell, workspace, model_helper, brew
1313
from caffe2.python.test_util import TestCase
1414
from future.utils import viewkeys
1515

@@ -208,6 +208,59 @@ def add_optimizer(model):
208208
self.assertFalse(core.BlobReference("cpu_1/data") in checkpoint_params)
209209
self.assertTrue(core.BlobReference("optimizer_iteration") in checkpoint_params)
210210

211+
def test_net_conversion_and_append_net(self):
    # Builds a separate three-layer FC model, then appends device-converted
    # copies of its net and param_init_net to every replica of a
    # CPU-parallelized model. The test passes if the combined nets can be
    # created and run without raising.
    pretrained = model_helper.ModelHelper()
    fc_specs = (
        ("other_fc1", 3*227*227),
        ("other_fc2", 10),
        ("other_fc3", 10),
    )
    prev_blob = "data"
    for fc_name, fc_dim_in in fc_specs:
        # Each FC layer consumes the output of the previous one.
        prev_blob = brew.fc(
            pretrained, prev_blob, fc_name, dim_in=fc_dim_in, dim_out=10)

    def add_input_ops(model):
        model.net.UniformFill([], ["data"], shape=[4, 227, 227, 3])
        model.net.UniformFill([], ["label"], shape=[4])

    def add_model_ops(model, loss_scale):
        model.NHWC2NCHW("data", "data_nchw")
        model.Conv(
            "data_nchw", 'conv1', 3, 64,
            weight_init=("MSRAFill", {}),
            kernel=7, stride=2, pad=3, no_bias=0,
        )
        model.SpatialBN('conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3)
        model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
        model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
        model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)

        # Append the net and param_init_net of the other model, converted
        # for the device this replica is being built on.
        converted_net = data_parallel_model.ConvertNetForDevice(
            pretrained.net)
        model.net.AppendNet(converted_net)

        converted_init_net = data_parallel_model.ConvertNetForDevice(
            pretrained.param_init_net)
        model.param_init_net.AppendNet(converted_init_net)

        model.Sigmoid('fc', 'fc_sigm')
        model.Softmax('fc_sigm', 'softmax')
        return [model.AveragedLoss('softmax', 'loss')]

    def add_optimizer(model):
        optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)

    model = cnn.CNNModelHelper(order="NCHW", name="test")
    data_parallel_model.Parallelize_CPU(
        model,
        input_builder_fun=add_input_ops,
        forward_pass_builder_fun=add_model_ops,
        optimizer_builder_fun=add_optimizer,
        devices=range(4)
    )

    # Just create and run net and confirm no exception is thrown
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    workspace.RunNet(model.net)
263+
211264
def test_synchronization_barrier(self):
212265

213266
def run(comm_rank, comm_size, tmpdir):

0 commit comments

Comments
 (0)