@@ -26,26 +26,27 @@
 import math
 import sys
 
-batch_size = 32
-epoch = 120
 IMAGENET1000 = 1281167
 base_lr = 0.1
 momentum_rate = 0.9
 l2_decay = 1e-4
 
 
 def parse_args():
-    parser = argparse.ArgumentParser("Training for Mnist.")
+    parser = argparse.ArgumentParser("Training for Resnet.")
     parser.add_argument(
         "--use_data_parallel",
         type=ast.literal_eval,
         default=False,
         help="The flag indicating whether to use data parallel mode to train the model.")
+    parser.add_argument("-e", "--epoch", default=120, type=int, help="set epoch")
+    parser.add_argument("-b", "--batch_size", default=32, type=int, help="set batch size")
+    parser.add_argument("--ce", action="store_true", help="run ce")
     args = parser.parse_args()
     return args
 
 
 args = parse_args()
-
+batch_size = args.batch_size
 
 def optimizer_setting():
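Reviewer note (not part of the patch): `--use_data_parallel` is parsed with `type=ast.literal_eval` because `type=bool` would treat any non-empty string, including "False", as true. A minimal standalone sketch of the parser this hunk builds, with a simulated command line:

```python
import argparse
import ast

parser = argparse.ArgumentParser("Training for Resnet.")
# ast.literal_eval turns the literal string "True"/"False" into a real bool
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False)
parser.add_argument("-e", "--epoch", default=120, type=int)
parser.add_argument("-b", "--batch_size", default=32, type=int)
parser.add_argument("--ce", action="store_true")

# Simulates: python train.py --use_data_parallel True -b 64 --ce
args = parser.parse_args(["--use_data_parallel", "True", "-b", "64", "--ce"])
assert args.use_data_parallel is True
assert args.batch_size == 64 and args.epoch == 120 and args.ce
```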
@@ -263,16 +264,28 @@ def eval(model, data):
             print("test | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                   ( batch_id, total_loss / total_sample, \
                     total_acc1 / total_sample, total_acc5 / total_sample))
+    if args.ce:
+        print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
+        print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
+        print("kpis\ttest_loss\t%0.3f" % (total_loss / total_sample))
     print("final eval loss %0.3f acc1 %0.3f acc5 %0.3f" % \
           (total_loss / total_sample, \
            total_acc1 / total_sample, total_acc5 / total_sample))
 
 
 def train_resnet():
+    epoch = args.epoch
     trainer_count = fluid.dygraph.parallel.Env().nranks
     place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
         if args.use_data_parallel else fluid.CUDAPlace(0)
     with fluid.dygraph.guard(place):
+        if args.ce:
+            print("ce mode")
+            seed = 33
+            np.random.seed(seed)
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+
         if args.use_data_parallel:
             strategy = fluid.dygraph.parallel.prepare_context()
 
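Reviewer note (not part of the patch): the `args.ce` block above pins the seeds behind parameter initialization and numpy-driven data shuffling, so repeated CE runs of the same commit produce comparable kpi lines; GPU kernels may still introduce small nondeterminism. A condensed sketch of the same pattern, assuming the Paddle 1.x `fluid` API this file uses (`seed_everything` is a hypothetical helper name):

```python
import numpy as np
import paddle.fluid as fluid

def seed_everything(seed=33):
    # numpy drives reader shuffling and numpy-based augmentation
    np.random.seed(seed)
    # startup program seed fixes parameter initialization
    fluid.default_startup_program().random_seed = seed
    # main program seed fixes in-graph randomness (e.g. dropout)
    fluid.default_main_program().random_seed = seed
```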
@@ -340,24 +353,27 @@ def train_resnet():
                 optimizer.minimize(avg_loss)
                 resnet.clear_gradients()
 
-                framework._dygraph_tracer_._clear_ops()
 
                 total_loss += dy_out
                 total_acc1 += acc_top1.numpy()
                 total_acc5 += acc_top5.numpy()
                 total_sample += 1
-
                 #print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
                 if batch_id % 10 == 0:
                     print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                           ( eop, batch_id, total_loss / total_sample, \
                             total_acc1 / total_sample, total_acc5 / total_sample))
 
+            if args.ce:
+                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
+                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
+                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
             print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                   (eop, batch_id, total_loss / total_sample, \
                    total_acc1 / total_sample, total_acc5 / total_sample))
             resnet.eval()
             eval(resnet, test_reader)
+            fluid.dygraph.save_persistables(resnet.state_dict(), 'resnet_params')
 
 
 if __name__ == '__main__':
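Reviewer note (not part of the patch): the `kpis\t<name>\t<value>` records printed under `--ce` are plain tab-separated lines meant to be scraped from stdout by the continuous-evaluation harness, which lives outside this diff. Purely as an illustration, a scraper for that format could look like:

```python
import re

# "kpis<TAB>name<TAB>value", as printed by the --ce branches above
KPI_RE = re.compile(r"^kpis\t(\w+)\t([-+0-9.eE]+)$")

def parse_kpis(log_text):
    """Return {kpi_name: last reported value} from a training log."""
    kpis = {}
    for line in log_text.splitlines():
        m = KPI_RE.match(line)
        if m:
            kpis[m.group(1)] = float(m.group(2))
    return kpis

log = "kpis\ttrain_loss\t2.143\nkpis\ttest_acc1\t0.312\n"
assert parse_kpis(log) == {"train_loss": 2.143, "test_acc1": 0.312}
```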