
Commit b4cb576

raghuramank100 authored and fmassa committed
Quantizable resnet and mobilenet models (pytorch#1471)
* Add quantized models
* Modify mobilenet.py documentation and clean up comments
* Move the fuse_model method to QuantizableInvertedResidual and clean up args documentation
* Restore relu settings to their defaults in resnet.py
* Fix missing return in forward
* Fix missing returns in forwards
* Change pretrained -> pretrained_float_models; replace InvertedResidual with block
* Update tests to follow a structure similar to test_models.py, allowing modular testing
* Replace the forward method with a simple function assignment
* Fix error in arguments for resnet18
* Add the missing pretrained_float_model argument for mobilenet
* Add reference scripts for quantization aware training and post training quantization
* Set pretrained_float_model to False and explicitly provide the float model
* Address review comments: replace forward with _forward, use pretrained models in the reference train/eval script, and skip tests if fbgemm is not supported
* Fix lint errors; use _forward for code shared between the float and quantized models; clean up linting in the reference train scripts; test all quantizable models
* Update default values for args in quantization/train.py
* Update models to conform to the new API with a quantize argument; remove apex from the training script; add post training quantization as an option; add support for a separate calibration data set
* Fix minor errors in train_quantization.py
* Remove duplicate file
* Bugfix
* Minor improvements to the models
* Expose print_freq to evaluate
* Minor improvements to train_quantization.py
* Ensure that quantized models are created and run on the specified backends; fix errors in test-only mode
* Add model urls
* Fix errors in quantized model tests; speed up creation of random quantized models by removing histogram observers
* Move setting the qengine prior to convert
* Fix lint error
* Add README.md
* Fix lint
1 parent e79cadd commit b4cb576
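As a rough sketch of the usage this commit describes (assuming the torchvision.models.quantization namespace and the pretrained/quantize flags mentioned in the commit message; not verified against this exact revision), a quantized model can be created and run much like its float counterpart:

```python
import torch
from torchvision.models.quantization import resnet18

# Select the quantized kernel backend; per the README below, the post
# training quantized models are generated with 'fbgemm' (x86).
torch.backends.quantized.engine = 'fbgemm'

# quantize=True returns an int8 model ready for inference;
# quantize=False returns the quantizable float model, which can be
# fused, calibrated and converted manually.
model = resnet18(pretrained=True, quantize=True)
model.eval()

with torch.no_grad():
    out = model(torch.rand(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])
```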

11 files changed: +738 additions, -31 deletions


references/classification/README.md

Lines changed: 29 additions & 0 deletions
````diff
@@ -28,3 +28,32 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
     --model mobilenet_v2 --epochs 300 --lr 0.045 --wd 0.00004\
     --lr-step-size 1 --lr-gamma 0.98
 ```
+
+## Quantized
+
+### Parameters used for generating quantized models:
+
+For all post training quantized models (all quantized models except mobilenet-v2), the settings are:
+
+1. num_calibration_batches: 32
+2. num_workers: 16
+3. batch_size: 32
+4. eval_batch_size: 128
+5. backend: 'fbgemm'
+
+For mobilenet-v2, the model was trained with quantization aware training; the settings used are:
+1. num_workers: 16
+2. batch_size: 32
+3. eval_batch_size: 128
+4. backend: 'qnnpack'
+5. learning-rate: 0.0001
+6. num_epochs: 90
+7. num_observer_update_epochs: 4
+8. num_batch_norm_update_epochs: 3
+9. momentum: 0.9
+10. lr_step_size: 30
+11. lr_gamma: 0.1
+
+Training converges at about 10 epochs.
+
+For post training quantization, the device is set to CPU. For quantization aware training, the device is set to CUDA.
````
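The post training settings above are consumed by the reference script added in this commit; a minimal sketch of that flow under the same settings (assuming a quantizable model exposing fuse_model(), the eager-mode torch.quantization workflow of that era, and a calibration `data_loader` defined elsewhere) looks like:

```python
import torch
from torchvision.models.quantization import resnet50

num_calibration_batches = 32  # as listed above

# Start from the quantizable float model and prepare it for calibration.
model = resnet50(pretrained=True, quantize=False)
model.eval()
model.fuse_model()  # fuse Conv+BN(+ReLU) modules before quantization
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(model, inplace=True)  # insert observers

# Calibrate on a few batches (on CPU, per the note above); `data_loader`
# is a placeholder for the calibration loader.
with torch.no_grad():
    for i, (image, _) in enumerate(data_loader):
        if i >= num_calibration_batches:
            break
        model(image)

torch.quantization.convert(model, inplace=True)  # produce the int8 model
```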

references/classification/train.py

Lines changed: 33 additions & 27 deletions
```diff
@@ -47,12 +47,12 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, pri
         metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time))
 
 
-def evaluate(model, criterion, data_loader, device):
+def evaluate(model, criterion, data_loader, device, print_freq=100):
     model.eval()
     metric_logger = utils.MetricLogger(delimiter="  ")
     header = 'Test:'
     with torch.no_grad():
-        for image, target in metric_logger.log_every(data_loader, 100, header):
+        for image, target in metric_logger.log_every(data_loader, print_freq, header):
             image = image.to(device, non_blocking=True)
             target = target.to(device, non_blocking=True)
             output = model(image)
@@ -81,35 +81,16 @@ def _get_cache_path(filepath):
     return cache_path
 
 
-def main(args):
-    if args.apex:
-        if sys.version_info < (3, 0):
-            raise RuntimeError("Apex currently only supports Python 3. Aborting.")
-        if amp is None:
-            raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
-                               "to enable mixed-precision training.")
-
-    if args.output_dir:
-        utils.mkdir(args.output_dir)
-
-    utils.init_distributed_mode(args)
-    print(args)
-
-    device = torch.device(args.device)
-
-    torch.backends.cudnn.benchmark = True
-
+def load_data(traindir, valdir, cache_dataset, distributed):
     # Data loading code
     print("Loading data")
-    traindir = os.path.join(args.data_path, 'train')
-    valdir = os.path.join(args.data_path, 'val')
     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
 
     print("Loading training data")
     st = time.time()
     cache_path = _get_cache_path(traindir)
-    if args.cache_dataset and os.path.exists(cache_path):
+    if cache_dataset and os.path.exists(cache_path):
         # Attention, as the transforms are also cached!
         print("Loading dataset_train from {}".format(cache_path))
         dataset, _ = torch.load(cache_path)
@@ -122,15 +103,15 @@ def main(args):
                 transforms.ToTensor(),
                 normalize,
             ]))
-        if args.cache_dataset:
+        if cache_dataset:
             print("Saving dataset_train to {}".format(cache_path))
             utils.mkdir(os.path.dirname(cache_path))
             utils.save_on_master((dataset, traindir), cache_path)
     print("Took", time.time() - st)
 
     print("Loading validation data")
     cache_path = _get_cache_path(valdir)
-    if args.cache_dataset and os.path.exists(cache_path):
+    if cache_dataset and os.path.exists(cache_path):
         # Attention, as the transforms are also cached!
         print("Loading dataset_test from {}".format(cache_path))
         dataset_test, _ = torch.load(cache_path)
@@ -143,19 +124,44 @@ def main(args):
                 transforms.ToTensor(),
                 normalize,
             ]))
-        if args.cache_dataset:
+        if cache_dataset:
             print("Saving dataset_test to {}".format(cache_path))
             utils.mkdir(os.path.dirname(cache_path))
             utils.save_on_master((dataset_test, valdir), cache_path)
 
     print("Creating data loaders")
-    if args.distributed:
+    if distributed:
         train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
         test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
     else:
         train_sampler = torch.utils.data.RandomSampler(dataset)
         test_sampler = torch.utils.data.SequentialSampler(dataset_test)
 
+    return dataset, dataset_test, train_sampler, test_sampler
+
+
+def main(args):
+    if args.apex:
+        if sys.version_info < (3, 0):
+            raise RuntimeError("Apex currently only supports Python 3. Aborting.")
+        if amp is None:
+            raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
+                               "to enable mixed-precision training.")
+
+    if args.output_dir:
+        utils.mkdir(args.output_dir)
+
+    utils.init_distributed_mode(args)
+    print(args)
+
+    device = torch.device(args.device)
+
+    torch.backends.cudnn.benchmark = True
+
+    train_dir = os.path.join(args.data_path, 'train')
+    val_dir = os.path.join(args.data_path, 'val')
+    dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir,
+                                                                   args.cache_dataset, args.distributed)
     data_loader = torch.utils.data.DataLoader(
         dataset, batch_size=args.batch_size,
         sampler=train_sampler, num_workers=args.workers, pin_memory=True)
```
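For reference, a sketch of how the refactored helpers from this diff fit together; load_data() and evaluate() are the functions shown above, while the paths, batch sizes, and model choice below are illustrative stand-ins for the corresponding args fields:

```python
import torch
import torchvision

# Build datasets and samplers with the new standalone helper.
dataset, dataset_test, train_sampler, test_sampler = load_data(
    '/datasets/imagenet/train', '/datasets/imagenet/val',
    cache_dataset=False, distributed=False)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=128,
    sampler=test_sampler, num_workers=16, pin_memory=True)

model = torchvision.models.mobilenet_v2(pretrained=True)
criterion = torch.nn.CrossEntropyLoss()

# evaluate() now accepts print_freq instead of hard-coding it to 100.
evaluate(model, criterion, data_loader_test,
         device=torch.device('cpu'), print_freq=50)
```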
