Skip to content

Commit 9bb2f67

Browse files
authored
add int8 quantization (wang-xinyu#612)
* add detr * Update README.md * add int8 quantization fix some known bugs
1 parent 14e220f commit 9bb2f67

File tree

4 files changed

+139
-27
lines changed

4 files changed

+139
-27
lines changed

detr/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ sudo ./detr -d detr.engine ../samples
5252

5353
Average cost of doInference (in detr.cpp), measured from the second run onward with batch=1 in the Ubuntu environment above:
5454

55-
| | fp32 | fp16 | int8 |
56-
| ---- | ------- | ------- | ---- |
57-
| R50 | 19.57ms | 9.424ms | TODO |
55+
| | fp32 | fp16 | int8 |
56+
| ---- | ------- | ------- | ------ |
57+
| R50 | 19.57ms | 9.424ms | 8.38ms |
5858

detr/calibrator.hpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#pragma once
2+
3+
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include "NvInfer.h"
#include "common.hpp"
11+
12+
//! \class Int8EntropyCalibrator2
13+
//!
14+
//! \brief Implements Entropy calibrator 2.
15+
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
16+
//!
17+
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
18+
public:
19+
Int8EntropyCalibrator2(int batchsize, int input_w, int input_h,
20+
const char* img_dir, const char* calib_table_name,
21+
const char* input_blob_name, bool read_cache = true);
22+
23+
virtual ~Int8EntropyCalibrator2();
24+
int getBatchSize() const override;
25+
bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
26+
const void* readCalibrationCache(size_t& length) override;
27+
void writeCalibrationCache(const void* cache, size_t length) override;
28+
29+
private:
30+
int batchsize_;
31+
int input_w_;
32+
int input_h_;
33+
int img_idx_;
34+
std::string img_dir_;
35+
std::vector<std::string> img_files_;
36+
size_t input_count_;
37+
std::string calib_table_name_;
38+
const char* input_blob_name_;
39+
bool read_cache_;
40+
void* device_input_;
41+
std::vector<char> calib_cache_;
42+
};
43+
44+
//! Stores the calibration parameters, allocates the device buffer for one
//! batch (3 * input_w * input_h * batchsize floats) and lists the files in
//! \p img_dir.
//!
//! Defined `inline` because this definition lives in a header.
inline Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize,
        int input_w, int input_h, const char* img_dir,
        const char* calib_table_name, const char* input_blob_name,
        bool read_cache)
    : batchsize_(batchsize)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    // Widen before multiplying so the element count is not computed in int.
    , input_count_(static_cast<size_t>(3) * input_w * input_h * batchsize)
    , calib_table_name_(calib_table_name)
    , input_blob_name_(input_blob_name)
    , read_cache_(read_cache)
    , device_input_(nullptr) {
    // getBatch() builds each image path as img_dir_ + file name, so the
    // directory must end with a separator.
    if (!img_dir_.empty() && img_dir_.back() != '/') {
        img_dir_ += '/';
    }

    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));

    // NOTE(review): read_files_in_dir returns 0 on its visible success path;
    // any other value is treated as "could not list the directory" here.
    if (read_files_in_dir(img_dir, img_files_) != 0) {
        std::cerr << "no calibration images could be listed in " << img_dir_ << std::endl;
    }
}
60+
61+
//! Releases the device buffer allocated by the constructor.
//! Defined `inline` because this definition lives in a header.
inline Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    CUDA_CHECK(cudaFree(device_input_));
}
64+
65+
int Int8EntropyCalibrator2::getBatchSize() const {
66+
return batchsize_;
67+
}
68+
69+
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) {
70+
if (img_idx_ + batchsize_ > static_cast<int>(img_files_.size())) {
71+
return false;
72+
}
73+
74+
std::vector<float> input_imgs_(input_count_, 0);
75+
for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
76+
std::cout << img_files_[i] << " " << i << std::endl;
77+
cv::Mat temp = cv::imread(img_dir_ + img_files_[i]);
78+
if (temp.empty()) {
79+
std::cerr << "Fatal error: image cannot open!" << std::endl;
80+
return false;
81+
}
82+
preprocessImg(temp, input_w_, input_h_);
83+
for (int c = 0; c < 3; c++) {
84+
for (int h = 0; h < input_h_; h++) {
85+
for (int w = 0; w < input_w_; w++) {
86+
input_imgs_[(i-img_idx_)*input_w_*input_h_*3 +
87+
c * input_h_ * input_w_ + h * input_w_ + w] = temp.at<cv::Vec3f>(h, w)[c];
88+
}
89+
}
90+
}
91+
}
92+
img_idx_ += batchsize_;
93+
94+
CUDA_CHECK(cudaMemcpy(device_input_, input_imgs_.data(), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
95+
assert(!strcmp(names[0], input_blob_name_));
96+
bindings[0] = device_input_;
97+
return true;
98+
}
99+
100+
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) {
101+
std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
102+
calib_cache_.clear();
103+
std::ifstream input(calib_table_name_, std::ios::binary);
104+
input >> std::noskipws;
105+
if (read_cache_ && input.good()) {
106+
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_));
107+
}
108+
length = calib_cache_.size();
109+
return length ? calib_cache_.data() : nullptr;
110+
}
111+
112+
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) {
113+
std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
114+
std::ofstream output(calib_table_name_, std::ios::binary);
115+
output.write(reinterpret_cast<const char*>(cache), length);
116+
}

detr/common.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::str
7878
return 0;
7979
}
8080

81+
// Converts a BGR image in place into the normalised float RGB image fed to
// the network.
//
//   img   image to convert; callers in this project check img.empty() first
//   newh  target height in pixels (rows)    -- note: height comes first
//   neww  target width in pixels (columns)
//
// Steps: BGR -> RGB, resize to neww x newh, convert to 32-bit float, scale to
// [0, 1], then subtract the per-channel mean and divide by the per-channel
// standard deviation given below.
//
// `static inline` like read_files_in_dir, because this header-defined
// function would otherwise be defined once per including translation unit.
static inline void preprocessImg(cv::Mat& img, int newh, int neww) {
    // convert to rgb
    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
    // cv::Size takes (width, height)
    cv::resize(img, img, cv::Size(neww, newh));
    img.convertTo(img, CV_32FC3);
    img /= 255;
    img -= cv::Scalar(0.485, 0.456, 0.406);
    img /= cv::Scalar(0.229, 0.224, 0.225);
}
90+
8191
#ifndef CUDA_CHECK
8292
#define CUDA_CHECK(callstr)\
8393
{\

detr/detr.cpp

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
#include <unordered_map>
44
#include "./logging.h"
55
#include "backbone.hpp"
6+
#include "calibrator.hpp"
67

78
#define DEVICE 0
89
#define BATCH_SIZE 1
910

1011
// 1 / math.sqrt(head_dim) https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/nn/functional/activation.h#623
1112
static const float SCALING = 0.17677669529663687;
12-
static const float MIN_SIZE = 800.0;
1313
static const int INPUT_H = 800;
1414
static const int INPUT_W = 1066;
1515
static const int NUM_CLASS = 92; // include background
@@ -28,25 +28,6 @@ static const float SCORE_THRESH = 0.5;
2828
const char* INPUT_NODE_NAME = "images";
2929
const std::vector<std::string> OUTPUT_NAMES = { "scores", "boxes"};
3030

31-
void preprocessImg(cv::Mat& img) {
32-
// convert to rgb
33-
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
34-
float ratio = static_cast<float>(MIN_SIZE) / std::min(img.rows, img.cols);
35-
int newh = 0, neww = 0;
36-
if (img.rows < img.cols) {
37-
newh = MIN_SIZE;
38-
neww = ratio * img.cols;
39-
} else {
40-
newh = ratio * img.rows;
41-
neww = MIN_SIZE;
42-
}
43-
cv::resize(img, img, cv::Size(neww, newh));
44-
img.convertTo(img, CV_32FC3);
45-
img /= 255;
46-
img -= cv::Scalar(0.485, 0.456, 0.406);
47-
img /= cv::Scalar(0.229, 0.224, 0.225);
48-
}
49-
5031
ITensor* PositionEmbeddingSine(
5132
INetworkDefinition *network,
5233
std::unordered_map<std::string, Weights>& weightMap,
@@ -555,7 +536,7 @@ const std::string& modelType = "fp16"
555536
INetworkDefinition* network = builder->createNetworkV2(0U);
556537

557538
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
558-
ITensor* data = network->addInput("data", dt, Dims3{ 3, INPUT_H, INPUT_W });
539+
ITensor* data = network->addInput(INPUT_NODE_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
559540

560541
// preprocess
561542
std::unordered_map<std::string, Weights> weightMap;
@@ -605,7 +586,12 @@ const std::string& modelType = "fp16"
605586
} else if (modelType == "fp16") {
606587
config->setFlag(BuilderFlag::kFP16);
607588
} else if (modelType == "int8") {
608-
// TODO: test with int8 quantization
589+
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
590+
assert(builder->platformHasFastInt8());
591+
config->setFlag(BuilderFlag::kINT8);
592+
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(BATCH_SIZE, INPUT_W, INPUT_H, "./coco_calib/",
593+
"int8calib.table", INPUT_NODE_NAME);
594+
config->setInt8Calibrator(calibrator);
609595
} else {
610596
throw("does not support model type");
611597
}
@@ -761,9 +747,9 @@ int main(int argc, char** argv) {
761747

762748
for (int b = 0; b < fcount; b++) {
763749
cv::Mat img = cv::imread(imgDir + "/" + fileList[f - fcount + 1 + b]);
764-
preprocessImg(img);
765-
assert(img.cols * img.rows * 3 == input_size);
766750
if (img.empty()) continue;
751+
preprocessImg(img, INPUT_H, INPUT_W);
752+
assert(img.cols * img.rows * 3 == input_size);
767753
for (int c = 0; c < 3; c++) {
768754
for (int h = 0; h < img.rows; h++) {
769755
for (int w = 0; w < img.cols; w++) {

0 commit comments

Comments
 (0)