Skip to content

Commit ec20cea

Browse files
authored
guide for measuring performance (wang-xinyu#676)
1 parent ccdf0e6 commit ec20cea

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

tutorials/measure_performance.md

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Measure performance of TensorRT
2+
3+
## 1. Add some variables and structures
4+
5+
See https://github.com/NVIDIA/TensorRT/tree/master/samples/sampleNMT for more details.
6+
7+
```c++
8+
// For rcnn, you can put this code into common.hpp.
9+
#include "logging.h" // rcnn/logging.h
10+
// Logger constructed with severity kINFO; gLogInfo below is built from it.
static Logger gLogger{ Logger::Severity::kINFO };
11+
// Stream consumer created from LOG_INFO(gLogger); the profiler report is
// streamed into it in section 2 (`gLogInfo << sp`).
static LogStreamConsumer gLogInfo{ LOG_INFO(gLogger) };
12+
13+
struct SimpleProfiler : public nvinfer1::IProfiler
14+
{
15+
struct Record
16+
{
17+
float time{ 0 };
18+
int count{ 0 };
19+
};
20+
21+
virtual void reportLayerTime(const char* layerName, float ms)
22+
{
23+
mProfile[layerName].count++;
24+
mProfile[layerName].time += ms;
25+
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end())
26+
{
27+
mLayerNames.push_back(layerName);
28+
}
29+
}
30+
31+
SimpleProfiler(const char* name, const std::vector<SimpleProfiler>& srcProfilers = std::vector<SimpleProfiler>())
32+
: mName(name)
33+
{
34+
for (const auto& srcProfiler : srcProfilers)
35+
{
36+
for (const auto& rec : srcProfiler.mProfile)
37+
{
38+
auto it = mProfile.find(rec.first);
39+
if (it == mProfile.end())
40+
{
41+
mProfile.insert(rec);
42+
}
43+
else
44+
{
45+
it->second.time += rec.second.time;
46+
it->second.count += rec.second.count;
47+
}
48+
}
49+
}
50+
}
51+
52+
friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value)
53+
{
54+
out << "========== " << value.mName << " profile ==========" << std::endl;
55+
float totalTime = 0;
56+
std::string layerNameStr = "TensorRT layer name";
57+
int maxLayerNameLength = std::max(static_cast<int>(layerNameStr.size()), 70);
58+
for (const auto& elem : value.mProfile)
59+
{
60+
totalTime += elem.second.time;
61+
maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
62+
}
63+
64+
auto old_settings = out.flags();
65+
auto old_precision = out.precision();
66+
// Output header
67+
{
68+
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
69+
out << std::setw(12) << "Runtime, "
70+
<< "%"
71+
<< " ";
72+
out << std::setw(12) << "Invocations"
73+
<< " ";
74+
out << std::setw(12) << "Runtime, ms" << std::endl;
75+
}
76+
for (size_t i = 0; i < value.mLayerNames.size(); i++)
77+
{
78+
const std::string layerName = value.mLayerNames[i];
79+
auto elem = value.mProfile.at(layerName);
80+
out << std::setw(maxLayerNameLength) << layerName << " ";
81+
out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%"
82+
<< " ";
83+
out << std::setw(12) << elem.count << " ";
84+
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl;
85+
}
86+
out.flags(old_settings);
87+
out.precision(old_precision);
88+
out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl;
89+
90+
return out;
91+
}
92+
93+
private:
94+
std::string mName;
95+
std::vector<std::string> mLayerNames;
96+
std::map<std::string, Record> mProfile;
97+
};
98+
```
99+
100+
101+
102+
## 2. Set the profiler on the context and print the log
103+
104+
```c++
105+
// It is best to give every layer a name: the report is keyed and printed by layer name.
105+
// build engine
106+
// build context
107+
// Create a profiler labelled "test" (the label appears in the report banner).
auto sp = SimpleProfiler("test");
108+
// Attach the profiler to the context. It is passed by address, so presumably
// `sp` must stay alive while the context uses it -- confirm in the TensorRT docs.
context->setProfiler(&sp);
109+
context->enqueue(...);
110+
// Print the per-layer report through the logger stream.
gLogInfo << sp << std::endl;
112+
```
113+

0 commit comments

Comments
 (0)