2323
2424#define USE_FP16 // comment this out if you want to use FP32
2525#define DEVICE 0 // GPU id
26+ #define BATCH_SIZE 1
2627
2728// stuff we know about the network and the input/output blobs
2829static const int INPUT_H = decodeplugin::INPUT_H; // H and W must be divisible by 32.
@@ -482,7 +483,7 @@ int main(int argc, char** argv) {
482483
483484 if (std::string (argv[1 ]) == " -s" ) {
484485 IHostMemory* modelStream{nullptr };
485- APIToModel (1 , &modelStream);
486+ APIToModel (BATCH_SIZE , &modelStream);
486487 assert (modelStream != nullptr );
487488
488489 std::ofstream p (" retina_r50.engine" , std::ios::binary);
@@ -509,17 +510,23 @@ int main(int argc, char** argv) {
509510 }
510511
511512 // prepare input data ---------------------------
512- static float data[3 * INPUT_H * INPUT_W];
513+ static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
513514 // for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
514515 // data[i] = 1.0;
515516
516517 cv::Mat img = cv::imread (" worlds-largest-selfie.jpg" );
517518 cv::Mat pr_img = preprocess_img (img);
518519 // cv::imwrite("preprocessed.jpg", pr_img);
519- for (int i = 0 ; i < INPUT_H * INPUT_W; i++) {
520- data[i] = pr_img.at <cv::Vec3b>(i)[0 ] - 104.0 ;
521- data[i + INPUT_H * INPUT_W] = pr_img.at <cv::Vec3b>(i)[1 ] - 117.0 ;
522- data[i + 2 * INPUT_H * INPUT_W] = pr_img.at <cv::Vec3b>(i)[2 ] - 123.0 ;
520+
521+ // For multi-batch inference, the same image is fed into every batch slot.
522+ // If you want to process different images in a batch, you need to adapt this loop.
523+ for (int b = 0 ; b < BATCH_SIZE; b++) {
524+ float *p_data = &data[b * 3 * INPUT_H * INPUT_W];
525+ for (int i = 0 ; i < INPUT_H * INPUT_W; i++) {
526+ p_data[i] = pr_img.at <cv::Vec3b>(i)[0 ] - 104.0 ;
527+ p_data[i + INPUT_H * INPUT_W] = pr_img.at <cv::Vec3b>(i)[1 ] - 117.0 ;
528+ p_data[i + 2 * INPUT_H * INPUT_W] = pr_img.at <cv::Vec3b>(i)[2 ] - 123.0 ;
529+ }
523530 }
524531
525532 IRuntime* runtime = createInferRuntime (gLogger );
@@ -531,28 +538,30 @@ int main(int argc, char** argv) {
531538 assert (context != nullptr );
532539
533540 // Run inference
534- static float prob[OUTPUT_SIZE];
535- std::vector<decodeplugin::Detection> res;
536- for (int i = 0 ; i < 20 ; i++) {
537- res.clear ();
538- auto start = std::chrono::system_clock::now ();
539- doInference (*context, data, prob, 1 );
540- nms (res, prob);
541- auto end = std::chrono::system_clock::now ();
542- std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count () << " ms" << std::endl;
543- }
544- std::cout << " detected before nms -> " << prob[0 ] << std::endl;
545- std::cout << " after nms -> " << res.size () << std::endl;
546- for (size_t j = 0 ; j < res.size (); j++) {
547- if (res[j].class_confidence < 0.1 ) continue ;
548- cv::Rect r = get_rect_adapt_landmark (img, res[j].bbox , res[j].landmark );
549- cv::rectangle (img, r, cv::Scalar (0x27 , 0xC1 , 0x36 ), 2 );
550- // cv::putText(img, std::to_string((int)(res[j].class_confidence * 100)) + "%", cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 1);
551- for (int k = 0 ; k < 10 ; k += 2 ) {
552- cv::circle (img, cv::Point (res[j].landmark [k], res[j].landmark [k + 1 ]), 1 , cv::Scalar (255 * (k > 2 ), 255 * (k > 0 && k < 8 ), 255 * (k < 6 )), 4 );
541+ static float prob[BATCH_SIZE * OUTPUT_SIZE];
542+ auto start = std::chrono::system_clock::now ();
543+ doInference (*context, data, prob, BATCH_SIZE);
544+ auto end = std::chrono::system_clock::now ();
545+ std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count () << " ms" << std::endl;
546+
547+ for (int b = 0 ; b < BATCH_SIZE; b++) {
548+ std::vector<decodeplugin::Detection> res;
549+ nms (res, &prob[b * OUTPUT_SIZE]);
550+ std::cout << " number of detections -> " << prob[b * OUTPUT_SIZE] << std::endl;
551+ std::cout << " -> " << prob[b * OUTPUT_SIZE + 10 ] << std::endl;
552+ std::cout << " after nms -> " << res.size () << std::endl;
553+ cv::Mat tmp = img.clone ();
554+ for (size_t j = 0 ; j < res.size (); j++) {
555+ if (res[j].class_confidence < 0.1 ) continue ;
556+ cv::Rect r = get_rect_adapt_landmark (tmp, res[j].bbox , res[j].landmark );
557+ cv::rectangle (tmp, r, cv::Scalar (0x27 , 0xC1 , 0x36 ), 2 );
558+ // cv::putText(tmp, std::to_string((int)(res[j].class_confidence * 100)) + "%", cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 1);
559+ for (int k = 0 ; k < 10 ; k += 2 ) {
560+ cv::circle (tmp, cv::Point (res[j].landmark [k], res[j].landmark [k + 1 ]), 1 , cv::Scalar (255 * (k > 2 ), 255 * (k > 0 && k < 8 ), 255 * (k < 6 )), 4 );
561+ }
553562 }
563+ cv::imwrite (std::to_string (b) + " _result.jpg" , tmp);
554564 }
555- cv::imwrite (" result.jpg" , img);
556565
557566 // Destroy the engine
558567 context->destroy ();
0 commit comments