diff --git a/demos/crossroad_camera_demo/cpp/README.md b/demos/crossroad_camera_demo/cpp/README.md
index 26a1e50f04b..8e2cfa15c23 100644
--- a/demos/crossroad_camera_demo/cpp/README.md
+++ b/demos/crossroad_camera_demo/cpp/README.md
@@ -72,27 +72,30 @@ omz_converter --list models.lst
 Running the application with the `-h` option yields the following usage message:
 
 ```
-crossroad_camera_demo [OPTION]
-Options:
-
-    -h                        Print a usage message.
-    -i                        Required. An input to process. The input must be a single image, a folder of images, video file or camera id.
-    -loop                     Optional. Enable reading the input in a loop.
-    -o "<path>"               Optional. Name of the output file(s) to save.
-    -limit "<num>"            Optional. Number of frames to store in output. If 0 is set, all frames are stored.
-    -m "<path>"               Required. Path to the Person/Vehicle/Bike Detection Crossroad model (.xml) file.
-    -m_pa "<path>"            Optional. Path to the Person Attributes Recognition Crossroad model (.xml) file.
-    -m_reid "<path>"          Optional. Path to the Person Reidentification Retail model (.xml) file.
-    -d "<device>"             Optional. Specify the target device for Person/Vehicle/Bike Detection. The list of available devices is shown below. Default value is CPU. Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
-    -d_pa "<device>"          Optional. Specify the target device for Person Attributes Recognition. The list of available devices is shown below. Default value is CPU. Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
-    -d_reid "<device>"        Optional. Specify the target device for Person Reidentification Retail. The list of available devices is shown below. Default value is CPU. Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
-    -r                        Optional. Output Inference results as raw values.
-    -t                        Optional. Probability threshold for person/vehicle/bike crossroad detections.
-    -t_reid                   Optional. Cosine similarity threshold between two vectors for person reidentification.
-    -no_show                  Optional. Don't show output.
-    -auto_resize              Optional. Enables resizable input with support of ROI crop & auto resize.
-    -u                        Optional. List of monitors to show initially.
-    -person_label             Optional. The integer index of the objects' category corresponding to persons (as it is returned from the detection network, may vary from one network to another). The default value is 1.
+    [ -h]                      show the help message and exit
+    [--help]                   print help on all arguments
+     -m <MODEL FILE>           path to the Person/Vehicle/Bike Detection Crossroad model (.xml) file
+    [ -i <INPUT>]              an input to process. The input must be a single image, a folder of images, video file or camera id. Default is 0
+    [--auto_resize]            enables resizable input with support of ROI crop & auto resize
+    [ -d <DEVICE>]             specify a device to infer on (the list of available devices is shown below). Use '-d HETERO:<comma-separated_devices_list>' format to specify HETERO plugin. Use '-d MULTI:<comma-separated_devices_list>' format to specify MULTI plugin. Default is CPU
+    [--dpa <DEVICE>]           specify the target device for Person Attributes Recognition. Use '-d HETERO:<comma-separated_devices_list>' format to specify HETERO plugin. Default is CPU
+    [--dpr <DEVICE>]           specify the target device for Person Reidentification Retail. Use '-d HETERO:<comma-separated_devices_list>' format to specify HETERO plugin. Default is CPU
+    [--lim <NUMBER>]           number of frames to store in output. If 0 is set, all frames are stored. Default is 1000
+    [--loop]                   enable reading the input in a loop
+    [--mpa <MODEL FILE>]       path to the Person Attributes Recognition Crossroad model (.xml) file
+    [--mpr <MODEL FILE>]       path to the Person Reidentification Retail model (.xml) file
+    [ -o <OUTPUT>]             name of the output file(s) to save
+    [--person_label <NUMBER>]  the integer index of the objects' category corresponding to persons (as it is returned from the detection network, may vary from one network to another). Default is 1
+    [ -r]                      output inference results as raw values
+    [--show] ([--noshow])      (don't) show output
+    [ -t <NUMBER>]             probability threshold for detections. Default is 0.5
+    [--tpr <NUMBER>]           cosine similarity threshold between two vectors for person reidentification. Default is 0.7
+    [ -u <MONITORS>]           resource utilization graphs. c - average CPU load, d - load distribution over cores, m - memory usage, h - hide
+    Key bindings:
+        Q, q, Esc - Quit
+        P, p, 0, spacebar - Pause
+        C - average CPU load, D - load distribution over cores, M - memory usage, H - hide
+```
 
 Running the application with an empty list of options yields the usage message given above and an error message.
@@ -100,7 +103,7 @@ Running the application with an empty list of options yields the usage message g
 For example, to do inference on a GPU with the OpenVINO™ toolkit pre-trained models, run the following command:
 
 ```sh
-./crossroad_camera_demo -i <path_to_video>/inputVideo.mp4 -m <path_to_model>/person-vehicle-bike-detection-crossroad-0078.xml -m_pa <path_to_model>/person-attributes-recognition-crossroad-0230.xml -m_reid <path_to_model>/person-reidentification-retail-0079.xml -d GPU
+./crossroad_camera_demo -i <path_to_video>/inputVideo.mp4 -m <path_to_model>/person-vehicle-bike-detection-crossroad-0078.xml --mpa <path_to_model>/person-attributes-recognition-crossroad-0230.xml --mpr <path_to_model>/person-reidentification-retail-0079.xml -d GPU
 ```
 
 > **NOTE**: The detection network returns as the result a set of detected objects, where each detected object consists of a bounding box and an index of the object's category (person/vehicle/bike). The demo runs Person Attributes Recognition and Person Reidentification networks only for the bounding boxes that have the category "person".
@@ -115,7 +118,7 @@ You can save processed results to a Motion JPEG AVI file or separate JPEG or PNG
 * To save processed results in an AVI file, specify the name of the output file with `avi` extension, for example: `-o output.avi`.
 * To save processed results as images, specify the template name of the output image file with `jpg` or `png` extension, for example: `-o output_%03d.jpg`. The actual file names are constructed from the template at runtime by replacing regular expression `%03d` with the frame number, resulting in the following: `output_000.jpg`, `output_001.jpg`, and so on.
 
-To avoid disk space overrun in case of continuous input stream, like camera, you can limit the amount of data stored in the output file(s) with the `limit` option. The default value is 1000. To change it, you can apply the `-limit N` option, where `N` is the number of frames to store.
+To avoid disk space overrun in case of a continuous input stream, such as a camera, you can limit the amount of data stored in the output file(s) with the `lim` option. The default value is 1000. To change it, apply the `--lim N` option, where `N` is the number of frames to store.
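+
+For example, with a 30 FPS camera input, `--lim 300` stores only the first 300 processed frames, that is, roughly the first 10 seconds of the stream (300 frames / 30 FPS = 10 s); `--lim 0` removes the cap and stores every frame.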
 
 >**NOTE**: Windows\* systems may not have the Motion JPEG codec installed by default. If this is the case, you can download OpenCV FFMPEG back end using the PowerShell script provided with the OpenVINO™ install package and located at `<INSTALL_DIR>/opencv/ffmpeg-download.ps1`. The script should be run with administrative privileges if OpenVINO™ is installed in a system protected folder (this is a typical case). Alternatively, you can save results as images.
diff --git a/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp b/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp
deleted file mode 100644
index c198a88430a..00000000000
--- a/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (C) 2018-2022 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-#include "gflags/gflags.h"
-#include "utils/default_flags.hpp"
-
-DEFINE_INPUT_FLAGS
-DEFINE_OUTPUT_FLAGS
-
-static const char help_message[] = "Print a usage message.";
-static const char person_vehicle_bike_detection_model_message[] = "Required. Path to the Person/Vehicle/Bike Detection Crossroad model (.xml) file.";
-static const char person_attribs_model_message[] = "Optional. Path to the Person Attributes Recognition Crossroad model (.xml) file.";
-static const char person_reid_model_message[] = "Optional. Path to the Person Reidentification Retail model (.xml) file.";
-static const char target_device_message[] = "Optional. Specify the target device for Person/Vehicle/Bike Detection. "
-                                            "The list of available devices is shown below. Default value is CPU. "
-                                            "Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
-                                            "The application looks for a suitable plugin for the specified device.";
-static const char target_device_message_person_attribs[] = "Optional. Specify the target device for Person Attributes Recognition. "
-                                                           "The list of available devices is shown below. Default value is CPU. "
-                                                           "Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
-                                                           "The application looks for a suitable plugin for the specified device.";
-static const char target_device_message_person_reid[] = "Optional. Specify the target device for Person Reidentification Retail. "
-                                                        "The list of available devices is shown below. Default value is CPU. "
-                                                        "Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
-                                                        "The application looks for a suitable plugin for the specified device.";
-static const char threshold_output_message[] = "Optional. Probability threshold for person/vehicle/bike crossroad detections.";
-static const char threshold_output_message_person_reid[] = "Optional. Cosine similarity threshold between two vectors for person reidentification.";
-static const char raw_output_message[] = "Optional. Output Inference results as raw values.";
-static const char no_show_message[] = "Optional. Don't show output.";
-static const char input_resizable_message[] = "Optional. Enables resizable input with support of ROI crop & auto resize.";
-static const char utilization_monitors_message[] = "Optional. List of monitors to show initially.";
-static const char person_label_message[] = "Optional. The integer index of the objects' category corresponding to persons "
-                                           "(as it is returned from the detection network, may vary from one network to another). "
-                                           "The default value is 1.";
-
-
-DEFINE_bool(h, false, help_message);
-DEFINE_string(m, "", person_vehicle_bike_detection_model_message);
-DEFINE_string(m_pa, "", person_attribs_model_message);
-DEFINE_string(m_reid, "", person_reid_model_message);
-DEFINE_string(d, "CPU", target_device_message);
-DEFINE_string(d_pa, "CPU", target_device_message_person_attribs);
-DEFINE_string(d_reid, "CPU", target_device_message_person_reid);
-DEFINE_bool(r, false, raw_output_message);
-DEFINE_double(t, 0.5, threshold_output_message);
-DEFINE_double(t_reid, 0.7, threshold_output_message_person_reid);
-DEFINE_bool(no_show, false, no_show_message);
-DEFINE_bool(auto_resize, false, input_resizable_message);
-DEFINE_string(u, "", utilization_monitors_message);
-DEFINE_int32(person_label, 1, person_label_message);
-
-
-/**
-* @brief This function show a help message
-*/
-static void showUsage() {
-    std::cout << std::endl;
-    std::cout << "crossroad_camera_demo [OPTION]" << std::endl;
-    std::cout << "Options:" << std::endl;
-    std::cout << std::endl;
-    std::cout << "    -h                        " << help_message << std::endl;
-    std::cout << "    -i                        " << input_message << std::endl;
-    std::cout << "    -loop                     " << loop_message << std::endl;
-    std::cout << "    -o \"<path>\"             " << output_message << std::endl;
-    std::cout << "    -limit \"<num>\"          " << limit_message << std::endl;
-    std::cout << "    -m \"<path>\"             " << person_vehicle_bike_detection_model_message<< std::endl;
-    std::cout << "    -m_pa \"<path>\"          " << person_attribs_model_message << std::endl;
-    std::cout << "    -m_reid \"<path>\"        " << person_reid_model_message << std::endl;
-    std::cout << "    -d \"<device>\"           " << target_device_message << std::endl;
-    std::cout << "    -d_pa \"<device>\"        " << target_device_message_person_attribs << std::endl;
-    std::cout << "    -d_reid \"<device>\"      " << target_device_message_person_reid << std::endl;
-    std::cout << "    -r                        " << raw_output_message << std::endl;
-    std::cout << "    -t                        " << threshold_output_message << std::endl;
-    std::cout << "    -t_reid                   " << threshold_output_message_person_reid << std::endl;
-    std::cout << "    -no_show                  " << no_show_message << std::endl;
-    std::cout << "    -auto_resize              " << input_resizable_message << std::endl;
-    std::cout << "    -u                        " << utilization_monitors_message << std::endl;
-    std::cout << "    -person_label             " << person_label_message << std::endl;
-}
diff --git a/demos/crossroad_camera_demo/cpp/detection_person.hpp b/demos/crossroad_camera_demo/cpp/detection_person.hpp
index 0663ddbc1a4..18dbec1a06c 100644
--- a/demos/crossroad_camera_demo/cpp/detection_person.hpp
+++ b/demos/crossroad_camera_demo/cpp/detection_person.hpp
@@ -9,7 +9,6 @@
 #include "gflags/gflags.h"
 #include "utils/slog.hpp"
 #include "detection_base.hpp"
-#include "crossroad_camera_demo.hpp"
 
 struct PersonDetection : BaseDetection {
     size_t maxProposalCount;
diff --git a/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp b/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp
index e2117762a3f..5864529210c 100644
--- a/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp
+++ b/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp
@@ -17,7 +17,7 @@ struct PersonAttribsDetection : BaseDetection {
     bool hasTopBottomColor;
 
-    PersonAttribsDetection() : BaseDetection(FLAGS_m_pa, "Person Attributes Recognition"), hasTopBottomColor(false) {}
+    PersonAttribsDetection() : BaseDetection(FLAGS_mpa, "Person Attributes Recognition"), hasTopBottomColor(false) {}
 
     struct AttributesAndColorPoints {
         std::vector<std::string> attributes_strings;
@@ -127,8 +127,8 @@ struct PersonAttribsDetection : BaseDetection {
 
     std::shared_ptr<ov::Model> read(const ov::Core& core) override {
         // Read network model
-        slog::info << "Reading model: " << FLAGS_m_pa << slog::endl;
-        std::shared_ptr<ov::Model> model = core.read_model(FLAGS_m_pa);
+        slog::info << "Reading model: " << FLAGS_mpa << slog::endl;
+        std::shared_ptr<ov::Model> model = core.read_model(FLAGS_mpa);
         logBasicModelInfo(model);
         // set batch size 1
diff --git a/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp b/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp
index 84347f06250..1a619776e50 100644
--- a/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp
+++ b/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp
@@ -13,7 +13,7 @@ struct PersonReIdentification : BaseDetection {
     std::vector<std::vector<float>> globalReIdVec;  // contains vectors characterising all detected persons
 
-    PersonReIdentification() : BaseDetection(FLAGS_m_reid, "Person Re-Identification Retail") {}
+    PersonReIdentification() : BaseDetection(FLAGS_mpr, "Person Re-Identification Retail") {}
 
     unsigned long int findMatchingPerson(const std::vector<float>& newReIdVec) {
         auto size = globalReIdVec.size();
@@ -24,7 +24,7 @@ struct PersonReIdentification : BaseDetection {
             if (FLAGS_r) {
                 slog::debug << "cosineSimilarity: " << cosSim << slog::endl;
             }
-            if (cosSim > FLAGS_t_reid) {
+            if (cosSim > FLAGS_tpr) {
                 // We substitute previous person's vector by a new one characterising
                 // last person's position
                 globalReIdVec[i] = newReIdVec;
@@ -70,8 +70,8 @@ struct PersonReIdentification : BaseDetection {
 
     std::shared_ptr<ov::Model> read(const ov::Core& core) override {
         // Read network model
-        slog::info << "Reading model: " << FLAGS_m_reid << slog::endl;
-        std::shared_ptr<ov::Model> model = core.read_model(FLAGS_m_reid);
+        slog::info << "Reading model: " << FLAGS_mpr << slog::endl;
+        std::shared_ptr<ov::Model> model = core.read_model(FLAGS_mpr);
         logBasicModelInfo(model);
         // set batch size 1
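Context for the threshold rename above: `findMatchingPerson` treats a stored descriptor as the same person when its cosine similarity with the new descriptor exceeds `--tpr` (0.7 by default). A minimal standalone sketch of that similarity test (hypothetical helper; the demo computes `cosSim` with its own utility code):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical standalone equivalent of the cosSim check above: dot product
// of two equally sized descriptors divided by the product of their L2 norms.
float cosineSimilarity(const std::vector<float>& a, const std::vector<float>& b) {
    float dot = 0.f, normA = 0.f, normB = 0.f;
    for (std::size_t i = 0; i < a.size(); ++i) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    // A result above the --tpr threshold (default 0.7) is treated as "same person".
    return dot / (std::sqrt(normA) * std::sqrt(normB) + 1e-6f);  // epsilon guards zero vectors
}
```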
" + "Default is CPU"; +DEFINE_string(d, "CPU", d_msg); + +constexpr char dpa_msg[] = + "specify the target device for Person Attributes Recognition. " + "Use '-d HETERO:' format to specify HETERO plugin. " + "Default is CPU"; +DEFINE_string(dpa, "CPU", dpa_msg); + +constexpr char dpr_msg[] = + "specify the target device for Person Reidentification Retail. " + "Use '-d HETERO:' format to specify HETERO plugin. " + "Default is CPU"; +DEFINE_string(dpr, "CPU", dpr_msg); + +constexpr char lim_msg[] = "number of frames to store in output. If 0 is set, all frames are stored. Default is 1000"; +DEFINE_uint32(lim, 1000, lim_msg); + +constexpr char loop_msg[] = "enable reading the input in a loop"; +DEFINE_bool(loop, false, loop_msg); + +constexpr char mpa_msg[] = "path to the Person Attributes Recognition Crossroad model (.xml) file"; +DEFINE_string(mpa, "", mpa_msg); + +constexpr char mpr_msg[] = "path to the Person Reidentification Retail model (.xml) file"; +DEFINE_string(mpr, "", mpr_msg); + +constexpr char o_msg[] = "name of the output file(s) to save"; +DEFINE_string(o, "", o_msg); + +constexpr char person_label_msg[] = "the integer index of the objects' category corresponding to persons " + "(as it is returned from the detection network, may vary from one network to another). " + "Default is 1"; +DEFINE_int32(person_label, 1, person_label_msg); + +constexpr char r_msg[] = "output inference results as raw values"; +DEFINE_bool(r, false, r_msg); + +constexpr char show_msg[] = "(don't) show output"; +DEFINE_bool(show, true, show_msg); + +constexpr char t_msg[] = "probability threshold for detections. Default is 0.5"; +DEFINE_double(t, 0.5, t_msg); + +constexpr char tpr_msg[] = "cosine similarity threshold between two vectors for person reidentification. Default is 0.7"; +DEFINE_double(tpr, 0.7, tpr_msg); + +constexpr char u_msg[] = "resource utilization graphs. 
" + "c - average CPU load, d - load distribution over cores, m - memory usage, h - hide"; +DEFINE_string(u, "", u_msg); + +void parse(int argc, char *argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, false); + if (FLAGS_h || 1 == argc) { + std::cout << "\t[ -h] " << h_msg + << "\n\t[--help] print help on all arguments" + << "\n\t -m " << m_msg + << "\n\t[ -i ] " << i_msg + << "\n\t[--auto_resize] " << auto_resize_msg + << "\n\t[ -d ] " << d_msg + << "\n\t[--dpa ] " << dpa_msg + << "\n\t[--dpr ] " << dpr_msg + << "\n\t[--lim ] " << lim_msg + << "\n\t[--loop] " << loop_msg + << "\n\t[--mpa ] " << mpa_msg + << "\n\t[--mpr ] " << mpr_msg + << "\n\t[ -o ] " << o_msg + << "\n\t[--person_label ] " << person_label_msg + << "\n\t[ -r] " << r_msg + << "\n\t[--show] ([--noshow]) " << show_msg + << "\n\t[ -t ] " << t_msg + << "\n\t[--tpr ] " << tpr_msg + << "\n\t[ -u ] " << u_msg + << "\n\tKey bindings:" + "\n\t\tQ, q, Esc - Quit" + "\n\t\tP, p, 0, spacebar - Pause" + "\n\t\tC - average CPU load, D - load distribution over cores, M - memory usage, H - hide\n"; showAvailableDevices(); - return false; - } - - if (FLAGS_i.empty()) { - throw std::logic_error("Parameter -i is not set"); - } - - if (FLAGS_m.empty()) { - throw std::logic_error("Parameter -m is not set"); + slog::info << ov::get_openvino_version() << slog::endl; + exit(0); + } if (FLAGS_i.empty()) { + throw std::invalid_argument{"-i can't be empty"}; + } if (FLAGS_m.empty()) { + throw std::invalid_argument{"-m can't be empty"}; } - - return true; + std::cout << ov::get_openvino_version() << std::endl; } +} // namespace + +#include "detection_person.hpp" +#include "detection_person_attr.hpp" +#include "detection_person_reid.hpp" int main(int argc, char* argv[]) { - try { - PerformanceMetrics metrics; + std::set_terminate(catcher); + parse(argc, argv); + PerformanceMetrics metrics; - // This demo covers 3 certain topologies and cannot be generalized - // Parsing and validation of input args - if (!ParseAndCheckCommandLine(argc, argv)) { - return 0; - } + // This demo covers 3 certain topologies and cannot be generalized - std::unique_ptr cap = openImagesCapture(FLAGS_i, FLAGS_loop); + std::unique_ptr cap = openImagesCapture(FLAGS_i, FLAGS_loop); - // 1. Load OpenVINO runtime - slog::info << ov::get_openvino_version() << slog::endl; + // 1. Load OpenVINO runtime - ov::Core core; + ov::Core core; - PersonDetection personDetection; - PersonAttribsDetection personAttribs; - PersonReIdentification personReId; + PersonDetection personDetection; + PersonAttribsDetection personAttribs; + PersonReIdentification personReId; - // 2. Read IR models and load them to devices - Load(personDetection).into(core, FLAGS_d); - Load(personAttribs).into(core, FLAGS_d_pa); - Load(personReId).into(core, FLAGS_d_reid); + // 2. Read IR models and load them to devices + Load(personDetection).into(core, FLAGS_d); + Load(personAttribs).into(core, FLAGS_dpa); + Load(personReId).into(core, FLAGS_dpr); - // 3. Do inference - cv::Rect cropRoi; // cropped image coordinates - ov::Tensor frameTensor; - ov::Tensor roiTensor; - cv::Mat person; // Mat object containing person data cropped by openCV + // 3. 
 
 int main(int argc, char* argv[]) {
-    try {
-        PerformanceMetrics metrics;
+    std::set_terminate(catcher);
+    parse(argc, argv);
+    PerformanceMetrics metrics;
 
-        // This demo covers 3 certain topologies and cannot be generalized
-        // Parsing and validation of input args
-        if (!ParseAndCheckCommandLine(argc, argv)) {
-            return 0;
-        }
+    // This demo covers 3 certain topologies and cannot be generalized
 
-        std::unique_ptr<ImagesCapture> cap = openImagesCapture(FLAGS_i, FLAGS_loop);
+    std::unique_ptr<ImagesCapture> cap = openImagesCapture(FLAGS_i, FLAGS_loop);
 
-        // 1. Load OpenVINO runtime
-        slog::info << ov::get_openvino_version() << slog::endl;
+    // 1. Load OpenVINO runtime
 
-        ov::Core core;
+    ov::Core core;
 
-        PersonDetection personDetection;
-        PersonAttribsDetection personAttribs;
-        PersonReIdentification personReId;
+    PersonDetection personDetection;
+    PersonAttribsDetection personAttribs;
+    PersonReIdentification personReId;
 
-        // 2. Read IR models and load them to devices
-        Load(personDetection).into(core, FLAGS_d);
-        Load(personAttribs).into(core, FLAGS_d_pa);
-        Load(personReId).into(core, FLAGS_d_reid);
+    // 2. Read IR models and load them to devices
+    Load(personDetection).into(core, FLAGS_d);
+    Load(personAttribs).into(core, FLAGS_dpa);
+    Load(personReId).into(core, FLAGS_dpr);
 
-        // 3. Do inference
-        cv::Rect cropRoi; // cropped image coordinates
-        ov::Tensor frameTensor;
-        ov::Tensor roiTensor;
-        cv::Mat person; // Mat object containing person data cropped by openCV
+    // 3. Do inference
+    cv::Rect cropRoi; // cropped image coordinates
+    ov::Tensor frameTensor;
+    ov::Tensor roiTensor;
+    cv::Mat person; // Mat object containing person data cropped by openCV
 
-        // Start inference & calc performance
-        typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
+    // Start inference & calc performance
+    typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
 
-        auto startTime = std::chrono::steady_clock::now();
-        cv::Mat frame = cap->read();
+    auto startTime = std::chrono::steady_clock::now();
+    cv::Mat frame = cap->read();
 
-        LazyVideoWriter videoWriter{FLAGS_o, cap->fps(), FLAGS_limit};
-        cv::Size graphSize{frame.cols / 4, 60};
-        Presenter presenter(FLAGS_u, frame.rows - graphSize.height - 10, graphSize);
+    LazyVideoWriter videoWriter{FLAGS_o, cap->fps(), FLAGS_lim};
+    cv::Size graphSize{frame.cols / 4, 60};
+    Presenter presenter(FLAGS_u, frame.rows - graphSize.height - 10, graphSize);
 
-        bool shouldHandleTopBottomColors = personAttribs.HasTopBottomColor();
+    bool shouldHandleTopBottomColors = personAttribs.HasTopBottomColor();
 
-        do {
-            if (FLAGS_auto_resize) {
-                // just wrap Mat object with Tensor without additional memory allocation
-                frameTensor = wrapMat2Tensor(frame);
-                personDetection.setRoiTensor(frameTensor);
-            } else {
-                // resize Mat and copy data into OpenVINO allocated Tensor
-                personDetection.enqueue(frame);
-            }
-            // Run Person detection inference
-            auto t0 = std::chrono::high_resolution_clock::now();
-            personDetection.submitRequest();
-            personDetection.wait();
-            auto t1 = std::chrono::high_resolution_clock::now();
-            ms detection = std::chrono::duration_cast<ms>(t1 - t0);
-            // parse inference results internally (e.g. apply a threshold, etc)
-            personDetection.fetchResults();
-
-            // Process the results down to the pipeline
-            ms personAttribsNetworkTime(0), personReIdNetworktime(0);
-            int personAttribsInferred = 0, personReIdInferred = 0;
-            for (PersonDetection::Result& result : personDetection.results) {
-                if (result.label == FLAGS_person_label) {
-                    // person
-                    if (FLAGS_auto_resize) {
-                        cropRoi.x = (result.location.x < 0) ? 0 : result.location.x;
-                        cropRoi.y = (result.location.y < 0) ? 0 : result.location.y;
-                        cropRoi.width = std::min(result.location.width, frame.cols - cropRoi.x);
-                        cropRoi.height = std::min(result.location.height, frame.rows - cropRoi.y);
-                        ov::Coordinate p00({ 0, (size_t)cropRoi.y, (size_t)cropRoi.x, 0 });
-                        ov::Coordinate p01({ 1, (size_t)(cropRoi.y + cropRoi.height), (size_t)(cropRoi.x + cropRoi.width), 3 });
-                        roiTensor = ov::Tensor(frameTensor, p00, p01);
-                    } else {
-                        // To crop ROI manually and allocate required memory (cv::Mat) again
-                        auto clippedRect = result.location & cv::Rect(0, 0, frame.cols, frame.rows);
-                        person = frame(clippedRect);
-                    }
-
-                    PersonAttribsDetection::AttributesAndColorPoints resPersAttrAndColor;
-                    if (personAttribs.enabled()) {
-                        // Run Person Attributes Recognition
-                        if (FLAGS_auto_resize) {
-                            personAttribs.setRoiTensor(roiTensor);
-                        } else {
-                            personAttribs.enqueue(person);
-                        }
-
-                        t0 = std::chrono::high_resolution_clock::now();
-                        personAttribs.submitRequest();
-                        personAttribs.wait();
-                        t1 = std::chrono::high_resolution_clock::now();
-                        personAttribsNetworkTime += std::chrono::duration_cast<ms>(t1 - t0);
-                        personAttribsInferred++;
-                        // Process outputs
-
-                        resPersAttrAndColor = personAttribs.GetPersonAttributes();
-
-                        if (shouldHandleTopBottomColors) {
-                            cv::Point top_color_p;
-                            cv::Point bottom_color_p;
-
-                            top_color_p.x = static_cast<int>(resPersAttrAndColor.top_color_point.x) * person.cols;
-                            top_color_p.y = static_cast<int>(resPersAttrAndColor.top_color_point.y) * person.rows;
-
-                            bottom_color_p.x = static_cast<int>(resPersAttrAndColor.bottom_color_point.x) * person.cols;
-                            bottom_color_p.y = static_cast<int>(resPersAttrAndColor.bottom_color_point.y) * person.rows;
-
-                            cv::Rect person_rect(0, 0, person.cols, person.rows);
-
-                            // Define area around top color's location
-                            cv::Rect tc_rect;
-                            tc_rect.x = top_color_p.x - person.cols / 6;
-                            tc_rect.y = top_color_p.y - person.rows / 10;
-                            tc_rect.height = 2 * person.rows / 8;
-                            tc_rect.width = 2 * person.cols / 6;
-
-                            tc_rect = tc_rect & person_rect;
-
-                            // Define area around bottom color's location
-                            cv::Rect bc_rect;
-                            bc_rect.x = bottom_color_p.x - person.cols / 6;
-                            bc_rect.y = bottom_color_p.y - person.rows / 10;
-                            bc_rect.height = 2 * person.rows / 8;
-                            bc_rect.width = 2 * person.cols / 6;
-
-                            bc_rect = bc_rect & person_rect;
-
-                            if (!tc_rect.empty())
-                                resPersAttrAndColor.top_color = PersonAttribsDetection::GetAvgColor(person(tc_rect));
-                            if (!bc_rect.empty())
-                                resPersAttrAndColor.bottom_color = PersonAttribsDetection::GetAvgColor(person(bc_rect));
-                        }
-                    }
-
-                    std::string resPersReid = "";
-                    if (personReId.enabled()) {
-                        // Run Person Reidentification
-                        if (FLAGS_auto_resize) {
-                            personReId.setRoiTensor(roiTensor);
-                        } else {
-                            personReId.enqueue(person);
-                        }
-
-                        t0 = std::chrono::high_resolution_clock::now();
-                        personReId.submitRequest();
-                        personReId.wait();
-                        t1 = std::chrono::high_resolution_clock::now();
-
-                        personReIdNetworktime += std::chrono::duration_cast<ms>(t1 - t0);
-                        personReIdInferred++;
-
-                        auto reIdVector = personReId.getReidVec();
-
-                        // Check cosine similarity with all previously detected persons.
-                        // If it's new person it is added to the global Reid vector and
-                        // new global ID is assigned to the person. Otherwise, ID of
-                        // matched person is assigned to it.
-                        auto foundId = personReId.findMatchingPerson(reIdVector);
-                        resPersReid = "REID: " + std::to_string(foundId);
-                    }
-
-                    // Process outputs
-                    if (!resPersAttrAndColor.attributes_strings.empty()) {
-                        cv::Rect image_area(0, 0, frame.cols, frame.rows);
-                        cv::Rect tc_label(result.location.x + result.location.width, result.location.y,
-                                          result.location.width / 4, result.location.height / 2);
-                        cv::Rect bc_label(result.location.x + result.location.width, result.location.y + result.location.height / 2,
-                                          result.location.width / 4, result.location.height / 2);
-
-                        if (shouldHandleTopBottomColors) {
-                            frame(tc_label & image_area) = resPersAttrAndColor.top_color;
-                            frame(bc_label & image_area) = resPersAttrAndColor.bottom_color;
-                        }
-
-                        for (size_t i = 0; i < resPersAttrAndColor.attributes_strings.size(); ++i) {
-                            cv::Scalar color;
-                            if (resPersAttrAndColor.attributes_indicators[i]) {
-                                color = cv::Scalar(0, 200, 0); // has attribute
-                            } else {
-                                color = cv::Scalar(0, 0, 255); // doesn't have attribute
-                            }
-                            putHighlightedText(frame,
-                                               resPersAttrAndColor.attributes_strings[i],
-                                               cv::Point2f(static_cast<float>(result.location.x + 5 * result.location.width / 4),
-                                                           static_cast<float>(result.location.y + 15 + 15 * i)),
-                                               cv::FONT_HERSHEY_COMPLEX_SMALL,
-                                               0.5,
-                                               color, 1);
-                        }
-
-                        if (FLAGS_r) {
-                            std::string output_attribute_string;
-                            for (size_t i = 0; i < resPersAttrAndColor.attributes_strings.size(); ++i)
-                                if (resPersAttrAndColor.attributes_indicators[i])
-                                    output_attribute_string += resPersAttrAndColor.attributes_strings[i] + ",";
-                            slog::debug << "Person Attributes results: " << output_attribute_string << slog::endl;
-                            if (shouldHandleTopBottomColors) {
-                                slog::debug << "Person top color: " << resPersAttrAndColor.top_color << slog::endl;
-                                slog::debug << "Person bottom color: " << resPersAttrAndColor.bottom_color << slog::endl;
-                            }
-                        }
-                    }
-                    if (!resPersReid.empty()) {
-                        putHighlightedText(frame,
-                                           resPersReid,
-                                           cv::Point2f(static_cast<float>(result.location.x), static_cast<float>(result.location.y + 30)),
-                                           cv::FONT_HERSHEY_COMPLEX_SMALL,
-                                           0.55,
-                                           cv::Scalar(250, 10, 10), 1);
-
-                        if (FLAGS_r) {
-                            slog::debug << "Person Re-Identification results: " << resPersReid << slog::endl;
-                        }
-                    }
-                    cv::rectangle(frame, result.location, cv::Scalar(0, 255, 0), 1);
-                }
-            }
-
-            presenter.drawGraphs(frame);
-            metrics.update(startTime);
-
-            // Execution statistics
-            std::ostringstream out;
-            out << "Detection time : " << std::fixed << std::setprecision(2) << detection.count()
-                << " ms (" << 1000.f / detection.count() << " fps)";
-
-            putHighlightedText(frame, out.str(), cv::Point2f(0, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
-
-            if (personDetection.results.size()) {
-                if (personAttribs.enabled() && personAttribsInferred) {
-                    float average_time = static_cast<float>(personAttribsNetworkTime.count() / personAttribsInferred);
-                    out.str("");
-                    out << "Attributes Recognition time: " << std::fixed << std::setprecision(2) << average_time
-                        << " ms (" << 1000.f / average_time << " fps)";
-                    putHighlightedText(frame, out.str(), cv::Point2f(0, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
-                    if (FLAGS_r) {
-                        slog::debug << out.str() << slog::endl;
-                    }
-                }
-                if (personReId.enabled() && personReIdInferred) {
-                    float average_time = static_cast<float>(personReIdNetworktime.count() / personReIdInferred);
-                    out.str("");
-                    out << "Re-Identification time: " << std::fixed << std::setprecision(2) << average_time
-                        << " ms (" << 1000.f / average_time << " fps)";
-                    putHighlightedText(frame, out.str(), cv::Point2f(0, 60), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
-                    if (FLAGS_r) {
-                        slog::debug << out.str() << slog::endl;
-                    }
-                }
-            }
-            videoWriter.write(frame);
-            if (!FLAGS_no_show) {
-                cv::imshow("Detection results", frame);
-                const int key = cv::waitKey(1);
-                if (27 == key) // Esc
-                    break;
-                presenter.handleKey(key);
-            }
-            startTime = std::chrono::steady_clock::now();
-
-            // get next frame
-            frame = cap->read();
-        } while (frame.data);
-
-        slog::info << "Metrics report:" << slog::endl;
-        metrics.logTotal();
-        slog::info << presenter.reportMeans() << slog::endl;
-    }
-    catch (const std::exception& error) {
-        slog ::err << error.what() << slog::endl;
-        return 1;
-    }
-    catch (...) {
-        slog::err << "Unknown/internal exception happened." << slog::endl;
-        return 1;
-    }
+    do {
+        if (FLAGS_auto_resize) {
+            // just wrap Mat object with Tensor without additional memory allocation
+            frameTensor = wrapMat2Tensor(frame);
+            personDetection.setRoiTensor(frameTensor);
+        } else {
+            // resize Mat and copy data into OpenVINO allocated Tensor
+            personDetection.enqueue(frame);
+        }
+        // Run Person detection inference
+        auto t0 = std::chrono::high_resolution_clock::now();
+        personDetection.submitRequest();
+        personDetection.wait();
+        auto t1 = std::chrono::high_resolution_clock::now();
+        ms detection = std::chrono::duration_cast<ms>(t1 - t0);
+        // parse inference results internally (e.g. apply a threshold, etc)
+        personDetection.fetchResults();
+
+        // Process the results down to the pipeline
+        ms personAttribsNetworkTime(0), personReIdNetworktime(0);
+        int personAttribsInferred = 0, personReIdInferred = 0;
+        for (PersonDetection::Result& result : personDetection.results) {
+            if (result.label == FLAGS_person_label) {
+                // person
+                if (FLAGS_auto_resize) {
+                    cropRoi.x = (result.location.x < 0) ? 0 : result.location.x;
+                    cropRoi.y = (result.location.y < 0) ? 0 : result.location.y;
+                    cropRoi.width = std::min(result.location.width, frame.cols - cropRoi.x);
+                    cropRoi.height = std::min(result.location.height, frame.rows - cropRoi.y);
+                    ov::Coordinate p00({ 0, (size_t)cropRoi.y, (size_t)cropRoi.x, 0 });
+                    ov::Coordinate p01({ 1, (size_t)(cropRoi.y + cropRoi.height), (size_t)(cropRoi.x + cropRoi.width), 3 });
+                    roiTensor = ov::Tensor(frameTensor, p00, p01);
+                } else {
+                    // To crop ROI manually and allocate required memory (cv::Mat) again
+                    auto clippedRect = result.location & cv::Rect(0, 0, frame.cols, frame.rows);
+                    person = frame(clippedRect);
+                }
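The ov::Coordinate pair in the branch above takes a zero-copy view into the NHWC frame tensor rather than cloning the crop. A self-contained sketch of the same ROI-tensor mechanism, with made-up dimensions (the demo's real tensor comes from wrapMat2Tensor):

```cpp
#include <openvino/openvino.hpp>

int main() {
    // A fake 640x480 BGR frame in NHWC layout, as wrapMat2Tensor would produce.
    ov::Tensor frame(ov::element::u8, {1, 480, 640, 3});
    ov::Coordinate p00{0, 100, 200, 0};  // begin: batch 0, y = 100, x = 200, channel 0
    ov::Coordinate p01{1, 300, 400, 3};  // end (exclusive): y = 300, x = 400, all 3 channels
    ov::Tensor roi(frame, p00, p01);     // a view sharing memory with `frame`
    // roi.get_shape() is {1, 200, 200, 3}; no pixel data was copied.
    return 0;
}
```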
+
+                PersonAttribsDetection::AttributesAndColorPoints resPersAttrAndColor;
+                if (personAttribs.enabled()) {
+                    // Run Person Attributes Recognition
+                    if (FLAGS_auto_resize) {
+                        personAttribs.setRoiTensor(roiTensor);
+                    } else {
+                        personAttribs.enqueue(person);
+                    }
+
+                    t0 = std::chrono::high_resolution_clock::now();
+                    personAttribs.submitRequest();
+                    personAttribs.wait();
+                    t1 = std::chrono::high_resolution_clock::now();
+                    personAttribsNetworkTime += std::chrono::duration_cast<ms>(t1 - t0);
+                    personAttribsInferred++;
+                    // Process outputs
+
+                    resPersAttrAndColor = personAttribs.GetPersonAttributes();
+
+                    if (shouldHandleTopBottomColors) {
+                        cv::Point top_color_p;
+                        cv::Point bottom_color_p;
+
+                        top_color_p.x = static_cast<int>(resPersAttrAndColor.top_color_point.x) * person.cols;
+                        top_color_p.y = static_cast<int>(resPersAttrAndColor.top_color_point.y) * person.rows;
+
+                        bottom_color_p.x = static_cast<int>(resPersAttrAndColor.bottom_color_point.x) * person.cols;
+                        bottom_color_p.y = static_cast<int>(resPersAttrAndColor.bottom_color_point.y) * person.rows;
+
+                        cv::Rect person_rect(0, 0, person.cols, person.rows);
+
+                        // Define area around top color's location
+                        cv::Rect tc_rect;
+                        tc_rect.x = top_color_p.x - person.cols / 6;
+                        tc_rect.y = top_color_p.y - person.rows / 10;
+                        tc_rect.height = 2 * person.rows / 8;
+                        tc_rect.width = 2 * person.cols / 6;
+
+                        tc_rect = tc_rect & person_rect;
+
+                        // Define area around bottom color's location
+                        cv::Rect bc_rect;
+                        bc_rect.x = bottom_color_p.x - person.cols / 6;
+                        bc_rect.y = bottom_color_p.y - person.rows / 10;
+                        bc_rect.height = 2 * person.rows / 8;
+                        bc_rect.width = 2 * person.cols / 6;
+
+                        bc_rect = bc_rect & person_rect;
+
+                        if (!tc_rect.empty())
+                            resPersAttrAndColor.top_color = PersonAttribsDetection::GetAvgColor(person(tc_rect));
+                        if (!bc_rect.empty())
+                            resPersAttrAndColor.bottom_color = PersonAttribsDetection::GetAvgColor(person(bc_rect));
+                    }
+                }
+
+                std::string resPersReid = "";
+                if (personReId.enabled()) {
+                    // Run Person Reidentification
+                    if (FLAGS_auto_resize) {
+                        personReId.setRoiTensor(roiTensor);
+                    } else {
+                        personReId.enqueue(person);
+                    }
+
+                    t0 = std::chrono::high_resolution_clock::now();
+                    personReId.submitRequest();
+                    personReId.wait();
+                    t1 = std::chrono::high_resolution_clock::now();
+
+                    personReIdNetworktime += std::chrono::duration_cast<ms>(t1 - t0);
+                    personReIdInferred++;
+
+                    auto reIdVector = personReId.getReidVec();
+
+                    // Check cosine similarity with all previously detected persons.
+                    // If it's new person it is added to the global Reid vector and
+                    // new global ID is assigned to the person. Otherwise, ID of
+                    // matched person is assigned to it.
+                    auto foundId = personReId.findMatchingPerson(reIdVector);
+                    resPersReid = "REID: " + std::to_string(foundId);
+                }
+
+                // Process outputs
+                if (!resPersAttrAndColor.attributes_strings.empty()) {
+                    cv::Rect image_area(0, 0, frame.cols, frame.rows);
+                    cv::Rect tc_label(result.location.x + result.location.width, result.location.y,
+                                      result.location.width / 4, result.location.height / 2);
+                    cv::Rect bc_label(result.location.x + result.location.width, result.location.y + result.location.height / 2,
+                                      result.location.width / 4, result.location.height / 2);
+
+                    if (shouldHandleTopBottomColors) {
+                        frame(tc_label & image_area) = resPersAttrAndColor.top_color;
+                        frame(bc_label & image_area) = resPersAttrAndColor.bottom_color;
+                    }
+
+                    for (size_t i = 0; i < resPersAttrAndColor.attributes_strings.size(); ++i) {
+                        cv::Scalar color;
+                        if (resPersAttrAndColor.attributes_indicators[i]) {
+                            color = cv::Scalar(0, 200, 0); // has attribute
+                        } else {
+                            color = cv::Scalar(0, 0, 255); // doesn't have attribute
+                        }
+                        putHighlightedText(frame,
+                                           resPersAttrAndColor.attributes_strings[i],
+                                           cv::Point2f(static_cast<float>(result.location.x + 5 * result.location.width / 4),
+                                                       static_cast<float>(result.location.y + 15 + 15 * i)),
+                                           cv::FONT_HERSHEY_COMPLEX_SMALL,
+                                           0.5,
+                                           color, 1);
+                    }
+
+                    if (FLAGS_r) {
+                        std::string output_attribute_string;
+                        for (size_t i = 0; i < resPersAttrAndColor.attributes_strings.size(); ++i)
+                            if (resPersAttrAndColor.attributes_indicators[i])
+                                output_attribute_string += resPersAttrAndColor.attributes_strings[i] + ",";
+                        slog::debug << "Person Attributes results: " << output_attribute_string << slog::endl;
+                        if (shouldHandleTopBottomColors) {
+                            slog::debug << "Person top color: " << resPersAttrAndColor.top_color << slog::endl;
+                            slog::debug << "Person bottom color: " << resPersAttrAndColor.bottom_color << slog::endl;
+                        }
+                    }
+                }
+                if (!resPersReid.empty()) {
+                    putHighlightedText(frame,
+                                       resPersReid,
+                                       cv::Point2f(static_cast<float>(result.location.x), static_cast<float>(result.location.y + 30)),
+                                       cv::FONT_HERSHEY_COMPLEX_SMALL,
+                                       0.55,
+                                       cv::Scalar(250, 10, 10), 1);
+
+                    if (FLAGS_r) {
+                        slog::debug << "Person Re-Identification results: " << resPersReid << slog::endl;
+                    }
+                }
+                cv::rectangle(frame, result.location, cv::Scalar(0, 255, 0), 1);
+            }
+        }
+
+        presenter.drawGraphs(frame);
+        metrics.update(startTime);
+
+        // Execution statistics
+        std::ostringstream out;
+        out << "Detection time : " << std::fixed << std::setprecision(2) << detection.count()
+            << " ms (" << 1000.f / detection.count() << " fps)";
+
+        putHighlightedText(frame, out.str(), cv::Point2f(0, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
+
+        if (personDetection.results.size()) {
+            if (personAttribs.enabled() && personAttribsInferred) {
+                float average_time = static_cast<float>(personAttribsNetworkTime.count() / personAttribsInferred);
+                out.str("");
+                out << "Attributes Recognition time: " << std::fixed << std::setprecision(2) << average_time
+                    << " ms (" << 1000.f / average_time << " fps)";
+                putHighlightedText(frame, out.str(), cv::Point2f(0, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
+                if (FLAGS_r) {
+                    slog::debug << out.str() << slog::endl;
+                }
+            }
+            if (personReId.enabled() && personReIdInferred) {
+                float average_time = static_cast<float>(personReIdNetworktime.count() / personReIdInferred);
+                out.str("");
+                out << "Re-Identification time: " << std::fixed << std::setprecision(2) << average_time
+                    << " ms (" << 1000.f / average_time << " fps)";
+                putHighlightedText(frame, out.str(), cv::Point2f(0, 60), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2);
+                if (FLAGS_r) {
+                    slog::debug << out.str() << slog::endl;
+                }
+            }
+        }
+        videoWriter.write(frame);
+        if (FLAGS_show) {
+            cv::imshow("Detection results", frame);
+            const int key = cv::pollKey();
+            if (32 == key || 'P' == key || 'p' == key || '0' == key)
+                cv::waitKey(0);
+            if (27 == key || 'Q' == key || 'q' == key) // Esc, Q or q
+                break;
+            presenter.handleKey(key);
+        }
+        startTime = std::chrono::steady_clock::now();
+
+        // get next frame
+        frame = cap->read();
+    } while (frame.data);
+
+    slog::info << "Metrics report:" << slog::endl;
+    metrics.logTotal();
+    slog::info << presenter.reportMeans() << slog::endl;
     return 0;
 }
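The reworked display path above replaces cv::waitKey(1) with cv::pollKey(), which polls for input without sleeping, and implements the Pause binding with a blocking cv::waitKey(0). A stripped-down sketch of the same loop (synthetic frame; cv::pollKey needs a reasonably recent OpenCV):

```cpp
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>

int main() {
    cv::Mat frame(480, 640, CV_8UC3, cv::Scalar::all(0));  // stand-in for a captured frame
    for (;;) {
        cv::imshow("Detection results", frame);
        const int key = cv::pollKey();  // non-blocking, unlike cv::waitKey(1)
        if (32 == key || 'P' == key || 'p' == key || '0' == key)
            cv::waitKey(0);  // pause: block until the next key press
        if (27 == key || 'Q' == key || 'q' == key)  // Esc, Q or q quits
            break;
    }
    return 0;
}
```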
diff --git a/demos/interactive_face_detection_demo/cpp_gapi/main.cpp b/demos/interactive_face_detection_demo/cpp_gapi/main.cpp
index 74f6e8d96e6..441ae8b687a 100644
--- a/demos/interactive_face_detection_demo/cpp_gapi/main.cpp
+++ b/demos/interactive_face_detection_demo/cpp_gapi/main.cpp
@@ -571,7 +571,7 @@ int main(int argc, char *argv[]) {
 
         /** Init presenter **/
         if (presenter == nullptr) {
-            cv::Size graphSize{static_cast<int>(frame.rows / 4), 60};
+            cv::Size graphSize{static_cast<int>(frame.cols / 4), 60};
             presenter.reset(new Presenter(FLAGS_u, THROUGHPUT_METRIC_POSITION.y + 15, graphSize));
         }
diff --git a/demos/tests/cases.py b/demos/tests/cases.py
index b88b4d3da2d..360e377cc27 100644
--- a/demos/tests/cases.py
+++ b/demos/tests/cases.py
@@ -279,15 +279,15 @@ def single_option_cases(key, *args):
     ))),
 
     CppDemo(name='crossroad_camera_demo',
-            model_keys=['-m', '-m_pa', '-m_reid'],
-            device_keys=['-d', '-d_pa', '-d_reid'],
+            model_keys=['-m', '--mpa', '--mpr'],
+            device_keys=['-d', '--dpa', '--dpr'],
             test_cases=combine_cases(
-                TestCase(options={'-no_show': None,
+                TestCase(options={'--noshow': None,
                     **MONITORS,
                     '-i': DataPatternArg('person-vehicle-bike-detection-crossroad')}),
                 TestCase(options={'-m': ModelArg('person-vehicle-bike-detection-crossroad-0078')}),
-                single_option_cases('-m_pa', None, ModelArg('person-attributes-recognition-crossroad-0230')),
-                single_option_cases('-m_reid',
+                single_option_cases('--mpa', None, ModelArg('person-attributes-recognition-crossroad-0230')),
+                single_option_cases('--mpr',
                     None,
                     ModelArg('person-reidentification-retail-0277'),
                     ModelArg('person-reidentification-retail-0286'),