forked from autowarefoundation/autoware_vision_pilot
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathonnxruntime_engine.hpp
More file actions
165 lines (139 loc) · 4.84 KB
/
onnxruntime_engine.hpp
File metadata and controls
165 lines (139 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#ifndef AUTOWARE_POV_VISION_EGOLANES_ONNXRUNTIME_ENGINE_HPP_
#define AUTOWARE_POV_VISION_EGOLANES_ONNXRUNTIME_ENGINE_HPP_
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <memory>
#include <array>
namespace autoware_pov::vision::egolanes
{
/**
 * @brief Lane segmentation output structure
 *
 * Contains binary segmentation masks for different lane types.
 * Each mask is a 2D binary image (1.0 = lane present, 0.0 = no lane)
 *
 * The polynomial coefficient vectors and the curve parameters below are
 * only meaningful when path_valid is true; otherwise they keep their
 * default (empty / zero) values.
 */
struct LaneSegmentation
{
cv::Mat ego_left; // Ego left lane boundary (320x640)
cv::Mat ego_right; // Ego right lane boundary (320x640)
cv::Mat other_lanes; // Other visible lanes (320x640)
int height; // Mask height in pixels (model output resolution)
int width; // Mask width in pixels (model output resolution)
// Coeffs for polyfit upscaling
std::vector<double> left_coeffs; // Polynomial fit of the left boundary
std::vector<double> right_coeffs; // Polynomial fit of the right boundary
// Drivable path info
std::vector<double> center_coeffs; // Polynomial fit of the drivable-path center
// Curve params info
double lane_offset = 0.0; // Distance from image center to path center
double yaw_offset = 0.0; // Heading error (radians)
double steering_angle = 0.0; // Steering angle (degrees)
double curvature = 0.0; // Curvature = 1/R
bool path_valid = false; // True if we successfully calculated the path
// Debug info (sliding-window lane search visualization)
cv::Point left_start_point = {-1, -1}; // {-1, -1} means "not found"
cv::Point right_start_point = {-1, -1}; // {-1, -1} means "not found"
std::vector<cv::Rect> left_sliding_windows;
std::vector<cv::Rect> right_sliding_windows;
};
/**
 * @brief EgoLanes ONNX Runtime Inference Engine
 *
 * Supports multiple execution providers:
 * - CPU: Default CPU execution
 * - TensorRT: GPU-accelerated with FP16/FP32
 *
 * Handles complete inference pipeline:
 * 1. Preprocessing (resize to 320x640, normalize)
 * 2. Model inference via ONNX Runtime
 * 3. Post-processing (thresholding, channel extraction)
 *
 * The engine uniquely owns its Ort::Session and inference buffers and is
 * therefore non-copyable. NOTE(review): thread-safety is not established by
 * this header — output_tensors_ is mutated per call, so concurrent
 * inference() on one instance should be assumed unsafe until confirmed.
 */
class EgoLanesOnnxEngine
{
public:
  /**
   * @brief Constructor
   *
   * @param model_path Path to ONNX model (.onnx file)
   * @param provider Execution provider: "cpu" or "tensorrt"
   * @param precision Precision mode: "fp32" or "fp16" (TensorRT only)
   * @param device_id GPU device ID (TensorRT only, default: 0)
   * @param cache_dir TensorRT engine cache directory (default: ./trt_cache)
   */
  EgoLanesOnnxEngine(
    const std::string& model_path,
    const std::string& provider = "cpu",
    const std::string& precision = "fp32",
    int device_id = 0,
    const std::string& cache_dir = "./trt_cache"
  );

  ~EgoLanesOnnxEngine();

  // Rule of Five: copying was already implicitly deleted through the
  // unique_ptr members; deleting it explicitly documents the single-owner
  // design and gives clearer compiler diagnostics at call sites.
  // (Moves stay suppressed by the user-declared destructor, as before.)
  EgoLanesOnnxEngine(const EgoLanesOnnxEngine&) = delete;
  EgoLanesOnnxEngine& operator=(const EgoLanesOnnxEngine&) = delete;

  /**
   * @brief Run complete inference pipeline
   *
   * @param input_image Input image (BGR, any resolution)
   * @param threshold Segmentation threshold (default: 0.0)
   * @return Lane segmentation masks in model resolution (320x640)
   */
  LaneSegmentation inference(
    const cv::Mat& input_image,
    float threshold = 0.0f
  );

  /**
   * @brief Get raw tensor output (for advanced users)
   *
   * @return Pointer to raw output tensor [1, 3, height, width]. The pointer
   *         aliases memory owned by output_tensors_, so it is only valid
   *         until the next inference() call or engine destruction.
   */
  const float* getRawTensorData() const;

  /**
   * @brief Get output tensor shape
   * @return Shape as [batch, channels, height, width]
   */
  std::vector<int64_t> getTensorShape() const;

  // Model input dimensions (320x640 for EgoLanes)
  int getInputWidth() const { return model_input_width_; }
  int getInputHeight() const { return model_input_height_; }

  // Model output dimensions
  int getOutputWidth() const { return model_output_width_; }
  int getOutputHeight() const { return model_output_height_; }

private:
  /**
   * @brief Preprocess image for EgoLanes
   *
   * Resizes to 320x640, converts to RGB, normalizes with ImageNet stats,
   * converts to CHW. Writes into the caller-provided buffer (sized for one
   * 3x320x640 float image).
   */
  void preprocessEgoLanes(const cv::Mat& input_image, float* buffer);

  /**
   * @brief Run ONNX Runtime inference
   * @return true on success; output_tensors_ holds the result
   */
  bool doInference(const cv::Mat& input_image);

  /**
   * @brief Post-process raw output
   *
   * Applies threshold and extracts lane masks from 3-channel output
   */
  LaneSegmentation postProcess(float threshold);

  // ONNX Runtime components
  std::unique_ptr<Ort::Session> session_;
  std::unique_ptr<Ort::MemoryInfo> memory_info_;

  // Input/Output tensor names. ONNX Runtime takes const char* arrays, so the
  // std::string storage must outlive the pointer vectors below.
  std::string input_name_storage_;
  std::string output_name_storage_;
  std::vector<const char*> input_names_;
  std::vector<const char*> output_names_;

  // Model dimensions
  int model_input_width_;     // 640
  int model_input_height_;    // 320
  int model_output_width_;    // 640
  int model_output_height_;   // 320
  int model_output_channels_; // 3 (ego_left, ego_right, other_lanes)

  // Buffers
  std::vector<float> input_buffer_;           // Reused CHW input tensor data
  std::vector<Ort::Value> output_tensors_;    // Output managed by ONNX Runtime

  // ImageNet normalization constants (RGB channel order)
  static constexpr std::array<float, 3> MEAN = {0.485f, 0.456f, 0.406f};
  static constexpr std::array<float, 3> STD = {0.229f, 0.224f, 0.225f};
};
} // namespace autoware_pov::vision::egolanes
#endif // AUTOWARE_POV_VISION_EGOLANES_ONNXRUNTIME_ENGINE_HPP_