Skip to content
Merged
1 change: 1 addition & 0 deletions perception/autoware_ptv3/config/ptv3.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# network
trt_precision: fp16
cloud_capacity: 2000000
source_reconstruction: full
onnx_path: "$(var model_path)/ptv3.onnx"
Comment thread
amadeuszsz marked this conversation as resolved.
engine_path: "$(var model_path)/ptv3.engine"
filter:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,23 @@ class PostprocessCuda

void createVisualizationPointcloud(
const float * input_features, const std::int64_t * pred_labels, float * output_points,
std::size_t num_points);
std::size_t num_classes, std::size_t num_points);

void createSegmentationPointcloud(
const float * input_features, const std::int64_t * pred_labels, const float * pred_probs,
std::uint8_t * output_points, std::size_t num_classes, std::size_t num_points);

void reconstructPartial(
const std::int64_t * inverse_map, const std::int64_t * voxel_labels, const float * voxel_probs,
std::int64_t * output_labels, float * output_probs, std::size_t num_classes,
std::size_t num_cropped_points, std::size_t num_voxels);

void reconstructFull(
const std::uint32_t * crop_mask, const std::uint32_t * crop_indices,
const std::int64_t * inverse_map, const std::int64_t * voxel_labels, const float * voxel_probs,
std::int64_t * output_labels, float * output_probs, std::size_t num_classes,
std::size_t num_points, std::size_t num_voxels);

std::size_t createFilteredPointcloud(
const void * compact_input_points, CloudFormat input_format, CloudFormat output_format,
const float * pred_probs, void * output_points, std::size_t num_classes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ class PreprocessCuda
std::size_t generateFeatures(
const void * input_data, CloudFormat input_format, unsigned int num_points,
float * voxel_features, std::int64_t * voxel_coords, std::int64_t * voxel_hashes,
void * compact_points);
void * compact_points, float * reconstruction_features, void * cropped_source_points,
std::int64_t * inverse_map, std::size_t * num_cropped_points);

[[nodiscard]] const std::uint32_t * cropMask() const { return crop_mask_d_.get(); }
[[nodiscard]] const std::uint32_t * cropIndices() const { return crop_indices_d_.get(); }

private:
PTv3Config config_;
Expand Down
25 changes: 24 additions & 1 deletion perception/autoware_ptv3/include/autoware/ptv3/ptv3_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@
namespace autoware::ptv3
{

enum class SourceReconstruction {
NONE,
PARTIAL,
FULL,
};

class PTv3Config
{
public:
Expand All @@ -36,7 +42,8 @@ class PTv3Config
const std::vector<std::int64_t> & voxels_num, const std::vector<float> & point_cloud_range,
const std::vector<float> & voxel_size, const std::vector<std::string> & class_names,
const std::vector<std::int64_t> & palette, const float filter_class_probability_threshold,
const std::vector<std::string> & filter_classes, const std::string & filter_output_format)
const std::vector<std::string> & filter_classes, const std::string & filter_output_format,
const std::string & source_reconstruction)
{
plugins_path_ = plugins_path;

Expand Down Expand Up @@ -91,6 +98,21 @@ class PTv3Config
filter_class_probability_threshold_ = filter_class_probability_threshold;
filter_class_indices_ = make_filter_class_indices(class_names_, filter_classes);
filter_output_format_ = filter_output_format;
source_reconstruction_ = parse_source_reconstruction(source_reconstruction);
}

static SourceReconstruction parse_source_reconstruction(const std::string & value)
{
if (value == "none") {
return SourceReconstruction::NONE;
}
if (value == "partial") {
return SourceReconstruction::PARTIAL;
}
if (value == "full") {
return SourceReconstruction::FULL;
}
throw std::runtime_error("source_reconstruction must be one of: 'none', 'partial', or 'full'.");
}

static std::vector<std::uint32_t> make_filter_class_indices(
Expand Down Expand Up @@ -155,6 +177,7 @@ class PTv3Config
float filter_class_probability_threshold_{};
std::vector<std::uint32_t> filter_class_indices_;
std::string filter_output_format_;
SourceReconstruction source_reconstruction_{SourceReconstruction::NONE};

// Common network parameters
std::int64_t cloud_capacity_{};
Expand Down
8 changes: 8 additions & 0 deletions perception/autoware_ptv3/include/autoware/ptv3/ptv3_trt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,16 @@ class PTV3_PUBLIC PTv3TRT

// Preprocess outputs
std::int64_t num_voxels_{0};
std::int64_t num_cropped_points_{0}; // only for partial
std::int64_t num_source_points_{0}; // only for full
const void * current_input_data_{nullptr}; // only for full

CudaUniquePtr<std::uint8_t[]> compact_points_d_{nullptr};
CudaUniquePtr<std::uint8_t[]> cropped_source_points_d_{nullptr}; // only for partial
CudaUniquePtr<float[]> reconstructed_features_d_{nullptr}; // only for partial and full
CudaUniquePtr<std::int64_t[]> inverse_map_d_{nullptr}; // only for partial and full
CudaUniquePtr<std::int64_t[]> reconstructed_labels_d_{nullptr}; // only for partial and full
CudaUniquePtr<float[]> reconstructed_probs_d_{nullptr}; // only for partial and full
CudaUniquePtr<std::int64_t[]> grid_coord_d_{nullptr};
CudaUniquePtr<float[]> feat_d_{nullptr};
CudaUniquePtr<std::int64_t[]> serialized_code_d_{nullptr};
Expand Down
8 changes: 4 additions & 4 deletions perception/autoware_ptv3/launch/ptv3.launch.xml

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Temporary changes made it into the PR

Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@
<group unless="$(var use_pointcloud_container)">
<node pkg="autoware_ptv3" exec="autoware_ptv3_node" name="ptv3" output="screen" args="--ros-args --log-level $(var log_level)">
<remap from="~/input/pointcloud" to="$(var input/pointcloud)"/>
<remap from="~/input/pointcloud/cuda" to="$(var input/pointcloud)/cuda"/>
<remap from="~/input/pointcloud/cuda" to="$(var input/pointcloud)/cuda/dummy"/>
<remap from="~/output/pointcloud/segmentation" to="$(var output/pointcloud/segmentation)"/>
<remap from="~/output/pointcloud/segmentation/cuda" to="$(var output/pointcloud/segmentation)/cuda"/>
<remap from="~/output/pointcloud/segmentation/cuda" to="$(var output/pointcloud/segmentation)/cuda/dummy"/>
<remap from="~/output/pointcloud/visualization" to="$(var output/pointcloud/visualization)"/>
<remap from="~/output/pointcloud/visualization/cuda" to="$(var output/pointcloud/visualization)/cuda"/>
<remap from="~/output/pointcloud/visualization/cuda" to="$(var output/pointcloud/visualization)/cuda/dummy"/>
<remap from="~/output/pointcloud/filtered" to="$(var output/pointcloud/filtered)"/>
<remap from="~/output/pointcloud/filtered/cuda" to="$(var output/pointcloud/filtered)/cuda"/>
<remap from="~/output/pointcloud/filtered/cuda" to="$(var output/pointcloud/filtered)/cuda/dummy"/>

<param from="$(var model_param_path)" allow_substs="true"/>
<param from="$(var ml_package_param_path)" allow_substs="true"/>
Expand Down
112 changes: 106 additions & 6 deletions perception/autoware_ptv3/lib/postprocess/postprocess_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,21 @@ struct OutputSegmentationPointType
float z;
std::uint8_t class_id;
float probability;
float entropy;
Comment thread
mojomex marked this conversation as resolved.
} __attribute__((packed));

__global__ void createVisualizationPointcloudKernel(
const float4 * input_features, const float * colors, const std::int64_t * labels,
float4 * output_points, std::size_t num_points)
float4 * output_points, std::size_t num_classes, std::size_t num_points)
{
const auto idx = static_cast<std::uint32_t>(blockIdx.x * blockDim.x + threadIdx.x);
if (idx >= num_points) {
return;
}

const auto label = labels[idx];
const auto color = colors[label];
const auto color =
label >= 0 && static_cast<std::size_t>(label) < num_classes ? colors[label] : 0.0f;

output_points[idx] =
make_float4(input_features[idx].x, input_features[idx].y, input_features[idx].z, color);
Expand All @@ -55,11 +57,78 @@ __global__ void createSegmentationPointcloudKernel(

const auto input_point = input_features[idx];
const auto label = labels[idx];
const bool has_valid_label = label >= 0 && static_cast<std::size_t>(label) < num_classes;
float entropy = 0.0f;
for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
const auto probability = pred_probs[idx * num_classes + class_idx];
if (probability > 0.0f) {
entropy -= probability * logf(probability);
}
}
if (num_classes > 1) {
entropy /= logf(static_cast<float>(num_classes));
}
output_points[idx].x = input_point.x;
output_points[idx].y = input_point.y;
output_points[idx].z = input_point.z;
output_points[idx].class_id = static_cast<std::uint8_t>(label);
output_points[idx].probability = pred_probs[idx * num_classes + label];
output_points[idx].class_id = has_valid_label ? static_cast<std::uint8_t>(label) : 255U;
output_points[idx].probability = has_valid_label ? pred_probs[idx * num_classes + label] : 0.0f;
output_points[idx].entropy = entropy;
}

__global__ void reconstructPartialKernel(
const std::int64_t * __restrict__ inverse_map, const std::int64_t * __restrict__ voxel_labels,
const float * __restrict__ voxel_probs, std::int64_t * __restrict__ output_labels,
float * __restrict__ output_probs, std::size_t num_classes, std::size_t num_cropped_points,
std::size_t num_voxels)
{
const auto point_idx = blockIdx.x * blockDim.y + threadIdx.y;
const auto class_idx = blockIdx.y * blockDim.x + threadIdx.x;

if (point_idx >= num_cropped_points || class_idx >= num_classes) return;

const auto voxel_idx = inverse_map[point_idx];
const bool has_valid_voxel = voxel_idx >= 0 && static_cast<std::size_t>(voxel_idx) < num_voxels;
if (class_idx == 0) {
output_labels[point_idx] = has_valid_voxel ? voxel_labels[voxel_idx] : 255;
}

output_probs[point_idx * num_classes + class_idx] =
has_valid_voxel ? voxel_probs[static_cast<std::size_t>(voxel_idx) * num_classes + class_idx]
: 0.0f;
}

__global__ void reconstructFullKernel(
const std::uint32_t * __restrict__ crop_mask, const std::uint32_t * __restrict__ crop_indices,
const std::int64_t * __restrict__ inverse_map, const std::int64_t * __restrict__ voxel_labels,
const float * __restrict__ voxel_probs, std::int64_t * __restrict__ output_labels,
float * __restrict__ output_probs, std::size_t num_classes, std::size_t num_points,
std::size_t num_voxels)
{
const auto point_idx = blockIdx.x * blockDim.y + threadIdx.y;
const auto class_idx = blockIdx.y * blockDim.x + threadIdx.x;

if (point_idx >= num_points || class_idx >= num_classes) return;

const auto mask = crop_mask[point_idx];
if (mask == 0) {
if (class_idx == 0) output_labels[point_idx] = 255;
return;
}

const auto cropped_idx = crop_indices[point_idx] - 1;
const auto voxel_idx = inverse_map[cropped_idx];
if (voxel_idx < 0 || static_cast<std::size_t>(voxel_idx) >= num_voxels) {
if (class_idx == 0) output_labels[point_idx] = 255;
return;
}

if (class_idx == 0) {
output_labels[point_idx] = voxel_labels[voxel_idx];
}

output_probs[point_idx * num_classes + class_idx] =
voxel_probs[static_cast<std::size_t>(voxel_idx) * num_classes + class_idx];
}

template <typename OutputPointT>
Expand Down Expand Up @@ -213,13 +282,13 @@ PostprocessCuda::PostprocessCuda(const PTv3Config & config, cudaStream_t stream)

void PostprocessCuda::createVisualizationPointcloud(
const float * input_features, const std::int64_t * labels, float * output_points,
std::size_t num_points)
std::size_t num_classes, std::size_t num_points)
{
auto num_blocks = divup(num_points, config_.threads_per_block_);

createVisualizationPointcloudKernel<<<num_blocks, config_.threads_per_block_, 0, stream_>>>(
reinterpret_cast<const float4 *>(input_features), color_map_d_.get(), labels,
reinterpret_cast<float4 *>(output_points), num_points);
reinterpret_cast<float4 *>(output_points), num_classes, num_points);

CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
}
Expand All @@ -237,6 +306,37 @@ void PostprocessCuda::createSegmentationPointcloud(
CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
}

void PostprocessCuda::reconstructPartial(
const std::int64_t * inverse_map, const std::int64_t * voxel_labels, const float * voxel_probs,
std::int64_t * output_labels, float * output_probs, std::size_t num_classes,
std::size_t num_cropped_points, std::size_t num_voxels)
{
auto block = dim3(32, 8);
auto grid = dim3(divup(num_cropped_points, block.y), divup(num_classes, block.x));

reconstructPartialKernel<<<grid, block, 0, stream_>>>(
inverse_map, voxel_labels, voxel_probs, output_labels, output_probs, num_classes,
num_cropped_points, num_voxels);

CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
}

void PostprocessCuda::reconstructFull(
const std::uint32_t * crop_mask, const std::uint32_t * crop_indices,
const std::int64_t * inverse_map, const std::int64_t * voxel_labels, const float * voxel_probs,
std::int64_t * output_labels, float * output_probs, std::size_t num_classes,
std::size_t num_points, std::size_t num_voxels)
{
auto block = dim3(32, 8);
auto grid = dim3(divup(num_points, block.y), divup(num_classes, block.x));

reconstructFullKernel<<<grid, block, 0, stream_>>>(
crop_mask, crop_indices, inverse_map, voxel_labels, voxel_probs, output_labels, output_probs,
num_classes, num_points, num_voxels);

CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
}

std::size_t PostprocessCuda::createFilteredPointcloud(
const void * compact_input_points, CloudFormat input_format, CloudFormat output_format,
const float * pred_probs, void * output_points, std::size_t num_classes, std::size_t num_points)
Expand Down
Loading
Loading