diff --git a/perception/autoware_bevfusion/CMakeLists.txt b/perception/autoware_bevfusion/CMakeLists.txt index d5e87aeff3399..01e1e79731b19 100644 --- a/perception/autoware_bevfusion/CMakeLists.txt +++ b/perception/autoware_bevfusion/CMakeLists.txt @@ -101,6 +101,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND SPCONV_AVAIL) lib/postprocess/circle_nms_kernel.cu lib/postprocess/postprocess_kernel.cu lib/preprocess/preprocess_kernel.cu + lib/camera/camera_preprocess_kernel.cu ) target_link_libraries(${PROJECT_NAME}_cuda_lib @@ -120,6 +121,8 @@ if(TRT_AVAIL AND CUDA_AVAIL AND SPCONV_AVAIL) lib/preprocess/precomputed_features.cpp lib/ros_utils.cpp lib/bevfusion_trt.cpp + lib/camera/camera_matrices.cpp + lib/camera/camera_data.cpp ) target_compile_definitions(${PROJECT_NAME}_lib PRIVATE diff --git a/perception/autoware_bevfusion/config/bevfusion_camera_lidar.param.yaml b/perception/autoware_bevfusion/config/bevfusion_camera_lidar.param.yaml index feddd816165bd..fb10df36131c4 100644 --- a/perception/autoware_bevfusion/config/bevfusion_camera_lidar.param.yaml +++ b/perception/autoware_bevfusion/config/bevfusion_camera_lidar.param.yaml @@ -18,6 +18,8 @@ # pre-process params densification_num_past_frames: 1 densification_world_frame_id: map + run_image_undistortion: true + # post-process params circle_nms_dist_threshold: 0.5 iou_nms_search_distance_2d: 10.0 diff --git a/perception/autoware_bevfusion/config/bevfusion_lidar.param.yaml b/perception/autoware_bevfusion/config/bevfusion_lidar.param.yaml index 7d062cbd71e2d..9039aca4f99a9 100644 --- a/perception/autoware_bevfusion/config/bevfusion_lidar.param.yaml +++ b/perception/autoware_bevfusion/config/bevfusion_lidar.param.yaml @@ -18,6 +18,8 @@ # pre-process params densification_num_past_frames: 1 densification_world_frame_id: map + run_image_undistortion: false + # post-process params circle_nms_dist_threshold: 0.5 iou_nms_search_distance_2d: 10.0 diff --git a/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_node.hpp 
b/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_node.hpp index 048bb7fff827a..b520c804b61f7 100644 --- a/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_node.hpp +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_node.hpp @@ -16,6 +16,7 @@ #define AUTOWARE__BEVFUSION__BEVFUSION_NODE_HPP_ #include "autoware/bevfusion/bevfusion_trt.hpp" +#include "autoware/bevfusion/camera/camera_data.hpp" #include "autoware/bevfusion/detection_class_remapper.hpp" #include "autoware/bevfusion/postprocess/non_maximum_suppression.hpp" #include "autoware/bevfusion/preprocess/pointcloud_densification.hpp" @@ -67,7 +68,8 @@ class BEVFUSION_PUBLIC BEVFusionNode : public rclcpp::Node void diagnoseProcessingTime(diagnostic_updater::DiagnosticStatusWrapper & stat); // Helper methods for constructor - void initializeSensorFusionSubscribers(std::int64_t num_cameras); + void initializeSensorFusionSubscribers( + std::int64_t num_cameras, const ImagePreProcessingParams & image_pre_processing_params); void validateParameters( const std::vector & point_cloud_range, const std::vector & voxel_size); @@ -100,6 +102,11 @@ class BEVFUSION_PUBLIC BEVFusionNode : public rclcpp::Node std::vector::ConstSharedPtr> camera_info_subs_; rclcpp::Publisher::SharedPtr objects_pub_{ nullptr}; + // unique_ptr to avoid copying the actual camera data in memory since there's gpu buffer in the + // camera data + std::vector> camera_data_ptrs_; + // One CameraMatrices object can be shared by several CameraData + std::vector> camera_matrices_ptrs_; tf2_ros::Buffer tf_buffer_; tf2_ros::TransformListener tf_listener_{tf_buffer_}; @@ -109,9 +116,7 @@ class BEVFUSION_PUBLIC BEVFusionNode : public rclcpp::Node std::optional lidar_frame_; float max_camera_lidar_delay_; - std::vector image_msgs_; std::vector camera_masks_; - std::vector> camera_info_msgs_; std::vector> lidar2camera_extrinsics_; bool sensor_fusion_{false}; diff --git 
a/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_trt.hpp b/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_trt.hpp index 56f40655f1e0b..df6e44ea3b196 100644 --- a/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_trt.hpp +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/bevfusion_trt.hpp @@ -15,6 +15,7 @@ #ifndef AUTOWARE__BEVFUSION__BEVFUSION_TRT_HPP_ #define AUTOWARE__BEVFUSION__BEVFUSION_TRT_HPP_ +#include "autoware/bevfusion/camera/camera_data.hpp" #include "autoware/bevfusion/postprocess/postprocess_kernel.hpp" #include "autoware/bevfusion/preprocess/pointcloud_densification.hpp" #include "autoware/bevfusion/preprocess/preprocess_kernel.hpp" @@ -87,7 +88,7 @@ class BEVFUSION_PUBLIC BEVFusionTRT bool detect( const std::shared_ptr & input_pointcloud_msg_ptr, - const std::vector & image_msgs, + const std::vector> & camera_data_ptrs, const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, std::vector & det_boxes3d, std::unordered_map & proc_timing, bool & is_num_voxels_within_range); @@ -102,7 +103,7 @@ class BEVFUSION_PUBLIC BEVFusionTRT bool preProcess( const std::shared_ptr & pc_msg_ptr, - const std::vector & image_msgs, + const std::vector> & camera_data_ptrs, const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, bool & is_num_voxels_within_range); @@ -111,8 +112,11 @@ class BEVFUSION_PUBLIC BEVFusionTRT void clearDeviceMemory(); - void processImages( - const std::vector & image_msgs, + bool checkImageCameraMatricesReady( + const std::vector> & camera_data_ptrs); + + bool processImages( + const std::vector> & camera_data_ptrs, const std::vector & camera_masks); std::int64_t processPointCloudVoxelization( @@ -140,7 +144,6 @@ class BEVFUSION_PUBLIC BEVFusionTRT std::unique_ptr pre_ptr_{nullptr}; std::unique_ptr post_ptr_{nullptr}; cudaStream_t stream_{nullptr}; - std::vector camera_streams_{}; BEVFusionConfig config_; std::vector roi_start_y_vector_; @@ -171,7 +174,6 
@@ class BEVFUSION_PUBLIC BEVFusionTRT // image buffers CudaUniquePtr roi_tensor_d_{nullptr}; - std::vector> image_buffers_d_{}; CudaUniquePtr camera_masks_d_{nullptr}; // image feature buffers for fusion model diff --git a/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_data.hpp b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_data.hpp new file mode 100644 index 0000000000000..1262571941251 --- /dev/null +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_data.hpp @@ -0,0 +1,104 @@ +// Copyright 2026 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef AUTOWARE__BEVFUSION__CAMERA__CAMERA_DATA_HPP_ +#define AUTOWARE__BEVFUSION__CAMERA__CAMERA_DATA_HPP_ + +#include "autoware/bevfusion/camera/camera_matrices.hpp" +#include "autoware/bevfusion/camera/camera_preprocess.hpp" + +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace autoware::bevfusion +{ +using autoware::cuda_utils::CudaUniquePtr; + +struct ImagePreProcessingParams +{ + std::int64_t original_image_height; + std::int64_t original_image_width; + std::int64_t roi_height; + std::int64_t roi_width; + float image_aug_scale_y; + float image_aug_scale_x; + bool run_image_undistortion; + std::int64_t crop_height; + std::int64_t crop_width; + std::int64_t resized_height; + std::int64_t resized_width; + std::int64_t roi_start_y; + std::int64_t roi_start_x; + + ImagePreProcessingParams( + const std::int64_t original_image_height, const std::int64_t original_image_width, + const std::int64_t roi_height, const std::int64_t roi_width, const float image_aug_scale_y, + const float image_aug_scale_x, const bool run_image_undistortion); +}; + +class CameraData +{ +public: + CameraData( + rclcpp::Node * node, const int camera_id, + const ImagePreProcessingParams & image_pre_processing_params, + const std::shared_ptr & camera_matrices_ptr); + ~CameraData(); + + void update_image_msg(const sensor_msgs::msg::Image::ConstSharedPtr & input_camera_image_msg); + void update_camera_info(const sensor_msgs::msg::CameraInfo & input_camera_info_msg); + std::optional camera_info() const; + sensor_msgs::msg::CameraInfo camera_info_value() const; + + sensor_msgs::msg::Image::ConstSharedPtr image_msg() const; + bool is_image_msg_available() const; + bool is_camera_info_available() const; + std::size_t output_img_offset() const; + + bool preprocess_image(std::uint8_t * output_img); + bool is_camera_matrices_ready() const; + cudaError_t sync_cuda_stream(); + +private: + // Camera info buffer + std::optional camera_info_; + + // Image 
buffer + // TODO(KokSeang): Remove this once we move preprocessing to subscribers + sensor_msgs::msg::Image::ConstSharedPtr image_msg_; + + // GPU Memory for preprocessed image + // image buffers + CudaUniquePtr image_buffer_d_{nullptr}; + CudaUniquePtr undistorted_image_buffer_d_{nullptr}; + + rclcpp::Logger logger_; + int camera_id_; + ImagePreProcessingParams image_pre_processing_params_; + std::size_t output_img_offset_; + + cudaStream_t stream_{nullptr}; + std::unique_ptr camera_preprocess_ptr_{nullptr}; + std::shared_ptr camera_matrices_ptr_{nullptr}; +}; +} // namespace autoware::bevfusion +#endif // AUTOWARE__BEVFUSION__CAMERA__CAMERA_DATA_HPP_ diff --git a/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_matrices.hpp b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_matrices.hpp new file mode 100644 index 0000000000000..0b895aea2252b --- /dev/null +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_matrices.hpp @@ -0,0 +1,46 @@ +// Copyright 2026 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef AUTOWARE__BEVFUSION__CAMERA__CAMERA_MATRICES_HPP_ +#define AUTOWARE__BEVFUSION__CAMERA__CAMERA_MATRICES_HPP_ + +#include +#include + +#include + +namespace autoware::bevfusion +{ +using autoware::cuda_utils::CudaUniquePtr; + +// Helper struct for camera matrices +struct CameraMatrices +{ + cv::Mat K; + cv::Mat D; + cv::Mat P; + int map_width; + int map_height; + bool matrices_ready{false}; + + // GPU Memory for undistortion maps and they only need to be computed once for each camera + CudaUniquePtr undistorted_map_x_d_{nullptr}; + CudaUniquePtr undistorted_map_y_d_{nullptr}; + + CameraMatrices(); + void update_camera_matrices(const sensor_msgs::msg::CameraInfo & camera_info); + void compute_undistorted_map_x_y(); +}; +} // namespace autoware::bevfusion +#endif // AUTOWARE__BEVFUSION__CAMERA__CAMERA_MATRICES_HPP_ diff --git a/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_preprocess.hpp b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_preprocess.hpp new file mode 100644 index 0000000000000..70e30ff2962f5 --- /dev/null +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/camera/camera_preprocess.hpp @@ -0,0 +1,50 @@ +// Copyright 2026 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef AUTOWARE__BEVFUSION__CAMERA__CAMERA_PREPROCESS_HPP_ +#define AUTOWARE__BEVFUSION__CAMERA__CAMERA_PREPROCESS_HPP_ + +#include "autoware/bevfusion/utils.hpp" + +#include + +#include + +namespace autoware::bevfusion +{ + +class CameraPreprocess +{ +public: + CameraPreprocess(cudaStream_t stream, const int camera_id); + + cudaError_t resizeAndExtractRoi_launch( + const std::uint8_t * input_img, std::uint8_t * output_img, int H, + int W, // Original image dimensions + int H2, int W2, // Resized image dimensions + int H3, int W3, // ROI dimensions + int y_start, int x_start // ROI top-left coordinates in resized image + ); + cudaError_t remap_launch( + const std::uint8_t * input_img, std::uint8_t * output_img, int output_height, + int output_width, // Output (destination) image dimensions + int input_height, int input_width, // Input (source) image dimensions + const float * map_x, const float * map_y); + +private: + cudaStream_t stream_; + int camera_id_; +}; +} // namespace autoware::bevfusion +#endif // AUTOWARE__BEVFUSION__CAMERA__CAMERA_PREPROCESS_HPP_ diff --git a/perception/autoware_bevfusion/include/autoware/bevfusion/preprocess/preprocess_kernel.hpp b/perception/autoware_bevfusion/include/autoware/bevfusion/preprocess/preprocess_kernel.hpp index ad1dcdfde11a6..22d9c06a5bd96 100644 --- a/perception/autoware_bevfusion/include/autoware/bevfusion/preprocess/preprocess_kernel.hpp +++ b/perception/autoware_bevfusion/include/autoware/bevfusion/preprocess/preprocess_kernel.hpp @@ -59,10 +59,6 @@ class PreprocessCuda const float * points, unsigned int points_size, float * voxel_features, std::int32_t * voxel_coords, std::int32_t * num_points_per_voxel); - cudaError_t resizeAndExtractRoi_launch( - const std::uint8_t * input_img, std::uint8_t * output_img, int H, int W, int H2, int W2, int H3, - int W3, int y_start, int x_start, cudaStream_t stream); - private: BEVFusionConfig config_; cudaStream_t stream_; diff --git 
a/perception/autoware_bevfusion/launch/bevfusion.launch.xml b/perception/autoware_bevfusion/launch/bevfusion.launch.xml index 01bddca4b1082..5b130423fde60 100644 --- a/perception/autoware_bevfusion/launch/bevfusion.launch.xml +++ b/perception/autoware_bevfusion/launch/bevfusion.launch.xml @@ -74,18 +74,18 @@ - - - - - - - - - - - - + + + + + + + + + + + + diff --git a/perception/autoware_bevfusion/lib/bevfusion_trt.cpp b/perception/autoware_bevfusion/lib/bevfusion_trt.cpp index 620e162ee1fe5..dc34c99cbfef9 100644 --- a/perception/autoware_bevfusion/lib/bevfusion_trt.cpp +++ b/perception/autoware_bevfusion/lib/bevfusion_trt.cpp @@ -45,10 +45,6 @@ BEVFusionTRT::BEVFusionTRT( { // Create and init cuda streams CHECK_CUDA_ERROR(cudaStreamCreate(&stream_)); - camera_streams_.resize(config_.num_cameras_); - for (std::int64_t i = 0; i < config_.num_cameras_; i++) { - CHECK_CUDA_ERROR(cudaStreamCreate(&camera_streams_[i])); - } vg_ptr_ = std::make_unique(densification_param, config_, stream_); stop_watch_ptr_ = @@ -65,13 +61,6 @@ BEVFusionTRT::~BEVFusionTRT() cudaStreamSynchronize(stream_); cudaStreamDestroy(stream_); } - - for (std::int64_t i = 0; i < config_.num_cameras_; i++) { - if (camera_streams_[i]) { - cudaStreamSynchronize(camera_streams_[i]); - cudaStreamDestroy(camera_streams_[i]); - } - } } void BEVFusionTRT::initPtr() @@ -112,11 +101,6 @@ void BEVFusionTRT::initPtr() roi_tensor_d_ = autoware::cuda_utils::make_unique( config_.num_cameras_ * config_.roi_height_ * config_.roi_width_ * BEVFusionConfig::kNumRGBChannels); - for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { - image_buffers_d_.emplace_back( - autoware::cuda_utils::make_unique( - config_.raw_image_height_ * config_.raw_image_width_ * BEVFusionConfig::kNumRGBChannels)); - } camera_masks_d_ = autoware::cuda_utils::make_unique(config_.num_cameras_); // buffers for fusion model with separate image backbone @@ -368,13 +352,14 @@ void 
BEVFusionTRT::setSensorFusionTensorAddresses() bool BEVFusionTRT::detect( const std::shared_ptr & pc_msg_ptr, - const std::vector & image_msgs, + const std::vector> & camera_data_ptrs, const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, std::vector & det_boxes3d, std::unordered_map & proc_timing, bool & is_num_voxels_within_range) { stop_watch_ptr_->toc("processing/inner", true); - if (!preProcess(pc_msg_ptr, image_msgs, camera_masks, tf_buffer, is_num_voxels_within_range)) { + if (!preProcess( + pc_msg_ptr, camera_data_ptrs, camera_masks, tf_buffer, is_num_voxels_within_range)) { RCLCPP_ERROR(rclcpp::get_logger("bevfusion"), "Pre-process failed. Skipping detection."); return false; } @@ -454,19 +439,20 @@ void BEVFusionTRT::setIntrinsicsExtrinsics( assert(num_geom_feats_ == BEVFusionConfig::kTransformMatrixDim * num_ranks_); assert(num_ranks_ == num_indices_); - cudaMemcpy( + CHECK_CUDA_ERROR(cudaMemcpy( lidar2image_d_.get(), lidar2images_flattened.data(), config_.num_cameras_ * BEVFusionConfig::kTransformMatrixDim * BEVFusionConfig::kTransformMatrixDim * sizeof(float), - cudaMemcpyHostToDevice); - cudaMemcpy( + cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( geom_feats_d_.get(), geom_feats.data(), num_geom_feats_ * sizeof(std::int32_t), - cudaMemcpyHostToDevice); - cudaMemcpy(kept_d_.get(), kept.data(), num_kept_ * sizeof(std::uint8_t), cudaMemcpyHostToDevice); - cudaMemcpy( - ranks_d_.get(), ranks.data(), num_ranks_ * sizeof(std::int64_t), cudaMemcpyHostToDevice); - cudaMemcpy( - indices_d_.get(), indices.data(), num_indices_ * sizeof(std::int64_t), cudaMemcpyHostToDevice); + cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( + kept_d_.get(), kept.data(), num_kept_ * sizeof(std::uint8_t), cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( + ranks_d_.get(), ranks.data(), num_ranks_ * sizeof(std::int64_t), cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( + indices_d_.get(), indices.data(), num_indices_ * 
sizeof(std::int64_t), cudaMemcpyHostToDevice)); // Copy img_aug_matrix data for fusion model (fusion model always uses separate image backbone) // Each Matrix4fRowM is contiguous, copy each matrix directly to its position in device memory @@ -475,9 +461,9 @@ void BEVFusionTRT::setIntrinsicsExtrinsics( BEVFusionConfig::kTransformMatrixDim * BEVFusionConfig::kTransformMatrixDim; for (std::int64_t i = 0; i < config_.num_cameras_; i++) { - cudaMemcpy( + CHECK_CUDA_ERROR(cudaMemcpy( img_aug_matrix_d_.get() + i * matrix_size, img_aug_matrices_[i].data(), - matrix_size * sizeof(float), cudaMemcpyHostToDevice); + matrix_size * sizeof(float), cudaMemcpyHostToDevice)); } } } @@ -514,42 +500,36 @@ void BEVFusionTRT::clearDeviceMemory() CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); } -void BEVFusionTRT::processImages( - const std::vector & image_msgs, - const std::vector & camera_masks) +bool BEVFusionTRT::checkImageCameraMatricesReady( + const std::vector> & camera_data_ptrs) { - if (!config_.sensor_fusion_) { - return; - } - - // TODO(knzo25): move this to each image callback - const int start_x = - std::max(0, static_cast(config_.resized_width_) - static_cast(config_.roi_width_)) / - 2; - for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { - const int start_y = roi_start_y_vector_[camera_id]; - cudaMemcpyAsync( - image_buffers_d_[camera_id].get(), image_msgs[camera_id]->data.data(), - config_.raw_image_height_ * config_.raw_image_width_ * BEVFusionConfig::kNumRGBChannels, - cudaMemcpyHostToDevice, stream_); - - pre_ptr_->resizeAndExtractRoi_launch( - image_buffers_d_[camera_id].get(), - &roi_tensor_d_ - [camera_id * config_.roi_height_ * config_.roi_width_ * BEVFusionConfig::kNumRGBChannels], - config_.raw_image_height_, config_.raw_image_width_, config_.resized_height_, - config_.resized_width_, config_.roi_height_, config_.roi_width_, start_y, start_x, - camera_streams_[camera_id]); + if 
(!camera_data_ptrs[camera_id]->is_camera_matrices_ready()) { + return false; + } } + return true; +} - for (std::int64_t i = 0; i < config_.num_cameras_; i++) { - cudaStreamSynchronize(camera_streams_[i]); +bool BEVFusionTRT::processImages( + const std::vector> & camera_data_ptrs, + const std::vector & camera_masks) +{ + for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { + auto roi_tensor_offset = camera_data_ptrs[camera_id]->output_img_offset(); + if (!camera_data_ptrs[camera_id]->preprocess_image(&roi_tensor_d_[roi_tensor_offset])) { + return false; + } } - cudaMemcpyAsync( + CHECK_CUDA_ERROR(cudaMemcpyAsync( camera_masks_d_.get(), camera_masks.data(), config_.num_cameras_ * sizeof(float), - cudaMemcpyHostToDevice, stream_); + cudaMemcpyHostToDevice, stream_)); + + for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { + CHECK_CUDA_ERROR(camera_data_ptrs[camera_id]->sync_cuda_stream()); + } + return true; } std::int64_t BEVFusionTRT::processPointCloudVoxelization( @@ -628,7 +608,7 @@ void BEVFusionTRT::configureTensorRTInputs(std::int64_t num_voxels, std::size_t bool BEVFusionTRT::preProcess( const std::shared_ptr & pc_msg_ptr, - const std::vector & image_msgs, + const std::vector> & camera_data_ptrs, const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, bool & is_num_voxels_within_range) { @@ -643,7 +623,24 @@ bool BEVFusionTRT::preProcess( } clearDeviceMemory(); - processImages(image_msgs, camera_masks); + + // Process images if sensor fusion is enabled + if (config_.sensor_fusion_) { + // Check if image camera matrices are ready + if (!checkImageCameraMatricesReady(camera_data_ptrs)) { + RCLCPP_ERROR( + rclcpp::get_logger("bevfusion"), + "Image camera matrices are not ready. Skipping pre-processing."); + return false; + } + + // Process Images + if (!processImages(camera_data_ptrs, camera_masks)) { + RCLCPP_ERROR( + rclcpp::get_logger("bevfusion"), "Failed to process images. 
Skipping detection."); + return false; + } + } const auto num_points = vg_ptr_->generateSweepPoints(points_d_); if (num_points == 0) { @@ -668,12 +665,12 @@ bool BEVFusionTRT::preProcess( std::vector roi_host_data( config_.roi_height_ * config_.roi_width_ * BEVFusionConfig::kNumRGBChannels); for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { - cudaMemcpy( + CHECK_CUDA_ERROR(cudaMemcpy( roi_host_data.data(), &roi_tensor_d_ [camera_id * config_.roi_height_ * config_.roi_width_ * BEVFusionConfig::kNumRGBChannels], config_.roi_height_ * config_.roi_width_ * BEVFusionConfig::kNumRGBChannels, - cudaMemcpyDeviceToHost); + cudaMemcpyDeviceToHost)); } } diff --git a/perception/autoware_bevfusion/lib/camera/camera_data.cpp b/perception/autoware_bevfusion/lib/camera/camera_data.cpp new file mode 100644 index 0000000000000..648f1e595c430 --- /dev/null +++ b/perception/autoware_bevfusion/lib/camera/camera_data.cpp @@ -0,0 +1,217 @@ +// Copyright 2026 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "autoware/bevfusion/camera/camera_data.hpp" + +#include "autoware/bevfusion/bevfusion_config.hpp" +#include "autoware/bevfusion/camera/camera_preprocess.hpp" + +#include +#include + +namespace autoware::bevfusion +{ + +ImagePreProcessingParams::ImagePreProcessingParams( + const std::int64_t image_height, const std::int64_t image_width, const std::int64_t roi_height, + const std::int64_t roi_width, const float image_aug_scale_y, const float image_aug_scale_x, + const bool run_image_undistortion) +: original_image_height(image_height), + original_image_width(image_width), + roi_height(roi_height), + roi_width(roi_width), + image_aug_scale_y(image_aug_scale_y), + image_aug_scale_x(image_aug_scale_x), + run_image_undistortion(run_image_undistortion) +{ + resized_height = original_image_height * image_aug_scale_y; + resized_width = original_image_width * image_aug_scale_x; + crop_height = resized_height - roi_height; + crop_width = std::max( + static_cast(0), static_cast((resized_width - roi_width) / 2)); + + roi_start_x = std::max(static_cast(0), crop_width); + roi_start_y = std::max(static_cast(0), crop_height); +} + +CameraData::CameraData( + rclcpp::Node * node, const int camera_id, + const ImagePreProcessingParams & image_pre_processing_params, + const std::shared_ptr & camera_matrices_ptr) +: logger_(node->get_logger()), + camera_id_(camera_id), + image_pre_processing_params_(image_pre_processing_params), + camera_matrices_ptr_(camera_matrices_ptr) +{ + // Initialization cuda stream + cudaStreamCreate(&stream_); + + camera_preprocess_ptr_ = std::make_unique(stream_, camera_id_); + image_buffer_d_ = autoware::cuda_utils::make_unique( + image_pre_processing_params_.original_image_height * + image_pre_processing_params_.original_image_width * BEVFusionConfig::kNumRGBChannels); + + if (image_pre_processing_params_.run_image_undistortion) { + undistorted_image_buffer_d_ = autoware::cuda_utils::make_unique( + 
image_pre_processing_params_.original_image_height * + image_pre_processing_params_.original_image_width * BEVFusionConfig::kNumRGBChannels); + } + + output_img_offset_ = camera_id_ * BEVFusionConfig::kNumRGBChannels * + image_pre_processing_params_.roi_height * + image_pre_processing_params_.roi_width; +} + +CameraData::~CameraData() +{ + if (stream_ != nullptr) { + // 1. Wait for ALL pending GPU work to finish + cudaStreamSynchronize(stream_); + + // 2. Destroy the stream handle + cudaStreamDestroy(stream_); + + // 3. Mark it as null + stream_ = nullptr; + } +} + +void CameraData::update_image_msg( + const sensor_msgs::msg::Image::ConstSharedPtr & input_camera_image_msg) +{ + // Update image message + image_msg_ = input_camera_image_msg; +} + +void CameraData::update_camera_info(const sensor_msgs::msg::CameraInfo & camera_info_msg) +{ + // Update camera info message + camera_info_ = camera_info_msg; + + // Dont need to compute matrices and undistortion if it's not enabled + if (!image_pre_processing_params_.run_image_undistortion) { + return; + } + + // Check if undistorted map x and y are already computed + if ( + camera_matrices_ptr_->undistorted_map_x_d_ != nullptr && + camera_matrices_ptr_->undistorted_map_y_d_ != nullptr) { + return; + } + + // Update camera matrices + camera_matrices_ptr_->update_camera_matrices(camera_info_msg); + + // Compute undistorted map x and y + camera_matrices_ptr_->compute_undistorted_map_x_y(); +} + +bool CameraData::is_image_msg_available() const +{ + return image_msg_ != nullptr; +} + +bool CameraData::is_camera_info_available() const +{ + return camera_info_.has_value(); +} + +std::optional CameraData::camera_info() const +{ + return camera_info_; +} + +sensor_msgs::msg::CameraInfo CameraData::camera_info_value() const +{ + return *(camera_info_); +} + +sensor_msgs::msg::Image::ConstSharedPtr CameraData::image_msg() const +{ + return image_msg_; +} + +std::size_t CameraData::output_img_offset() const +{ + return 
output_img_offset_; +} + +bool CameraData::preprocess_image(std::uint8_t * output_img) +{ + // 1. Copy image from CPU to GPU + CHECK_CUDA_ERROR(cudaMemcpyAsync( + image_buffer_d_.get(), image_msg_->data.data(), + image_pre_processing_params_.original_image_height * + image_pre_processing_params_.original_image_width * BEVFusionConfig::kNumRGBChannels, + cudaMemcpyHostToDevice, stream_)); + + if (image_pre_processing_params_.run_image_undistortion) { + // 1. Check if original image shape is the same as the original image shape + if ( + image_msg_->height != image_pre_processing_params_.original_image_height || + image_msg_->width != image_pre_processing_params_.original_image_width) { + RCLCPP_ERROR(logger_, "Input image shape is not the same as the original image shape"); + return false; + } + + // 2. Launch remap kernel for undistortion + camera_preprocess_ptr_->remap_launch( + image_buffer_d_.get(), undistorted_image_buffer_d_.get(), + // Output dimensions (full resolution) + image_pre_processing_params_.original_image_height, + image_pre_processing_params_.original_image_width, + // Input dimensions (full resolution) + image_msg_->height, image_msg_->width, + // Undistorted map x and y + camera_matrices_ptr_->undistorted_map_x_d_.get(), + camera_matrices_ptr_->undistorted_map_y_d_.get()); + + // 3. 
Resize and extract ROI, and then saving to output_img + // output_img is expected to be in the size of roi_height * roi_width * image channels (usually, + camera_preprocess_ptr_->resizeAndExtractRoi_launch( + undistorted_image_buffer_d_.get(), output_img, + image_pre_processing_params_.original_image_height, + image_pre_processing_params_.original_image_width, + image_pre_processing_params_.resized_height, image_pre_processing_params_.resized_width, + image_pre_processing_params_.roi_height, image_pre_processing_params_.roi_width, + image_pre_processing_params_.roi_start_y, image_pre_processing_params_.roi_start_x); + } else { + // Skip the undistortion step and directly resize and extract ROI + camera_preprocess_ptr_->resizeAndExtractRoi_launch( + image_buffer_d_.get(), output_img, image_pre_processing_params_.original_image_height, + image_pre_processing_params_.original_image_width, + image_pre_processing_params_.resized_height, image_pre_processing_params_.resized_width, + image_pre_processing_params_.roi_height, image_pre_processing_params_.roi_width, + image_pre_processing_params_.roi_start_y, image_pre_processing_params_.roi_start_x); + } + + return true; +} + +bool CameraData::is_camera_matrices_ready() const +{ + if (!image_pre_processing_params_.run_image_undistortion) { + return true; + } else { + return camera_matrices_ptr_->matrices_ready; + } +} + +cudaError_t CameraData::sync_cuda_stream() +{ + return cudaStreamSynchronize(stream_); +} + +} // namespace autoware::bevfusion diff --git a/perception/autoware_bevfusion/lib/camera/camera_matrices.cpp b/perception/autoware_bevfusion/lib/camera/camera_matrices.cpp new file mode 100644 index 0000000000000..1fde831a80f50 --- /dev/null +++ b/perception/autoware_bevfusion/lib/camera/camera_matrices.cpp @@ -0,0 +1,81 @@ +// Copyright 2026 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/bevfusion/camera/camera_matrices.hpp" + +#include + +namespace autoware::bevfusion +{ + +CameraMatrices::CameraMatrices() +{ + K = cv::Mat::zeros(3, 3, CV_64F); + D = cv::Mat::zeros(1, 5, CV_64F); + P = cv::Mat::zeros(3, 4, CV_64F); + map_width = 0; + map_height = 0; + + matrices_ready = false; +} + +void CameraMatrices::update_camera_matrices(const sensor_msgs::msg::CameraInfo & camera_info) +{ + // Create camera matrix K from camera_info + K = + (cv::Mat_(3, 3) << camera_info.k[0], camera_info.k[1], camera_info.k[2], + camera_info.k[3], camera_info.k[4], camera_info.k[5], camera_info.k[6], camera_info.k[7], + camera_info.k[8]); + + // Create distortion coefficients matrix D from camera_info + const auto & d_vec = camera_info.d; + D = cv::Mat(1, static_cast(d_vec.size()), CV_64F); + for (size_t i = 0; i < d_vec.size(); ++i) { + D.at(0, static_cast(i)) = d_vec[i]; + } + + // Create projection matrix P from camera_info (first 3x3 part) + P = + (cv::Mat_(3, 3) << camera_info.p[0], camera_info.p[1], camera_info.p[2], + camera_info.p[4], camera_info.p[5], camera_info.p[6], camera_info.p[8], camera_info.p[9], + camera_info.p[10]); + + // Use full resolution for both input and output (no downsampling) + map_width = camera_info.width; + map_height = camera_info.height; +} + +void CameraMatrices::compute_undistorted_map_x_y() +{ + undistorted_map_x_d_ = autoware::cuda_utils::make_unique(map_width * map_height); + undistorted_map_y_d_ = autoware::cuda_utils::make_unique(map_width * map_height); + + // Compute undistortion maps for full 
resolution + cv::Mat undistort_map_x, undistort_map_y; + cv::initUndistortRectifyMap( + K, D, cv::Mat(), P, cv::Size(map_width, map_height), CV_32FC1, undistort_map_x, + undistort_map_y); + + // Always sync the data to the GPU since it only runs once for every camera + CHECK_CUDA_ERROR(cudaMemcpy( + undistorted_map_x_d_.get(), undistort_map_x.data, map_width * map_height * sizeof(float), + cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( + undistorted_map_y_d_.get(), undistort_map_y.data, map_width * map_height * sizeof(float), + cudaMemcpyHostToDevice)); + + matrices_ready = true; +} + +} // namespace autoware::bevfusion diff --git a/perception/autoware_bevfusion/lib/camera/camera_preprocess_kernel.cu b/perception/autoware_bevfusion/lib/camera/camera_preprocess_kernel.cu new file mode 100644 index 0000000000000..f416fd5b2c477 --- /dev/null +++ b/perception/autoware_bevfusion/lib/camera/camera_preprocess_kernel.cu @@ -0,0 +1,268 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + * All rights reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
// All rights reserved. SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "autoware/bevfusion/camera/camera_preprocess.hpp"

#include <cuda_runtime.h>

#include <cstdint>

namespace autoware::bevfusion
{

// Binds the per-camera preprocessing pipeline to a CUDA stream.
CameraPreprocess::CameraPreprocess(cudaStream_t stream, const int camera_id)
: stream_(stream), camera_id_(camera_id)
{
}

// Undistortion remap: for every output pixel, look up its source coordinate
// in the precomputed maps and bilinearly interpolate the 3-channel input.
// Maps are sized to the OUTPUT image (one entry per destination pixel).
__global__ void remap_kernel(
  const std::uint8_t * __restrict__ input_img, std::uint8_t * __restrict__ output_img,
  int output_height, int output_width,  // Output (destination) image dimensions
  int input_height, int input_width,    // Input (source) image dimensions
  const float * __restrict__ map_x, const float * __restrict__ map_y)
{
  // Global thread indices over the output image
  const int x = blockIdx.x * blockDim.x + threadIdx.x;  // Width index in output
  const int y = blockIdx.y * blockDim.y + threadIdx.y;  // Height index in output

  // Discard threads outside the output image
  if (x >= output_width || y >= output_height) return;

  // Source coordinates for this output pixel:
  // skip y full rows (y * output_width) plus x columns
  const int map_idx = y * output_width + x;
  const float src_x = map_x[map_idx];
  const float src_y = map_y[map_idx];

  // Interleaved HWC (3-channel) index of the destination pixel
  const int out_idx = (y * output_width + x) * 3;

  // Out-of-bounds source coordinate -> black pixel
  if (src_x < 0 || src_y < 0 || src_x >= input_width || src_y >= input_height) {
    output_img[out_idx] = 0;
    output_img[out_idx + 1] = 0;
    output_img[out_idx + 2] = 0;
    return;
  }

  // Bilinear interpolation between the four neighbouring source pixels
  const int x0 = static_cast<int>(floorf(src_x));
  const int y0 = static_cast<int>(floorf(src_y));
  const int x1 = x0 + 1;
  const int y1 = y0 + 1;

  const float dx = src_x - x0;
  const float dy = src_y - y0;

  // Interpolation weights of the unit square (0,0), (0,1), (1,0), (1,1)
  const float w00 = (1.0f - dx) * (1.0f - dy);
  const float w01 = (1.0f - dx) * dy;
  const float w10 = dx * (1.0f - dy);
  const float w11 = dx * dy;

// Process each color channel
#pragma unroll
  for (int c = 0; c < 3; ++c) {
    float v00 = 0.0f, v01 = 0.0f, v10 = 0.0f, v11 = 0.0f;

    // Per-neighbour boundary checks: x1/y1 may fall one pixel past the edge
    if (x0 >= 0 && x0 < input_width && y0 >= 0 && y0 < input_height)
      v00 = static_cast<float>(input_img[(y0 * input_width + x0) * 3 + c]);
    if (x0 >= 0 && x0 < input_width && y1 >= 0 && y1 < input_height)
      v01 = static_cast<float>(input_img[(y1 * input_width + x0) * 3 + c]);
    if (x1 >= 0 && x1 < input_width && y0 >= 0 && y0 < input_height)
      v10 = static_cast<float>(input_img[(y0 * input_width + x1) * 3 + c]);
    if (x1 >= 0 && x1 < input_width && y1 >= 0 && y1 < input_height)
      v11 = static_cast<float>(input_img[(y1 * input_width + x1) * 3 + c]);

    // Interpolate, clamp to the valid byte range, and store
    float interpolated_value = w00 * v00 + w01 * v01 + w10 * v10 + w11 * v11;
    interpolated_value = fmaxf(0.0f, fminf(255.0f, interpolated_value));
    output_img[out_idx + c] = static_cast<std::uint8_t>(interpolated_value);
  }
}
// -------------------------------------------------------------------------
// Fused Kernel: Anti-Aliased Resize -> Crop -> Planar CHW Layout
// -------------------------------------------------------------------------
// Mimics PIL's resize (bilinear/triangle filter with adaptive support).
// For downscaling, the filter window expands to cover all contributing
// pixels (anti-aliasing). For upscaling, it acts as standard bilinear
// interpolation.
__global__ void resizeAndExtractRoi_kernel(
  const std::uint8_t * __restrict__ input_img, std::uint8_t * __restrict__ output_img, int in_h,
  int in_w,                          // Input dimensions
  int resize_h, int resize_w,        // Target resize dimensions (before cropping)
  int roi_h, int roi_w,              // Output ROI dimensions
  int roi_y_start, int roi_x_start)  // Top-left of ROI in the resized coordinate space
{
  // 1. Thread -> target pixel in the ROI (output image)
  const int out_x = blockIdx.x * blockDim.x + threadIdx.x;
  const int out_y = blockIdx.y * blockDim.y + threadIdx.y;

  if (out_x >= roi_w || out_y >= roi_h) return;

  // 2. Map the ROI pixel into the virtual resized-image coordinate space
  // (the image that *would* exist if the whole input were resized)
  const float resized_x = static_cast<float>(out_x + roi_x_start);
  const float resized_y = static_cast<float>(out_y + roi_y_start);

  // 3. Scale factors (input / resized); scale > 1.0 means downsampling
  const float scale_x = static_cast<float>(in_w) / static_cast<float>(resize_w);
  const float scale_y = static_cast<float>(in_h) / static_cast<float>(resize_h);

  // 4. Center point in the input image, standard half-pixel alignment:
  // (x + 0.5) * scale - 0.5
  const float center_x = (resized_x + 0.5f) * scale_x - 0.5f;
  const float center_y = (resized_y + 0.5f) * scale_y - 0.5f;

  // 5. Filter support (kernel radius): grows with the factor when
  // downsampling (anti-aliasing); 1.0 when upsampling (plain bilinear)
  const float support_x = (scale_x > 1.0f) ? scale_x : 1.0f;
  const float support_y = (scale_y > 1.0f) ? scale_y : 1.0f;

  // 6. Sampling bounds [center - support, center + support], clamped to the
  // input image
  const int x_min = max(0, static_cast<int>(ceilf(center_x - support_x)));
  const int x_max = min(in_w - 1, static_cast<int>(floorf(center_x + support_x)));
  const int y_min = max(0, static_cast<int>(ceilf(center_y - support_y)));
  const int y_max = min(in_h - 1, static_cast<int>(floorf(center_y + support_y)));

  // Per-channel accumulators plus the running weight sum
  float sum_first_channel = 0.0f;
  float sum_second_channel = 0.0f;
  float sum_third_channel = 0.0f;
  float sum_weight = 0.0f;

  // 7. Convolution loop (triangle/bilinear filter)
  // TODO(KokSeang): If necessary, optimize with im2col, separate horizontal
  // and vertical filtering, etc.
  for (int y = y_min; y <= y_max; ++y) {
    // Y-weight: triangle filter (1 - distance / support)
    const float wy = fmaxf(0.0f, 1.0f - fabsf(center_y - y) / support_y);

    // Pre-calculate the row offset once per scanline
    const int row_offset = y * in_w;

    for (int x = x_min; x <= x_max; ++x) {
      // X-weight
      const float wx = fmaxf(0.0f, 1.0f - fabsf(center_x - x) / support_x);

      // Combined weight; skip pixels with no contribution
      const float w = wx * wy;
      if (w > 0.0f) {
        // Input is interleaved HWC with 3 channels, hence the * 3 stride
        const int idx = (row_offset + x) * 3;
        sum_first_channel += input_img[idx] * w;
        sum_second_channel += input_img[idx + 1] * w;
        sum_third_channel += input_img[idx + 2] * w;
        sum_weight += w;
      }
    }
  }

  // 8. Normalize by the accumulated weight (guards a degenerate window;
  // should not trigger inside a valid ROI)
  if (sum_weight > 0.0f) {
    sum_first_channel /= sum_weight;
    sum_second_channel /= sum_weight;
    sum_third_channel /= sum_weight;
  }

  // 9. Write to planar CHW output: [Channel, Height, Width] per camera.
  // Round to nearest (PIL rounds) instead of truncating; the weighted
  // average of uint8 samples is already within [0, 255].
  const int area = roi_h * roi_w;
  const int out_idx = out_y * roi_w + out_x;
  output_img[0 * area + out_idx] = static_cast<std::uint8_t>(sum_first_channel + 0.5f);
  output_img[1 * area + out_idx] = static_cast<std::uint8_t>(sum_second_channel + 0.5f);
  output_img[2 * area + out_idx] = static_cast<std::uint8_t>(sum_third_channel + 0.5f);
}

// Launches the fused resize + ROI-extraction kernel on this camera's stream.
// Returns the launch status from cudaGetLastError().
cudaError_t CameraPreprocess::resizeAndExtractRoi_launch(
  const std::uint8_t * input_img, std::uint8_t * output_img, int H,
  int W,                     // Original image dimensions
  int H2, int W2,            // Resized image dimensions
  int H3, int W3,            // ROI dimensions
  int y_start, int x_start)  // ROI top-left coordinates in resized image
{
  // One thread per output ROI pixel, 16x16 blocks
  dim3 threads(16, 16);
  dim3 blocks(divup(W3, threads.x), divup(H3, threads.y));

  resizeAndExtractRoi_kernel<<<blocks, threads, 0, stream_>>>(
    input_img, output_img, H, W, H2, W2, H3, W3, y_start, x_start);

  // Report launch errors (kernel execution itself is asynchronous)
  return cudaGetLastError();
}

// Launches the undistortion remap kernel on this camera's stream.
// Returns the launch status from cudaGetLastError().
cudaError_t CameraPreprocess::remap_launch(
  const std::uint8_t * input_img, std::uint8_t * output_img, int output_height,
  int output_width,                   // Output (destination) image dimensions
  int input_height, int input_width,  // Input (source) image dimensions
  const float * map_x, const float * map_y)
{
  // Grid sized by the OUTPUT image: one thread per destination pixel
  dim3 threads(16, 16);
  dim3 blocks(divup(output_width, threads.x), divup(output_height, threads.y));

  remap_kernel<<<blocks, threads, 0, stream_>>>(
    input_img, output_img, output_height, output_width, input_height, input_width, map_x, map_y);

  // Report launch errors (kernel execution itself is asynchronous)
  return cudaGetLastError();
}

}  // namespace autoware::bevfusion
a/perception/autoware_bevfusion/lib/preprocess/preprocess_kernel.cu +++ b/perception/autoware_bevfusion/lib/preprocess/preprocess_kernel.cu @@ -153,87 +153,4 @@ std::size_t PreprocessCuda::generateVoxels( return real_num_voxels; } -__global__ void resizeAndExtractRoi_kernel( - const std::uint8_t * __restrict__ input_img, std::uint8_t * __restrict__ output_img, int H, - int W, // Original image dimensions (Height, Width) - int H2, int W2, // Resized image dimensions (Height, Width) - int H3, int W3, // ROI dimensions (Height, Width) - int y_start, int x_start) // ROI top-left coordinates in resized image -{ - // Calculate the global thread indices - int i = blockIdx.x * blockDim.x + threadIdx.x; // Width index in output (ROI) image - int j = blockIdx.y * blockDim.y + threadIdx.y; // Height index in output (ROI) image - - // Check if the thread corresponds to a valid pixel in the ROI - if (i >= W3 || j >= H3) return; - - // Compute scaling factors from original to resized image - auto scale_y = static_cast(H) / H2; - auto scale_x = static_cast(W) / W2; - - // Map output pixel (i, j) in ROI to position in resized image - int i_resized = i + x_start; - int j_resized = j + y_start; - - // Map position in resized image back to position in original image - float i_original = (i_resized + 0.5f) * scale_x - 0.5f; - float j_original = (j_resized + 0.5f) * scale_y - 0.5f; - - // Compute coordinates for bilinear interpolation - auto i0 = static_cast(floorf(i_original)); - auto j0 = static_cast(floorf(j_original)); - int i1 = i0 + 1; - int j1 = j0 + 1; - - // Compute interpolation weights - float di = i_original - i0; - float dj = j_original - j0; - - float w00 = (1.0f - di) * (1.0f - dj); - float w01 = (1.0f - di) * dj; - float w10 = di * (1.0f - dj); - float w11 = di * dj; - - // Loop over the three color channels - for (int c = 0; c < 3; ++c) { - float v00 = 0.0f, v01 = 0.0f, v10 = 0.0f, v11 = 0.0f; - - // Boundary checks for each neighboring pixel - if (i0 >= 0 && i0 < W && 
j0 >= 0 && j0 < H) - v00 = static_cast(input_img[(j0 * W + i0) * 3 + c]); - if (i0 >= 0 && i0 < W && j1 >= 0 && j1 < H) - v01 = static_cast(input_img[(j1 * W + i0) * 3 + c]); - if (i1 >= 0 && i1 < W && j0 >= 0 && j0 < H) - v10 = static_cast(input_img[(j0 * W + i1) * 3 + c]); - if (i1 >= 0 && i1 < W && j1 >= 0 && j1 < H) - v11 = static_cast(input_img[(j1 * W + i1) * 3 + c]); - - // Compute the interpolated pixel value - float value = w00 * v00 + w01 * v01 + w10 * v10 + w11 * v11; - - // Store the result in the output ROI image - output_img[(j * W3 + i) * 3 + c] = static_cast(value); - } -} - -cudaError_t PreprocessCuda::resizeAndExtractRoi_launch( - const std::uint8_t * input_img, std::uint8_t * output_img, int H, - int W, // Original image dimensions - int H2, int W2, // Resized image dimensions - int H3, int W3, // ROI dimensions - int y_start, int x_start, // ROI top-left coordinates in resized image - cudaStream_t stream) -{ - // Define the block and grid dimensions - dim3 threads(16, 16); - dim3 blocks(divup(W3, threads.x), divup(H3, threads.y)); - - // Launch the kernel - resizeAndExtractRoi_kernel<<>>( - input_img, output_img, H, W, H2, W2, H3, W3, y_start, x_start); - - // Check for errors - return cudaGetLastError(); -} - } // namespace autoware::bevfusion diff --git a/perception/autoware_bevfusion/schema/bevfusion.schema.json b/perception/autoware_bevfusion/schema/bevfusion.schema.json index 1e1bb89d1ba48..953922c48b3c9 100644 --- a/perception/autoware_bevfusion/schema/bevfusion.schema.json +++ b/perception/autoware_bevfusion/schema/bevfusion.schema.json @@ -70,6 +70,11 @@ "description": "A name of frame id where world coordinates system is defined with respect to.", "default": "map" }, + "run_image_undistortion": { + "type": "boolean", + "description": "A flag to run image undistortion.", + "default": true + }, "circle_nms_dist_threshold": { "type": "number", "description": "A distance threshold between detections in NMS.", diff --git 
a/perception/autoware_bevfusion/src/bevfusion_node.cpp b/perception/autoware_bevfusion/src/bevfusion_node.cpp index 4b60b0926b7d5..4574bbda39efa 100644 --- a/perception/autoware_bevfusion/src/bevfusion_node.cpp +++ b/perception/autoware_bevfusion/src/bevfusion_node.cpp @@ -127,8 +127,11 @@ BEVFusionNode::BEVFusionNode(const rclcpp::NodeOptions & options) circle_nms_dist_threshold, yaw_norm_thresholds, score_threshold, use_intensity); sensor_fusion_ = config.sensor_fusion_; + use_compressed_images_ = this->declare_parameter("use_compressed_images", false, descriptor); + const auto run_image_undistortion = + this->declare_parameter("run_image_undistortion", descriptor); DensificationParam densification_param( densification_world_frame_id, densification_num_past_frames); @@ -144,6 +147,19 @@ BEVFusionNode::BEVFusionNode(const rclcpp::NodeOptions & options) image_backbone_engine_path, 1ULL << 32U) } : TrtBEVFusionConfig{trt_main_config, std::nullopt}; + + // Build Image Preprocessing Parameters + // TODO(KokSeang): Remove image preprocessing parameters out of BEVFusionConfig + auto image_pre_processing_params = ImagePreProcessingParams( + raw_image_height, + raw_image_width, + roi_height, + roi_width, + img_aug_scale_y, + img_aug_scale_x, + run_image_undistortion + ); + // clang-format on detector_ptr_ = std::make_unique(trt_bevfusion_config, densification_param, config); diagnostics_detector_trt_ = @@ -157,7 +173,7 @@ BEVFusionNode::BEVFusionNode(const rclcpp::NodeOptions & options) objects_pub_ = this->create_publisher( "~/output/objects", rclcpp::QoS(1)); - initializeSensorFusionSubscribers(config.num_cameras_); + initializeSensorFusionSubscribers(config.num_cameras_, image_pre_processing_params); published_time_pub_ = std::make_unique(this); @@ -202,7 +218,7 @@ void BEVFusionNode::cloudCallback( std::unordered_map proc_timing; bool is_num_voxels_within_range = true; const bool is_success = detector_ptr_->detect( - pc_msg_ptr, image_msgs_, camera_masks_, tf_buffer_, 
det_boxes3d, proc_timing, + pc_msg_ptr, camera_data_ptrs_, camera_masks_, tf_buffer_, det_boxes3d, proc_timing, is_num_voxels_within_range); if (!is_success) { @@ -243,25 +259,24 @@ void BEVFusionNode::cloudCallback( void BEVFusionNode::imageCallback( const sensor_msgs::msg::Image::ConstSharedPtr msg, std::size_t camera_id) { - image_msgs_[camera_id] = msg; + camera_data_ptrs_[camera_id]->update_image_msg(msg); std::size_t num_valid_images = std::count_if( - image_msgs_.begin(), image_msgs_.end(), - [](const auto & image_msg) { return image_msg != nullptr; }); + camera_data_ptrs_.begin(), camera_data_ptrs_.end(), + [](const auto & camera_data) { return camera_data->is_image_msg_available(); }); - images_available_ = num_valid_images == image_msgs_.size(); + images_available_ = num_valid_images == camera_data_ptrs_.size(); } void BEVFusionNode::cameraInfoCallback( const sensor_msgs::msg::CameraInfo & msg, std::size_t camera_id) { - camera_info_msgs_[camera_id] = msg; - + camera_data_ptrs_[camera_id]->update_camera_info(msg); std::size_t num_valid_intrinsics = std::count_if( - camera_info_msgs_.begin(), camera_info_msgs_.end(), - [](const auto & opt) { return opt.has_value(); }); + camera_data_ptrs_.begin(), camera_data_ptrs_.end(), + [](const auto & camera_data) { return camera_data->is_camera_info_available(); }); - intrinsics_available_ = num_valid_intrinsics == camera_info_msgs_.size(); + intrinsics_available_ = num_valid_intrinsics == camera_data_ptrs_.size(); if ( lidar2camera_extrinsics_[camera_id].has_value() || !lidar_frame_.has_value() || diff --git a/perception/autoware_bevfusion/src/bevfusion_node_utils.cpp b/perception/autoware_bevfusion/src/bevfusion_node_utils.cpp index 884c1c0b18f65..6a2c702290f72 100644 --- a/perception/autoware_bevfusion/src/bevfusion_node_utils.cpp +++ b/perception/autoware_bevfusion/src/bevfusion_node_utils.cpp @@ -41,7 +41,8 @@ void BEVFusionNode::validateParameters( } } -void 
BEVFusionNode::initializeSensorFusionSubscribers(std::int64_t num_cameras) +void BEVFusionNode::initializeSensorFusionSubscribers( + std::int64_t num_cameras, const ImagePreProcessingParams & image_pre_processing_params) { if (!sensor_fusion_) { return; @@ -49,9 +50,9 @@ void BEVFusionNode::initializeSensorFusionSubscribers(std::int64_t num_cameras) image_subs_.resize(num_cameras); camera_info_subs_.resize(num_cameras); - image_msgs_.resize(num_cameras); - camera_info_msgs_.resize(num_cameras); lidar2camera_extrinsics_.resize(num_cameras); + camera_data_ptrs_.resize(num_cameras); + camera_matrices_ptrs_.resize(num_cameras); auto resolve_topic_name = [this](const std::string & query) { return this->get_node_topics_interface()->resolve_topic_name(query); @@ -59,6 +60,15 @@ void BEVFusionNode::initializeSensorFusionSubscribers(std::int64_t num_cameras) const std::string transport = use_compressed_images_ ? "compressed" : "raw"; for (std::int64_t camera_id = 0; camera_id < num_cameras; ++camera_id) { + // First construct CameraMatrices, CameraMatrices is shared by multiple CameraData for the same + // camera_id + camera_matrices_ptrs_[camera_id] = std::make_shared(); + + // Then construct CameraData + camera_data_ptrs_[camera_id] = std::make_unique( + this, static_cast(camera_id), image_pre_processing_params, + camera_matrices_ptrs_[camera_id]); + // Explicitly resolve the topic name using the node name and namespace, please check // https://github.com/ros-perception/image_transport_plugins/issues/155 const std::string base_topic = resolve_topic_name("~/input/image" + std::to_string(camera_id)); @@ -111,8 +121,8 @@ void BEVFusionNode::precomputeIntrinsicsExtrinsics() std::vector lidar2camera_extrinsics; std::transform( - camera_info_msgs_.begin(), camera_info_msgs_.end(), std::back_inserter(camera_info_msgs), - [](const auto & opt) { return *opt; }); + camera_data_ptrs_.begin(), camera_data_ptrs_.end(), std::back_inserter(camera_info_msgs), + [](const auto & 
camera_data) { return camera_data->camera_info_value(); }); std::transform( lidar2camera_extrinsics_.begin(), lidar2camera_extrinsics_.end(), @@ -124,12 +134,11 @@ void BEVFusionNode::precomputeIntrinsicsExtrinsics() void BEVFusionNode::computeCameraMasks(double lidar_stamp) { - camera_masks_.resize(camera_info_msgs_.size()); + camera_masks_.resize(camera_data_ptrs_.size()); for (std::size_t i = 0; i < camera_masks_.size(); ++i) { + auto camera_mask_timestamp = rclcpp::Time(camera_data_ptrs_[i]->image_msg()->header.stamp); camera_masks_[i] = - (lidar_stamp - rclcpp::Time(image_msgs_[i]->header.stamp).seconds()) < max_camera_lidar_delay_ - ? 1.0 - : 0.f; + (lidar_stamp - camera_mask_timestamp.seconds()) < max_camera_lidar_delay_ ? 1.0 : 0.f; } }