mpj1234
diff --git a/‎.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎CMakeLists-win.txt‎
Lines changed: 88 additions & 0 deletions b/‎CMakeLists-win.txt‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 47 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 68 additions & 0 deletions b/‎README.md‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎gen_wts.py‎
Lines changed: 56 additions & 0 deletions b/‎gen_wts.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎images/bus.jpg‎
134 KB b/‎images/bus.jpg‎
134 KB
diff --git a/‎images/cat.jpg‎
64 KB b/‎images/cat.jpg‎
64 KB
diff --git a/‎images/dog.jpg‎
51.6 KB b/‎images/dog.jpg‎
51.6 KB
diff --git a/‎images/zidane.jpg‎
49.2 KB b/‎images/zidane.jpg‎
49.2 KB
diff --git a/‎include/block.h‎
Lines changed: 47 additions & 0 deletions b/‎include/block.h‎
Lines changed: 47 additions & 0 deletions
@@ -30,3 +30,10 @@
 *.exe
 *.out
 *.app
+
+.idea/
+models/
+cmake-build-debug/
+cmake-build-release/
+build/
+output/
@@ -0,0 +1,88 @@
+# Windows build script: project setup, language standard, source encoding
+# and the CUDA architectures to compile for.
+cmake_minimum_required(VERSION 3.28)
+project(yolov10_trtx_v10)
+
+set(CMAKE_CXX_STANDARD 17)
+# Treat sources as UTF-8: /utf-8 is handed to the host compiler used by nvcc
+# (via -Xcompiler) for .cu files and directly to the C++ compiler.
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /utf-8")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
+
+enable_language(CUDA)
+
+# Build the CUDA code for several GPU architectures (75, 86 and 89).
+set(CMAKE_CUDA_ARCHITECTURES 75 86 89)
+message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
+
+# Dependency locations.
+# Every root below is a cache entry: the default is the original author's
+# install path, and it can be overridden at configure time without editing
+# this file, e.g.  cmake -DTENSORRT_ROOT=E:/TensorRT/TensorRT-8.6.1.6 ..
+# Paths use forward slashes, which CMake accepts on Windows.
+
+# OpenCV: OpenCV_DIR is where find_package() looks for the OpenCV package
+# configuration files.
+set(OpenCV_DIR "E:/Opencv/install/opencv-4.8.0/build" CACHE PATH "Directory containing the OpenCV CMake package files")
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+link_directories(${OpenCV_LIB_DIR})
+
+# CUDA toolkit headers and x64 import libraries.
+set(CUDA_TOOLKIT_ROOT_DIR "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" CACHE PATH "CUDA toolkit installation root")
+include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
+link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")
+
+# TensorRT headers and import libraries.
+set(TENSORRT_ROOT "E:/TensorRT/TensorRT-10.2.0.19" CACHE PATH "TensorRT installation root")
+include_directories("${TENSORRT_ROOT}/include")
+link_directories("${TENSORRT_ROOT}/lib")
+
+# Detect the TensorRT major version from NvInferVersion.h. The result
+# (TENSORRT_VERSION_MAJOR) selects which nvinfer library name is linked:
+# "nvinfer_10" for major version >= 10, plain "nvinfer" otherwise.
+set(TENSORRT_VERSION_FILES "${TENSORRT_ROOT}/include/NvInferVersion.h")
+if (NOT EXISTS "${TENSORRT_VERSION_FILES}")
+    # Fail with a clear message instead of a malformed file(STRINGS) call.
+    message(FATAL_ERROR "NvInferVersion.h not found in ${TENSORRT_ROOT}/include - check TENSORRT_ROOT.")
+endif ()
+# Return at most one line: the one that defines NV_TENSORRT_MAJOR.
+file(STRINGS "${TENSORRT_VERSION_FILES}" TENSORRT_VERSION_LINES
+        LIMIT_COUNT 1
+        REGEX "#define NV_TENSORRT_MAJOR [0-9]+"
+)
+message(STATUS "  TENSORRT_VERSION_LINES: ${TENSORRT_VERSION_LINES}")
+# Capture only the digits, so anything that follows the number on that line
+# (for example a trailing comment) cannot end up in the version value.
+if (NOT "${TENSORRT_VERSION_LINES}" MATCHES "#define NV_TENSORRT_MAJOR ([0-9]+)")
+    message(FATAL_ERROR "Could not read NV_TENSORRT_MAJOR from ${TENSORRT_VERSION_FILES}.")
+endif ()
+set(TENSORRT_VERSION_MAJOR "${CMAKE_MATCH_1}")
+message(STATUS "  TENSORRT_VERSION_MAJOR: ${TENSORRT_VERSION_MAJOR}")
+# Only the nvinfer library name depends on the version; everything else in
+# the directory-wide link list is shared.
+if (TENSORRT_VERSION_MAJOR GREATER_EQUAL 10)
+    message(STATUS "  TensorRT version is greater than or equal to 10.")
+    set(TENSORRT_NVINFER_LIB nvinfer_10)
+else ()
+    message(STATUS "  TensorRT version is less than 10.")
+    set(TENSORRT_NVINFER_LIB nvinfer)
+endif ()
+link_libraries(
+        opencv_core
+        opencv_highgui
+        opencv_imgproc
+        opencv_imgcodecs
+        cudart
+        cublas
+        ${TENSORRT_NVINFER_LIB}
+)
+
+# Project headers and the project-local library directory.
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/plugin)
+include_directories(${PROJECT_SOURCE_DIR}/src)
+link_directories(${PROJECT_SOURCE_DIR}/lib)
+
+# NOMINMAX keeps <windows.h> from defining min/max macros.
+add_definitions(-DNOMINMAX)
+
+# NOTE(review): API_EXPORTS presumably switches the project's API macro to
+# dllexport - confirm against the headers that test it.
+add_definitions(-DAPI_EXPORTS)
+
+file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
+file(GLOB_RECURSE PLUGIN_SRCS ${PROJECT_SOURCE_DIR}/plugin/*.cu)
+
+# Link the nvinfer library that matches the detected TensorRT major version
+# rather than always "nvinfer_10", which does not exist for TensorRT < 10.
+if (TENSORRT_VERSION_MAJOR GREATER_EQUAL 10)
+    set(TENSORRT_NVINFER_LIB nvinfer_10)
+else ()
+    set(TENSORRT_NVINFER_LIB nvinfer)
+endif ()
+
+# Plugin shared library built from the CUDA sources under plugin/.
+add_library(myplugins SHARED ${PLUGIN_SRCS})
+target_link_libraries(myplugins PUBLIC ${TENSORRT_NVINFER_LIB} cudart)
+
+# Detection executable.
+add_executable(yolov10_det yolov10_det.cpp ${SRCS})
+target_link_libraries(yolov10_det PRIVATE
+        ${TENSORRT_NVINFER_LIB}
+        myplugins
+        cudart
+        ${OpenCV_LIBS}
+)
@@ -0,0 +1,47 @@
+# Linux build script: project setup, language standard, default build type
+# and CUDA language activation.
+cmake_minimum_required(VERSION 3.10)
+
+project(yolov10)
+
+add_definitions(-std=c++11)
+add_definitions(-DAPI_EXPORTS)
+set(CMAKE_CXX_STANDARD 11)
+# Debug stays the default, but a build type chosen by the user
+# (-DCMAKE_BUILD_TYPE=...) is no longer overwritten.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif()
+
+# Fall back to the conventional nvcc location only when no CUDA compiler was
+# selected through -DCMAKE_CUDA_COMPILER or the CUDACXX environment variable.
+if(NOT CMAKE_CUDA_COMPILER AND "$ENV{CUDACXX}" STREQUAL "")
+  set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
+endif()
+enable_language(CUDA)
+
+# Project headers.
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/plugin)
+
+# Include and link directories of CUDA and TensorRT. Adapt the CUDA paths if
+# yours differ; the TensorRT root can be overridden at configure time with
+# -DTENSORRT_ROOT=/path/to/TensorRT.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+  # aarch64: only the CUDA target directories are added on this branch.
+  message("embed_platform on")
+  include_directories(/usr/local/cuda/targets/aarch64-linux/include)
+  link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
+else()
+  message("embed_platform off")
+
+  # cuda
+  include_directories(/usr/local/cuda/include)
+  link_directories(/usr/local/cuda/lib64)
+
+  # tensorrt
+  set(TENSORRT_ROOT "/workspace/shared/TensorRT-10.2.0.19" CACHE PATH "TensorRT installation root")
+  include_directories("${TENSORRT_ROOT}/include")
+  link_directories("${TENSORRT_ROOT}/lib")
+endif()
+
+# Plugin shared library built from plugin/yololayer.cu.
+add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
+target_link_libraries(myplugins PUBLIC nvinfer cudart)
+
+# REQUIRED stops the configure step when OpenCV is missing; without it the
+# failure only shows up later as missing headers or unresolved symbols.
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+# Detection executable: the entry point plus everything under src/.
+file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
+add_executable(yolov10_det ${PROJECT_SOURCE_DIR}/yolov10_det.cpp ${SRCS})
+target_link_libraries(yolov10_det PRIVATE
+  nvinfer
+  cudart
+  myplugins
+  ${OpenCV_LIBS}
+)
@@ -1,2 +1,70 @@
 # YOLOv10-TensorRT10
 YOLOv10 series model supports the latest TensorRT10.
+## Introduction
+
+Yolov10 model supports TensorRT-10.
+
+## Environment
+
+* CUDA: 11.8
+* CUDNN: 8.9.1.23
+* TensorRT: TensorRT-10.2.0.19
+
+## Support
+
+* [x] YOLOv10-det support FP32/FP16/INT8 and Python/C++ API
+
+## Config
+
+* Choose the YOLOv10 sub-model n/s/m/b/l/x from command line arguments.
+* Other configs please check [src/config.h](src/config.h)
+
+## Build and Run
+
+1. generate .wts from pytorch with .pt, or download .wts from model zoo
+
+```shell
+git clone https://github.com/THU-MIG/yolov10.git
+cd yolov10/
+wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10n.pt
+
+git clone https://github.com/mpj1234/YOLOv10-TensorRT10.git
+cp [PATH-TO-YOLOv10-TensorRT10]/gen_wts.py [YOLOv10]/.
+
+python gen_wts.py -w yolov10n.pt -o yolov10n.wts
+# A file 'yolov10n.wts' will be generated.
+```
+
+2. build YOLOv10-TensorRT10 and run
+
+#### Detection
+
+```shell
+cd [PATH-TO-YOLOv10-TensorRT10]/
+# Update kNumClass in src/config.h if your model is trained on custom dataset
+mkdir build
+cd build
+cp [PATH-TO-yolov10]/yolov10n.wts .
+cmake ..
+make
+
+# Build and serialize TensorRT engine
+./yolov10_det -s yolov10n.wts yolov10n.engine [n/s/m/b/l/x]
+
+# Run inference
+./yolov10_det -d yolov10n.engine ../images
+# The results are displayed in the console
+```
+
+3. Optional, load and run the tensorrt model in Python
+```shell
+# Install python-tensorrt, pycuda, etc.
+# Make sure yolov10n.engine has been generated first
+python yolov10_det_trt.py ./build/yolov10n.engine ./build/libmyplugins.so
+```
+
+## INT8 Quantization
+1. Prepare calibration images: randomly select on the order of 1000 images from your training set. For COCO, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) (pwd: a9wh).
+2. unzip it in yolov10/build
+3. set the macro `USE_INT8` in src/config.h and make again
+4. serialize the model and test
@@ -0,0 +1,56 @@
+# -*- coding: UTF-8 -*-
+"""
+  @Author: mpj
+  @Date  : 2024/7/22 下午9:17
+  @version V1.0
+"""
+import sys  # noqa: F401
+import argparse
+import os
+import struct
+import torch
+
+
+def parse_args():
+	parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
+	parser.add_argument('-w', '--weights', default='./weights/yolov10n.pt',
+	                    help='Input weights (.pt) file path (required)')
+	parser.add_argument(
+		'-o', '--output', help='Output (.wts) file path (optional)')
+	args = parser.parse_args()
+	if not os.path.isfile(args.weights):
+		raise SystemExit('Invalid input file')
+	if not args.output:
+		args.output = os.path.splitext(args.weights)[0] + '.wts'
+	elif os.path.isdir(args.output):
+		args.output = os.path.join(
+			args.output,
+			os.path.splitext(os.path.basename(args.weights))[0] + '.wts')
+	return args.weights, args.output
+
+
+pt_file, wts_file = parse_args()
+
+# Load model
+print(f'Loading {pt_file}')
+
+# Initialize
+device = 'cpu'
+
+# Load model
+model = torch.load(pt_file, map_location=device)['model'].float()  # load to FP32
+# If the training is not finished, the model will be interrupted.
+# model = torch.load(pt_file, map_location=device)['ema'].float()  # load to FP32
+
+model.to(device).eval()
+
+with open(wts_file, 'w') as f:
+	f.write('{}\n'.format(len(model.state_dict().keys())))
+	for k, v in model.state_dict().items():
+		vr = v.reshape(-1).cpu().numpy()
+		f.write('{} {} '.format(k, len(vr)))
+		for vv in vr:
+			f.write(' ')
+			f.write(struct.pack('>f', float(vv)).hex())
+		f.write('\n')
+print(f'success {wts_file}!!!')
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+#include "NvInfer.h"
+
+// Prototypes of the helpers used to assemble the network through the
+// TensorRT C++ API. Only declarations are visible in this header, so the
+// notes below are derived from names and signatures and should be confirmed
+// against the definitions.
+//
+// NOTE(review): every builder except C2 takes `weightMap` by value, i.e. the
+// whole map is copied on each call; C2 takes it by reference.
+
+// Returns a map from name to nvinfer1::Weights for the file path `file`
+// (presumably a .wts file — TODO confirm).
+std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
+
+// Adds a scale layer to `network` on top of `input`; `lname` is presumably
+// the weightMap key prefix and `eps` the batch-norm epsilon — confirm.
+nvinfer1::IScaleLayer *addBatchNorm2d(nvinfer1::INetworkDefinition *network,
+                                      std::map<std::string, nvinfer1::Weights> weightMap,
+                                      nvinfer1::ITensor &input, std::string lname, float eps);
+
+// Presumably convolution + batch norm + SiLU; `ch`, `k`, `s`, `g` look like
+// output channels, kernel size, stride and groups (g defaults to 1) — confirm.
+nvinfer1::IElementWiseLayer *convBnSiLU(nvinfer1::INetworkDefinition *network,
+                                        std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input,
+                                        int ch, int k, int s, std::string lname, int g = 1);
+
+// Builder for a "C2F" block; `c1`/`c2`, `n`, `shortcut`, `e` are presumably
+// in/out channels, repeat count, residual flag and expansion ratio — confirm.
+nvinfer1::IElementWiseLayer *C2F(nvinfer1::INetworkDefinition *network,
+                                 std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, int c1,
+                                 int c2, int n, bool shortcut, float e, std::string lname);
+
+// Builder for a "C2" block; same parameter list as C2F, but `weightMap` is
+// passed by reference here.
+nvinfer1::IElementWiseLayer *C2(nvinfer1::INetworkDefinition *network,
+                                std::map<std::string, nvinfer1::Weights> &weightMap, nvinfer1::ITensor &input, int c1,
+                                int c2, int n, bool shortcut, float e, std::string lname);
+
+// Builder for an "SPPF" block; `k` is presumably the pooling kernel size — confirm.
+nvinfer1::IElementWiseLayer *SPPF(nvinfer1::INetworkDefinition *network,
+                                  std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input, int c1,
+                                  int c2, int k, std::string lname);
+
+// Builder for a "DFL" stage returning a shuffle layer; the meaning of `ch`,
+// `grid`, `k`, `s`, `p` is not visible here — see the definition.
+nvinfer1::IShuffleLayer *DFL(nvinfer1::INetworkDefinition *network, std::map<std::string, nvinfer1::Weights> weightMap,
+                             nvinfer1::ITensor &input, int ch, int grid, int k, int s, int p, std::string lname);
+
+// Adds a plugin layer fed by the layers in `dets`; `px_arry` points to
+// `px_arry_num` ints whose meaning is not visible here — see the definition.
+nvinfer1::IPluginV2Layer *addYoLoLayer(nvinfer1::INetworkDefinition *network,
+                                       std::vector<nvinfer1::ILayer *> dets, const int *px_arry,
+                                       int px_arry_num);
+
+// Builder for an "SCDown" block; `ch`, `k`, `s` are presumably output
+// channels, kernel size and stride — confirm.
+nvinfer1::ILayer *SCDown(nvinfer1::INetworkDefinition *network,
+                         std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input,
+                         int ch, int k, int s, std::string lname);
+
+// Builder for a "PSA" block operating on `input` with `ch` channels
+// (presumably — confirm).
+nvinfer1::ILayer *PSA(nvinfer1::INetworkDefinition *network,
+                      std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input,
+                      int ch, std::string lname);
+
+// Builder for a "C2fCIB" block; parameters mirror C2F plus the extra flag
+// `lk`, whose meaning is not visible here — see the definition.
+nvinfer1::ILayer *C2fCIB(nvinfer1::INetworkDefinition *network,
+                         std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor &input,
+                         int c1, int c2, int n, bool shortcut, bool lk, float e, std::string lname);