Skip to content

Commit 2beb657

Browse files
committed
feat: update v1.1 codes
1 parent da9d17a commit 2beb657

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+110396
-1187
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,4 @@
66
compile_commands.json
77
../all_models/fastertransformer/1/*
88
*.pyc
9-
*.bin
9+
*.bin

CMakeLists.txt

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
1+
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
22
#
33
# Redistribution and use in source and binary forms, with or without
44
# modification, are permitted provided that the following conditions
@@ -36,15 +36,18 @@ option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
3636
set(TRITON_PYTORCH_INCLUDE_PATHS "" CACHE PATH "Paths to Torch includes")
3737
set(TRITON_PYTORCH_LIB_PATHS "" CACHE PATH "Paths to Torch libraries")
3838

39-
set(TRITON_BACKEND_REPO_TAG "r21.02" CACHE STRING "Tag for triton-inference-server/backend repo")
40-
set(TRITON_CORE_REPO_TAG "r21.02" CACHE STRING "Tag for triton-inference-server/core repo")
41-
set(TRITON_COMMON_REPO_TAG "r21.02" CACHE STRING "Tag for triton-inference-server/common repo")
39+
set(TRITON_BACKEND_REPO_TAG "r22.03" CACHE STRING "Tag for triton-inference-server/backend repo")
40+
set(TRITON_CORE_REPO_TAG "r22.03" CACHE STRING "Tag for triton-inference-server/core repo")
41+
set(TRITON_COMMON_REPO_TAG "r22.03" CACHE STRING "Tag for triton-inference-server/common repo")
4242

4343
if(NOT CMAKE_BUILD_TYPE)
4444
set(CMAKE_BUILD_TYPE Release)
4545
endif()
4646

47-
set(BUILD_GPT "ON")
47+
set(BUILD_MULTI_GPU "ON")
48+
message("-- Enable BUILD_MULTI_GPU")
49+
set(USE_TRITONSERVER_DATATYPE "ON")
50+
message("-- Enable USE_TRITONSERVER_DATATYPE")
4851

4952
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
5053

@@ -63,6 +66,11 @@ if (${CUDA_VERSION} GREATER_EQUAL 11.0)
6366
add_definitions("-DCUDA11_MODE")
6467
endif()
6568

69+
if(${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11")
70+
add_definitions("-DENABLE_BF16")
71+
message("CUDA_VERSION ${CUDA_VERSION_MAJOR} is greater or equal than 11, enable -DENABLE_BF16 flag")
72+
endif()
73+
6674
#
6775
# Dependencies
6876
#
@@ -91,8 +99,8 @@ FetchContent_Declare(
9199
)
92100
FetchContent_Declare(
93101
repo-ft
94-
GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer
95-
GIT_TAG dev/v5.0_beta
102+
GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer.git
103+
GIT_TAG main
96104
GIT_SHALLOW ON
97105
)
98106
FetchContent_MakeAvailable(repo-common repo-core repo-backend repo-ft)
@@ -132,17 +140,17 @@ set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
132140
target_compile_definitions(triton-fastertransformer-backend
133141
PUBLIC
134142
USE_TRITONSERVER_DATATYPE
135-
BUILD_GPT)
143+
BUILD_MULTI_GPU)
136144

137145
target_include_directories(
138146
triton-fastertransformer-backend
139147
PRIVATE
140148
${CMAKE_CURRENT_SOURCE_DIR}/src
141-
# ${CMAKE_CURRENT_SOURCE_DIR}/test
142149
${TRITON_PYTORCH_INCLUDE_PATHS}
143150
${Python3_INCLUDE_DIRS}
144151
${MPI_INCLUDE_PATH}
145152
${repo-ft_SOURCE_DIR}
153+
${repo-core_SOURCE_DIR}/include
146154
)
147155

148156
target_link_directories(

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
1+
Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
22

33
Redistribution and use in source and binary forms, with or without
44
modification, are permitted provided that the following conditions

README.md

Lines changed: 17 additions & 281 deletions
Large diffs are not rendered by default.

all_models/gpt/ensemble/config.pbtxt

Lines changed: 164 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,71 @@ input [
1010
{
1111
name: "INPUT_1"
1212
data_type: TYPE_UINT32
13+
dims: [ -1 ]
14+
},
15+
{
16+
name: "INPUT_2"
17+
data_type: TYPE_STRING
18+
dims: [ -1 ]
19+
},
20+
{
21+
name: "INPUT_3"
22+
data_type: TYPE_STRING
23+
dims: [ -1 ]
24+
},
25+
{
26+
name: "runtime_top_k"
27+
data_type: TYPE_UINT32
28+
dims: [ 1 ]
29+
},
30+
{
31+
name: "runtime_top_p"
32+
data_type: TYPE_FP32
33+
dims: [ 1 ]
34+
},
35+
{
36+
name: "beam_search_diversity_rate"
37+
data_type: TYPE_FP32
38+
dims: [ 1 ]
39+
},
40+
{
41+
name: "temperature"
42+
data_type: TYPE_FP32
43+
dims: [ 1 ]
44+
},
45+
{
46+
name: "len_penalty"
47+
data_type: TYPE_FP32
48+
dims: [ 1 ]
49+
},
50+
{
51+
name: "repetition_penalty"
52+
data_type: TYPE_FP32
53+
dims: [ 1 ]
54+
},
55+
{
56+
name: "random_seed"
57+
data_type: TYPE_INT32
58+
dims: [ 1 ]
59+
},
60+
{
61+
name: "is_return_log_probs"
62+
data_type: TYPE_BOOL
63+
dims: [ 1 ]
64+
},
65+
{
66+
name: "beam_width"
67+
data_type: TYPE_UINT32
68+
dims: [ 1 ]
69+
},
70+
{
71+
name: "start_id"
72+
data_type: TYPE_UINT32
73+
dims: [ 1 ]
74+
},
75+
{
76+
name: "end_id"
77+
data_type: TYPE_UINT32
1378
dims: [ 1 ]
1479
}
1580
]
@@ -18,6 +83,21 @@ output [
1883
name: "OUTPUT_0"
1984
data_type: TYPE_STRING
2085
dims: [ -1, -1 ]
86+
},
87+
{
88+
name: "sequence_length"
89+
data_type: TYPE_UINT32
90+
dims: [ -1 ]
91+
},
92+
{
93+
name: "cum_log_probs"
94+
data_type: TYPE_FP32
95+
dims: [ -1 ]
96+
},
97+
{
98+
name: "output_log_probs"
99+
data_type: TYPE_FP32
100+
dims: [ -1, -1 ]
21101
}
22102
]
23103
ensemble_scheduling {
@@ -33,10 +113,26 @@ ensemble_scheduling {
33113
key: "REQUEST_OUTPUT_LEN"
34114
value: "INPUT_1"
35115
}
116+
input_map {
117+
key: "BAD_WORDS_DICT"
118+
value: "INPUT_2"
119+
}
120+
input_map {
121+
key: "STOP_WORDS_DICT"
122+
value: "INPUT_3"
123+
}
36124
output_map {
37125
key: "INPUT_ID"
38126
value: "_INPUT_ID"
39127
}
128+
output_map {
129+
key: "BAD_WORDS_IDS"
130+
value: "_BAD_WORDS_IDS"
131+
}
132+
output_map {
133+
key: "STOP_WORDS_IDS"
134+
value: "_STOP_WORDS_IDS"
135+
}
40136
output_map {
41137
key: "REQUEST_INPUT_LEN"
42138
value: "_REQUEST_INPUT_LEN"
@@ -50,21 +146,85 @@ ensemble_scheduling {
50146
model_name: "fastertransformer"
51147
model_version: -1
52148
input_map {
53-
key: "INPUT_ID"
149+
key: "input_ids"
54150
value: "_INPUT_ID"
55151
}
56152
input_map {
57-
key: "REQUEST_INPUT_LEN"
153+
key: "input_lengths"
58154
value: "_REQUEST_INPUT_LEN"
59155
}
60156
input_map {
61-
key: "REQUEST_OUTPUT_LEN"
157+
key: "request_output_len"
62158
value: "_REQUEST_OUTPUT_LEN"
63159
}
160+
input_map {
161+
key: "runtime_top_k"
162+
value: "runtime_top_k"
163+
}
164+
input_map {
165+
key: "runtime_top_p"
166+
value: "runtime_top_p"
167+
}
168+
input_map {
169+
key: "beam_search_diversity_rate"
170+
value: "beam_search_diversity_rate"
171+
}
172+
input_map {
173+
key: "temperature"
174+
value: "temperature"
175+
}
176+
input_map {
177+
key: "len_penalty"
178+
value: "len_penalty"
179+
}
180+
input_map {
181+
key: "repetition_penalty"
182+
value: "repetition_penalty"
183+
}
184+
input_map {
185+
key: "random_seed"
186+
value: "random_seed"
187+
}
188+
input_map {
189+
key: "is_return_log_probs"
190+
value: "is_return_log_probs"
191+
}
192+
input_map {
193+
key: "beam_width"
194+
value: "beam_width"
195+
}
196+
input_map {
197+
key: "start_id"
198+
value: "start_id"
199+
}
200+
input_map {
201+
key: "end_id"
202+
value: "end_id"
203+
}
204+
input_map {
205+
key: "stop_words_list"
206+
value: "_STOP_WORDS_IDS"
207+
}
208+
input_map {
209+
key: "bad_words_list"
210+
value: "_BAD_WORDS_IDS"
211+
}
64212
output_map {
65-
key: "OUTPUT0"
213+
key: "output_ids"
66214
value: "_TOKENS_BATCH"
67215
}
216+
output_map {
217+
key: "sequence_length"
218+
value: "sequence_length"
219+
}
220+
output_map {
221+
key: "cum_log_probs"
222+
value: "cum_log_probs"
223+
}
224+
output_map {
225+
key: "output_log_probs"
226+
value: "output_log_probs"
227+
}
68228
},
69229
{
70230
model_name: "postprocessing"

0 commit comments

Comments
 (0)