Skip to content

Commit 36bf558

Browse files
committed
feat: update v1.3 codes
1 parent 225b578 commit 36bf558

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

64 files changed

+8577
-580
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
5454
# Python.h needed by torch headers.
5555
find_package(Python3 REQUIRED COMPONENTS Development)
5656

57-
find_package(FasterTransformer)
5857
find_package(CUDA 10.1 REQUIRED)
5958
if (BUILD_MULTI_GPU)
6059
message(STATUS "Enable BUILD_MULTI_GPU.")
@@ -111,7 +110,7 @@ else()
111110
FetchContent_Declare(
112111
repo-ft
113112
GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer.git
114-
GIT_TAG main
113+
GIT_TAG v5.2
115114
GIT_SHALLOW ON
116115
)
117116
endif()
@@ -159,6 +158,7 @@ target_include_directories(
159158
${TRITON_PYTORCH_INCLUDE_PATHS}
160159
${Python3_INCLUDE_DIRS}
161160
${repo-ft_SOURCE_DIR}
161+
${repo-ft_SOURCE_DIR}/3rdparty/cutlass/include
162162
${repo-core_SOURCE_DIR}/include
163163
)
164164

README.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ Note that this is a research and prototyping tool, not a formal product or maint
5555

5656
| Models | FP16 | BF16 | Tensor parallel | Pipeline parallel |
5757
| -------- | ---- | ---- | --------------- | ----------------- |
58-
| GPT | Yes | Yes | Yes | Yes |
58+
| GPT/OPT | Yes | Yes | Yes | Yes |
59+
| BLOOM | Yes | Yes | Yes | Yes |
5960
| GPT-J | Yes | Yes | Yes | Yes |
60-
| T5 | Yes | Yes | Yes | Yes |
61+
| T5/UL2 | Yes | Yes | Yes | Yes |
6162
| GPT-NeoX | Yes | Yes | Yes | Yes |
6263
| BERT | Yes | Yes | Yes | Yes |
6364

@@ -136,6 +137,7 @@ But also you can build it manually in interactive session (ex during fixing code
136137

137138
```bash
138139
docker run -it \
140+
--shm-size=1g --ulimit memlock=-1 \
139141
-v ${WORKSPACE}:/workspace \
140142
--name ft_backend_builder \
141143
${TRITON_DOCKER_IMAGE} bash
@@ -242,8 +244,16 @@ Specifically `tools/issue_request.py` is a simple script that sends a request co
242244

243245
## Changelog
244246

247+
Oct 2022
248+
- Support IA3 in T5 and T5-Encoder
249+
250+
Sep 2022
251+
- Support T5-Encoder only backend
252+
- Support T5 prompt tuning and p tuning
253+
- Support factual-nucleus sampling ([link](https://arxiv.org/pdf/2206.04624.pdf))
254+
245255
Aug 2022
246-
- Release **FasterTransformer backend v1.2**
256+
- **Release the FasterTransformer backend 1.2**.
247257
- Support for interactive generation
248258

249259
July 2022
@@ -260,6 +270,7 @@ May 2022
260270
- Support optional input. (triton version must be after 22.05)
261271

262272
April 2022
273+
- **Release the FasterTransformer backend 1.1**.
263274
- Support bfloat16 inference in GPT model.
264275
- Support Nemo Megatron T5 and Megatron-LM T5 model.
265276
- Support optional input in fastertransformer backends. (Only supported after Triton 22.01)
@@ -278,4 +289,4 @@ Sep 2021
278289

279290
Apr 2021
280291
- **Release the FasterTransformer backend 1.0**.
281-
- Support Multi-GPU on GPT.
292+
- Support Multi-GPU on GPT.

all_models/bert/fastertransformer/config.pbtxt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ max_batch_size: 1024
3232
input [
3333
{
3434
name: "input_hidden_state"
35-
data_type: TYPE_FP16
3635
dims: [ -1, -1 ]
3736
},
3837
{
@@ -45,7 +44,6 @@ input [
4544
output [
4645
{
4746
name: "output_hidden_state"
48-
data_type: TYPE_FP16
4947
dims: [ -1, -1 ]
5048
}
5149
]
@@ -88,7 +86,7 @@ parameters {
8886
parameters {
8987
key: "model_checkpoint_path"
9088
value: {
91-
string_value: "../all_models/bert/fastertransformer/1/2-gpu/"
89+
string_value: "all_models/bert/fastertransformer/1/2-gpu/"
9290
}
9391
}
9492
parameters {

all_models/bloom/ensemble/1/.tmp

Whitespace-only changes.

0 commit comments

Comments
 (0)