
Commit 1412282

Update README\RELEASE.md for 2.38.0/23.09 (#6357)
* Update README\RELEASE.md for 2.38.0/23.09
* Revert Windows changes
1 parent 19b0e6e commit 1412282

File tree

2 files changed: +158 -38 lines changed


Dockerfile.win10.min (+35 -38)
@@ -39,23 +39,6 @@ RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope Loca
 RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
 RUN choco install git docker unzip -y
 
-#
-# Installing CMake
-#
-ARG CMAKE_VERSION=3.27.1
-ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
-ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
-
-ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
-RUN unzip %CMAKE_FILE%.zip
-RUN move %CMAKE_FILE% "c:\CMake"
-RUN setx PATH "c:\CMake\bin;%PATH%"
-
-ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
-ENV VCPKG_TARGET_TRIPLET x64-windows
-
-LABEL CMAKE_VERSION=${CMAKE_VERSION}
-
 # Be aware that pip can interact badly with VS cmd shell so need to pip install before
 # vsdevcmd.bat (see https://bugs.python.org/issue38989)
 ARG PYTHON_VERSION=3.8.10
@@ -68,31 +51,44 @@ RUN pip install grpcio-tools
 
 LABEL PYTHON_VERSION=${PYTHON_VERSION}
 
-#
-# Installing Visual Studio BuildTools: VS17 2022
-#
-ARG BUILDTOOLS_VERSION
-# Download collect.exe in case of an install failure.
-ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
-
-# Use the latest release channel. For more control, specify the location of an internal layout.
-ARG CHANNEL_URL=https://aka.ms/vs/17/release/channel
-ADD ${CHANNEL_URL} "C:\tmp\VisualStudio.chman"
-# Download the Build Tools bootstrapper.
-ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
-ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
-# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
+# Download and install Build Tools for Visual Studio. The use of
+# powershell for the install seems to be required to make the command
+# wait for the install to complete before continuing. To avoid failures
+# caused by VS regressions we want to stick with a working
+# compiler. Currently this is 16.11.21. This page contains download
+# links for buildtools.
+# https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers
+ARG BUILDTOOLS_VERSION=16.11.21
+ARG BUILDTOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/8f1eb024-006a-43f6-a372-0721f71058b3/cc5cc690ac094fbfa78dfb8e40089ba52056026579e8d8dc31e95e8ea5466df5/vs_BuildTools.exe
+ADD ${BUILDTOOLS_SOURCE} vs_buildtools.exe
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"
-RUN vs_buildtools.exe --quiet --wait --norestart --nocache install --installPath %VS_INSTALL_PATH_WP% --channelUri "C:\tmp\VisualStudio.chman" --installChannelUri "C:\tmp\VisualStudio.chman" --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --locale "En-us"
+RUN powershell.exe Start-Process -FilePath vs_buildtools.exe -ArgumentList "--wait","--quiet","--norestart","--nocache","--installPath","%VS_INSTALL_PATH_WP%","--channelUri","C:\tmp\doesnotexist.chman","--addProductLang","En-us","--add","Microsoft.VisualStudio.Workload.VCTools`;includeRecommended","--add","Microsoft.Component.MSBuild" -Wait -PassThru
 
 LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}
 
 WORKDIR /
 
+#
+# Installing CMake
+#
+ARG CMAKE_VERSION=3.26.1
+ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
+ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
+
+ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
+RUN unzip %CMAKE_FILE%.zip
+RUN move %CMAKE_FILE% CMake
+RUN setx PATH "c:\CMake\bin;%PATH%"
+
+ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
+ENV VCPKG_TARGET_TRIPLET x64-windows
+
+LABEL CMAKE_VERSION=${CMAKE_VERSION}
+
 #
 # Installing Vcpkg
 #
-ARG VCPGK_VERSION=2023.07.21
+ARG VCPGK_VERSION=2022.11.14
 RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
 WORKDIR /vcpkg
 RUN bootstrap-vcpkg.bat
@@ -104,12 +100,13 @@ LABEL VCPGK_VERSION=${VCPGK_VERSION}
 
 WORKDIR /
 
+
 #
 # Installing CUDA
 #
 ARG CUDA_MAJOR=12
 ARG CUDA_MINOR=2
-ARG CUDA_PATCH=1
+ARG CUDA_PATCH=0
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -130,14 +127,14 @@ ADD ${CUDA_SOURCE} cuda_${CUDA_VERSION}_windows_network.exe
 RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
 # Copy the CUDA visualstudio integration from where it was installed
 # into the appropriate place in BuildTools
-RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v170\BuildCustomizations"
+RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v160\BuildCustomizations"
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
 LABEL CUDA_VERSION="${CUDA_VERSION}"
 
 #
-# Installing TensorRT
+# Installing Tensorrt
 #
 ARG TENSORRT_VERSION=8.6.1.6
 ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
@@ -155,9 +152,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 
 
 #
-# Installing cuDNN
+# Installing CUDNN
 #
-ARG CUDNN_VERSION=8.9.5.27
+ARG CUDNN_VERSION=8.9.4.25
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
 ARG CUDNN_SOURCE=${CUDNN_ZIP}

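A note on the Build Tools hunk above: the commit replaces a direct `vs_buildtools.exe` invocation with `RUN powershell.exe Start-Process ... -Wait`, because (per the comment in the diff) the PowerShell wrapper is what makes the RUN step block until the install completes. Below is a minimal, standalone sketch of that wait-for-installer pattern; the base image tag, bootstrapper URL, and workload list here are illustrative assumptions, not values from this commit.

```dockerfile
# Minimal sketch of the install-wait pattern (base image, URL, and
# workload are assumptions for illustration only).
FROM mcr.microsoft.com/windows/servercore:ltsc2019

# Download the VS 2019 Build Tools bootstrapper.
ADD https://aka.ms/vs/16/release/vs_buildtools.exe vs_buildtools.exe

# Start-Process -Wait blocks this RUN step until the installer exits;
# invoking vs_buildtools.exe directly can return while the install is
# still running in the background, letting later steps start too early.
RUN powershell.exe Start-Process -FilePath vs_buildtools.exe -ArgumentList "--quiet","--wait","--norestart","--nocache","--installPath","C:\BuildTools","--add","Microsoft.VisualStudio.Workload.VCTools" -Wait -PassThru
```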
RELEASE.md (+123)
@@ -0,0 +1,123 @@
<!--
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

# Release Notes for 2.38.0

## New Features and Improvements

* Triton now has Python bindings for the C API. Please refer to
  [this PR](https://github.com/triton-inference-server/core/pull/265) for
  usage.

* Triton now forwards request parameters to each of the composing models of an
  ensemble model.

* The Filesystem API now supports named temporary cache directories when
  downloading models using the repository agent.

* Added the number of requests currently in the queue to the metrics API.
  Documentation can be found
  [here](https://github.com/triton-inference-server/server/blob/r23.09/docs/user_guide/metrics.md#pending-request-count-queue-size-per-model).

* Python backend models can now respond with error codes in addition to error
  messages.

* The TensorRT backend now supports
  [TensorRT version compatibility](https://github.com/triton-inference-server/tensorrt_backend/tree/r23.09#command-line-options)
  across models generated with the same major version of TensorRT. Use the
  `--backend-config=tensorrt,--version-compatible=true` flag to enable this
  feature (see the example after this list).

* Triton’s backend API now supports accessing the inference response outputs by
  name or by index. See the new API
  [here](https://github.com/triton-inference-server/core/blob/r23.09/include/triton/core/tritonbackend.h#L1572-L1608).

* The Python backend now supports loading
  [PyTorch models directly](https://github.com/triton-inference-server/python_backend/tree/r23.08#pytorch-platform-experimental).
  This feature is experimental and should be treated as Beta.

* Fixed an issue where, if the user didn’t call `SetResponseReleaseCallback`,
  canceling a new request could cancel the old response factory as well. Now,
  when canceling a request which is being re-used, a new response factory is
  created for each inference.

* Refer to the 23.09 column of the
  [Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
  for container image versions on which the 23.09 inference server container is
  based.
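As a usage illustration for the TensorRT version-compatibility flag above: the flag is quoted from the note, while the model repository path is a placeholder.

```bash
# Launch Triton with TensorRT version compatibility enabled.
# /models is a placeholder model repository path.
tritonserver --model-repository=/models \
             --backend-config=tensorrt,--version-compatible=true
```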

## Known Issues

* When using decoupled models, there is a possibility that the response order
  as sent from the backend may not match the order in which these responses
  are received by the streaming gRPC client. Note that this only applies to
  responses from different requests. Any responses corresponding to the same
  request will still be received in their expected order, relative to each
  other.

* The FasterTransformer backend is only officially supported for 22.12, though
  it can be built for Triton container versions up to 23.07.

* The Java CAPI is known to have intermittent segfaults; we’re looking for a
  root cause.

* Some systems which implement `malloc()` may not release memory back to the
  operating system right away, causing a false memory leak. This can be
  mitigated by using a different malloc implementation. Tcmalloc and jemalloc
  are installed in the Triton container and can be
  [used by specifying the library in LD_PRELOAD](https://github.com/triton-inference-server/server/blob/r22.12/docs/user_guide/model_management.md).
  We recommend experimenting with both `tcmalloc` and `jemalloc` to determine
  which one works better for your use case (see the sketch after this list).

* Auto-complete may cause an increase in server start time. To avoid a start
  time increase, users can provide the full model configuration and launch the
  server with `--disable-auto-complete-config`.

* Auto-complete does not support PyTorch models due to lack of metadata in the
  model. It can only verify that the number of inputs and the input names
  match what is specified in the model configuration. There is no model
  metadata about the number of outputs and datatypes. Related PyTorch bug:
  https://github.com/pytorch/pytorch/issues/38273

* Triton Client PIP wheels for ARM SBSA are not available from PyPI, so pip
  will install an incorrect Jetson version of the Triton Client library for
  Arm SBSA. The correct client wheel file can be pulled directly from the Arm
  SBSA SDK image and manually installed.

* Traced models in PyTorch seem to create overflows when int8 tensor values
  are transformed to int32 on the GPU. Refer to
  https://github.com/pytorch/pytorch/issues/66930 for more information.

* Triton cannot retrieve GPU metrics with MIG-enabled GPU devices (A100 and
  A30).

* Triton metrics might not work if the host machine is running a separate DCGM
  agent on bare-metal or in a container.

* When cloud storage (AWS, GCS, AZURE) is used as a model repository and a
  model has multiple versions, Triton creates an extra local copy of the cloud
  model’s folder in the temporary directory, which is deleted upon the
  server’s shutdown.
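A minimal sketch for the malloc workaround and the auto-complete note above. The tcmalloc library path is an assumption (it varies by distribution and container); the `--disable-auto-complete-config` flag and `LD_PRELOAD` approach are quoted from the notes.

```bash
# Preload tcmalloc before starting Triton; the library path below is an
# assumption and may differ in your container or distro.
LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libtcmalloc.so.4:${LD_PRELOAD} \
    tritonserver --model-repository=/models

# Avoid auto-complete start-time overhead by supplying full model
# configurations and disabling auto-complete explicitly.
tritonserver --model-repository=/models --disable-auto-complete-config
```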
