Skip to content

Commit 737f70a

Browse files
committed
Update onnxruntime-extensions to include InternLM2Tokenizer
- Bump extensions commit to 087953cd (includes InternLM2Tokenizer support).
- Remove the local patch now that support is upstream.
Ref: microsoft/onnxruntime-extensions#1023
1 parent 263b29d commit 737f70a

File tree

2 files changed

+1
-17
lines changed

2 files changed

+1
-17
lines changed

cmake/deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f78029
1414
googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034
1515
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
1616
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
17-
onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;2fbe0ebbb3eb21199ab74c92b6edf3804d827998
17+
onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;087953cde6149e423c6848c40c3791264272706c
1818

1919
# These two dependencies are for the optional constrained decoding feature (USE_GUIDANCE)
2020
llguidance;https://github.com/microsoft/llguidance.git;94fa39128ef184ffeda33845f6d333f332a34b4d

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,22 +88,6 @@ FetchContent_Declare(
8888
set(OCOS_BUILD_PRESET ort_genai)
8989
onnxruntime_fetchcontent_makeavailable(onnxruntime_extensions)
9090

91-
# Add InternLM2Tokenizer support: same BPE/LLaMA format as LlamaTokenizer (see Hugging Face model card).
92-
# This allows exported models to keep the official tokenizer_config.json (InternLM2Tokenizer) per
93-
# https://huggingface.co/internlm/internlm2-1_8b/blob/main/tokenizer_config.json
94-
FetchContent_GetProperties(onnxruntime_extensions)
95-
if(onnxruntime_extensions_POPULATED)
96-
set(ORTX_TOKENIZER_CONFIG "${onnxruntime_extensions_SOURCE_DIR}/operators/tokenizer/tokenizer_jsconfig.hpp")
97-
if(EXISTS "${ORTX_TOKENIZER_CONFIG}")
98-
file(READ "${ORTX_TOKENIZER_CONFIG}" ORTX_TOKENIZER_CONFIG_CONTENT)
99-
string(REPLACE "{\"LlamaTokenizer\", TokenType::kBPE},"
100-
"{\"LlamaTokenizer\", TokenType::kBPE},\n {\"InternLM2Tokenizer\", TokenType::kBPE},"
101-
ORTX_TOKENIZER_CONFIG_CONTENT "${ORTX_TOKENIZER_CONFIG_CONTENT}")
102-
file(WRITE "${ORTX_TOKENIZER_CONFIG}" "${ORTX_TOKENIZER_CONFIG_CONTENT}")
103-
message(STATUS "Patched onnxruntime_extensions: added InternLM2Tokenizer support")
104-
endif()
105-
endif()
106-
10791
list(APPEND EXTERNAL_LIBRARIES
10892
onnxruntime_extensions
10993
ocos_operators

0 commit comments

Comments (0)