-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathdeepseek_trtrtx.json.config
More file actions
99 lines (99 loc) · 2.91 KB
/
Copy pathdeepseek_trtrtx.json.config
File metadata and controls
99 lines (99 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
"$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
"name": "Convert to NVIDIA TRT for RTX",
"oliveFile": "NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-7B_nvmo_int4_rtn.json",
"isLLM": true,
"debugInfo": {
"autoGenerated": true,
"useModelBuilder": "builder"
},
"needHFLogin": true,
"runtimeOverwrite": {
"autoGenerated": true,
"executeRequirement": "General/CUDA_py3.12.9"
},
"executeRuntimeFeatures": [
"NVModelOptQuantization"
],
"runtime": {
"autoGenerated": true,
"name": "Evaluate on",
"type": "enum",
"displayNames": [
"NVIDIA TensorRT for RTX"
],
"path": "systems.local_system.accelerators.0.execution_providers.0",
"values": [
"NvTensorRTRTXExecutionProvider"
],
"readOnly": false
},
"optimizationPaths": [
{
"path": "passes.builder.precision"
}
],
"optimizationDefault": "fp16",
"sections": [
{
"autoGenerated": true,
"name": "Convert",
"phase": "Conversion",
"parameters": [],
"toggle": {
"autoGenerated": true,
"name": "Convert to ONNX format",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
},
{
"autoGenerated": true,
"name": "Optimization",
"phase": "Quantization",
"parameters": [
{
"autoGenerated": true,
"name": "Precision",
"description": "Precision of model",
"type": "enum",
"displayNames": [
"Int4",
"Bf16",
"Fp16",
"Fp32"
],
"displayType": "RadioGroup",
"path": "passes.builder.precision",
"values": [
"int4",
"bf16",
"fp16",
"fp32"
],
"template": {
"path": "passes.builder.precision",
"template": "ModelBuilderPrecision"
}
}
],
"disableToggleGeneration": true,
"toggle": {
"autoGenerated": true,
"name": "Optimize model",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
}
]
}