olive-recipes/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_trtrtx.json.config at main · microsoft/olive-recipes · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
    "$schema": "https://github.com/microsoft/olive-recipes/raw/refs/heads/main/.aitk/configs/config_schema.json",
    "name": "Convert to NVIDIA TRT for RTX",
    "oliveFile": "NvTensorRtRtx/DeepSeek-R1-Distill-Qwen-7B_nvmo_int4_rtn.json",
    "isLLM": true,
    "debugInfo": {
        "autoGenerated": true,
        "useModelBuilder": "builder"
    },
    "needHFLogin": true,
    "runtimeOverwrite": {
        "autoGenerated": true,
        "executeRequirement": "General/CUDA_py3.12.9"
    },
    "executeRuntimeFeatures": [
        "NVModelOptQuantization"
    ],
    "runtime": {
        "autoGenerated": true,
        "name": "Evaluate on",
        "type": "enum",
        "displayNames": [
            "NVIDIA TensorRT for RTX"
        ],
        "path": "systems.local_system.accelerators.0.execution_providers.0",
        "values": [
            "NvTensorRTRTXExecutionProvider"
        ],
        "readOnly": false
    },
    "optimizationPaths": [
        {
            "path": "passes.builder.precision"
        }
    ],
    "optimizationDefault": "fp16",
    "sections": [
        {
            "autoGenerated": true,
            "name": "Convert",
            "phase": "Conversion",
            "parameters": [],
            "toggle": {
                "autoGenerated": true,
                "name": "Convert to ONNX format",
                "type": "bool",
                "path": "passes.builder",
                "actions": [
                    [],
                    []
                ],
                "readOnly": true
            }
        },
        {
            "autoGenerated": true,
            "name": "Optimization",
            "phase": "Quantization",
            "parameters": [
                {
                    "autoGenerated": true,
                    "name": "Precision",
                    "description": "Precision of model",
                    "type": "enum",
                    "displayNames": [
                        "Int4",
                        "Bf16",
                        "Fp16",
                        "Fp32"
                    ],
                    "displayType": "RadioGroup",
                    "path": "passes.builder.precision",
                    "values": [
                        "int4",
                        "bf16",
                        "fp16",
                        "fp32"
                    ],
                    "template": {
                        "path": "passes.builder.precision",
                        "template": "ModelBuilderPrecision"
                    }
                }
            ],
            "disableToggleGeneration": true,
            "toggle": {
                "autoGenerated": true,
                "name": "Optimize model",
                "type": "bool",
                "path": "passes.builder",
                "actions": [
                    [],
                    []
                ],
                "readOnly": true
            }
        }
    ]
}