|
58 | 58 |
|
59 | 59 | - name: Install rebel-compiler |
60 | 60 | run: | |
| 61 | + sudo apt-get update |
| 62 | + sudo apt-get install -y build-essential |
| 63 | + export CXX=$(which g++) |
61 | 64 | python3 -m pip uninstall rebel-compiler -y |
62 | 65 | PYPI_URL=$(echo ${{ env.REBEL_PYPI_ENDPOINT }} | sed "s/\/\//\0${{ env.REBEL_PYPI_USERNAME }}:${{ env.REBEL_PYPI_PASSWORD }}@/") |
63 | 66 | VERSION=${{ inputs.rebel_compiler_version || steps.get_latest_rebel_compiler.outputs.LATEST_COMPILER_VER }} |
@@ -88,63 +91,75 @@ jobs: |
88 | 91 |
|
89 | 92 | - name: Run decoder-only test (eager attn) (V1) |
90 | 93 | run: > |
91 | | - VLLM_USE_V1=1 python3 examples/optimum/run_decoder_only.py |
| 94 | + python3 examples/optimum/run_decoder_only.py |
92 | 95 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
93 | 96 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llama2-7b_batch2 |
94 | 97 | --prompt_txt ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/prompts/copy_prompts.txt |
95 | 98 | --golden_json ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/golden/golden_llama7b_result_copy_prompts.json |
96 | 99 |
|
97 | 100 | - name: Run decoder-only test (Flash-attention mode) (V1) |
98 | 101 | run: > |
99 | | - VLLM_USE_V1=1 python3 examples/optimum/run_decoder_only.py --batch_size 4 --max_seq_len 131072 --kvcache_block_size 16384 |
| 102 | + python3 examples/optimum/run_decoder_only.py --max_seq_len 131072 |
100 | 103 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
101 | 104 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llama3_2-3b-128k_kv16k_batch4 |
102 | 105 | --prompt_txt ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/prompts/copy_prompts.txt |
103 | 106 | --golden_json ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/golden/golden_llama3_2_3b_instruct_128k_copy_prompts.json |
104 | 107 |
|
105 | 108 | - name : Run Llava-next (Eager mode) (V1) |
106 | 109 | run: > |
107 | | - VLLM_USE_V1=1 python3 examples/optimum/run_llava.py --max_seq_len 32768 --kvcache_partition_len 32768 |
| 110 | + python3 examples/optimum/run_llava.py |
| 111 | + --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
| 112 | + --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llava-v1.6-mistral-7b-hf-32k-b4/ |
| 113 | + |
| 114 | + - name : Run Llava-next (Eager mode) (V0) |
| 115 | + run: > |
| 116 | + VLLM_USE_V1=0 python3 examples/optimum/run_llava.py |
108 | 117 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
109 | 118 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llava-v1.6-mistral-7b-hf-32k-b4/ |
110 | 119 |
|
111 | 120 | - name : Run Llava-next (Flash-attention mode) (V1) |
112 | 121 | run: > |
113 | | - VLLM_USE_V1=1 python3 examples/optimum/run_llava.py --max_seq_len 32768 --kvcache_partition_len 16384 |
| 122 | + python3 examples/optimum/run_llava.py |
| 123 | + --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
| 124 | + --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llava-v1.6-mistral-7b-hf-32k-b4-kv16k |
| 125 | + |
| 126 | + - name : Run Llava-next (Flash-attention mode) (V0) |
| 127 | + run: > |
| 128 | + VLLM_USE_V1=0 python3 examples/optimum/run_llava.py |
114 | 129 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
115 | 130 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/llava-v1.6-mistral-7b-hf-32k-b4-kv16k |
116 | 131 |
|
117 | 132 | - name : Run Idefics3 (Eager mode) (V1) |
118 | 133 | run: > |
119 | | - VLLM_USE_V1=1 python3 examples/optimum/run_idefics3.py --max_seq_len 32768 --kvcache_partition_len 32768 |
| 134 | + python3 examples/optimum/run_idefics3.py |
120 | 135 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
121 | 136 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/idefics3-8b-llama3-32k-b4 |
122 | 137 |
|
123 | 138 | - name : Run Idefics3 (Flash-attention mode) (V1) |
124 | 139 | run: > |
125 | | - VLLM_USE_V1=1 python3 examples/optimum/run_idefics3.py --max_seq_len 32768 --kvcache_partition_len 16384 |
| 140 | + python3 examples/optimum/run_idefics3.py |
126 | 141 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
127 | 142 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/idefics3-8b-llama3-32k-b4-kv16k |
128 | 143 |
|
129 | 144 | - name : Run Blip2 (V1) |
130 | 145 | run: > |
131 | | - VLLM_USE_V1=1 python3 examples/optimum/run_blip2.py |
| 146 | + python3 examples/optimum/run_blip2.py |
132 | 147 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
133 | 148 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/blip2-opt-2.7b-2k-b4 |
134 | 149 |
|
135 | 150 | - name : Run Qwen2.5_VL (V1) |
136 | 151 | run: > |
137 | | - VLLM_USE_V1=1 python3 examples/optimum/run_qwen_vl.py |
| 152 | + python3 examples/optimum/run_qwen_vl.py |
138 | 153 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
139 | 154 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/qwen2_5-vl-7b-32k-b4-kv16k |
140 | 155 |
|
141 | | - - name : Run encoder-decoder |
| 156 | + - name : Run encoder-decoder (V1) |
142 | 157 | run: > |
143 | 158 | python3 examples/optimum/run_encoder_decoder.py |
144 | 159 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
145 | 160 | --model_id ${{ env.REBEL_VLLM_PRE_COMPILED_DIR }}/rbln_bart-small_batch2 |
146 | 161 |
|
147 | | - - name : Run text embedding model |
| 162 | + - name : Run text embedding model (V1) |
148 | 163 | run: > |
149 | 164 | python3 examples/optimum/run_encoder_only.py |
150 | 165 | --num_input_prompt ${{ env.NUM_INPUT_PROMPT }} |
|
0 commit comments