We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7220d1f, commit 339a554 (Copy full SHA for 339a554)
models/tt_transformers/tt/generator.py
@@ -1228,18 +1228,12 @@ def decode_forward_llama_vision(
1228
else:
1229
tt_logits = self._decode_forward_no_trace(**decode_kwargs)
1230
1231
- output = None
1232
if read_from_device:
1233
to_host = self.read_decode_output(tt_logits)
1234
- output = self.process_decode_output_host(to_host)
+ # skip log_probs
+ return self.process_decode_output_host(to_host)[0]
1235
1236
- output = tt_logits
1237
-
1238
- # skip returning log-probs
1239
- if isinstance(output, tuple):
1240
- return output[0]
1241
- else:
1242
- return output
+ return tt_logits
1243
1244
# Note: This function is called by vLLM
1245
def read_decode_output(self, tt_out, async_read=False):
0 commit comments