Skip to content

Commit 7d23c3a

Browse files
Fix: presentation parsing & Embedding encode exception handling (#11933)
### What problem does this PR solve? Fix: presentation parsing #11920 Fix: Embedding encode exception handling ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
1 parent 6be0338 commit 7d23c3a

File tree

2 files changed

+31
-5
lines changed

2 files changed

+31
-5
lines changed

rag/app/presentation.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
227227
for pn, (txt, img) in enumerate(sections):
228228
d = copy.deepcopy(doc)
229229
pn += from_page
230-
if img:
231-
d["image"] = img
230+
if not isinstance(img, Image.Image):
231+
img = None
232+
d["image"] = img
232233
d["page_num_int"] = [pn + 1]
233234
d["top_int"] = [0]
234235
d["position_int"] = [(pn + 1, 0, img.size[0] if img else 0, 0,

rag/llm/embedding_model.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,16 @@ def encode(self, texts: list):
121121
total_tokens += self.total_token_count(res)
122122
except Exception as _e:
123123
log_exception(_e, res)
124+
raise Exception(f"Error: {res}")
124125
return np.array(ress), total_tokens
125126

126127
def encode_queries(self, text):
127128
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
128-
return np.array(res.data[0].embedding), self.total_token_count(res)
129+
try:
130+
return np.array(res.data[0].embedding), self.total_token_count(res)
131+
except Exception as _e:
132+
log_exception(_e, res)
133+
raise Exception(f"Error: {res}")
129134

130135

131136
class LocalAIEmbed(Base):
@@ -147,6 +152,7 @@ def encode(self, texts: list):
147152
ress.extend([d.embedding for d in res.data])
148153
except Exception as _e:
149154
log_exception(_e, res)
155+
raise Exception(f"Error: {res}")
150156
# local embedding for LM Studio does not count tokens
151157
return np.array(ress), 1024
152158

@@ -222,6 +228,7 @@ def encode_queries(self, text):
222228
return np.array(resp["output"]["embeddings"][0]["embedding"]), self.total_token_count(resp)
223229
except Exception as _e:
224230
log_exception(_e, resp)
231+
raise Exception(f"Error: {resp}")
225232

226233

227234
class ZhipuEmbed(Base):
@@ -249,6 +256,7 @@ def encode(self, texts: list):
249256
tks_num += self.total_token_count(res)
250257
except Exception as _e:
251258
log_exception(_e, res)
259+
raise Exception(f"Error: {res}")
252260
return np.array(arr), tks_num
253261

254262
def encode_queries(self, text):
@@ -257,6 +265,7 @@ def encode_queries(self, text):
257265
return np.array(res.data[0].embedding), self.total_token_count(res)
258266
except Exception as _e:
259267
log_exception(_e, res)
268+
raise Exception(f"Error: {res}")
260269

261270

262271
class OllamaEmbed(Base):
@@ -281,6 +290,7 @@ def encode(self, texts: list):
281290
arr.append(res["embedding"])
282291
except Exception as _e:
283292
log_exception(_e, res)
293+
raise Exception(f"Error: {res}")
284294
tks_num += 128
285295
return np.array(arr), tks_num
286296

@@ -293,6 +303,7 @@ def encode_queries(self, text):
293303
return np.array(res["embedding"]), 128
294304
except Exception as _e:
295305
log_exception(_e, res)
306+
raise Exception(f"Error: {res}")
296307

297308

298309
class XinferenceEmbed(Base):
@@ -315,6 +326,7 @@ def encode(self, texts: list):
315326
total_tokens += self.total_token_count(res)
316327
except Exception as _e:
317328
log_exception(_e, res)
329+
raise Exception(f"Error: {res}")
318330
return np.array(ress), total_tokens
319331

320332
def encode_queries(self, text):
@@ -324,6 +336,7 @@ def encode_queries(self, text):
324336
return np.array(res.data[0].embedding), self.total_token_count(res)
325337
except Exception as _e:
326338
log_exception(_e, res)
339+
raise Exception(f"Error: {res}")
327340

328341

329342
class YoudaoEmbed(Base):
@@ -399,6 +412,7 @@ def encode(self, texts: list[str|bytes], task="retrieval.passage"):
399412
token_count += self.total_token_count(res)
400413
except Exception as _e:
401414
log_exception(_e, response)
415+
raise Exception(f"Error: {response}")
402416
return np.array(ress), token_count
403417

404418
def encode_queries(self, text):
@@ -531,6 +545,7 @@ def encode(self, texts: list):
531545
ress.extend(result["embedding"])
532546
except Exception as _e:
533547
log_exception(_e, result)
548+
raise Exception(f"Error: {result}")
534549
return np.array(ress), token_count
535550

536551
def encode_queries(self, text):
@@ -541,6 +556,7 @@ def encode_queries(self, text):
541556
return np.array(result["embedding"]), token_count
542557
except Exception as _e:
543558
log_exception(_e, result)
559+
raise Exception(f"Error: {result}")
544560

545561

546562
class NvidiaEmbed(Base):
@@ -578,10 +594,11 @@ def encode(self, texts: list):
578594
response = requests.post(self.base_url, headers=self.headers, json=payload)
579595
try:
580596
res = response.json()
597+
ress.extend([d["embedding"] for d in res["data"]])
598+
token_count += self.total_token_count(res)
581599
except Exception as _e:
582600
log_exception(_e, response)
583-
ress.extend([d["embedding"] for d in res["data"]])
584-
token_count += self.total_token_count(res)
601+
raise Exception(f"Error: {response}")
585602
return np.array(ress), token_count
586603

587604
def encode_queries(self, text):
@@ -636,6 +653,7 @@ def encode(self, texts: list):
636653
token_count += res.meta.billed_units.input_tokens
637654
except Exception as _e:
638655
log_exception(_e, res)
656+
raise Exception(f"Error: {res}")
639657
return np.array(ress), token_count
640658

641659
def encode_queries(self, text):
@@ -649,6 +667,7 @@ def encode_queries(self, text):
649667
return np.array(res.embeddings.float[0]), int(res.meta.billed_units.input_tokens)
650668
except Exception as _e:
651669
log_exception(_e, res)
670+
raise Exception(f"Error: {res}")
652671

653672

654673
class TogetherAIEmbed(OpenAIEmbed):
@@ -716,6 +735,7 @@ def encode(self, texts: list):
716735
token_count += self.total_token_count(res)
717736
except Exception as _e:
718737
log_exception(_e, response)
738+
raise Exception(f"Error: {response}")
719739

720740
return np.array(ress), token_count
721741

@@ -731,6 +751,7 @@ def encode_queries(self, text):
731751
return np.array(res["data"][0]["embedding"]), self.total_token_count(res)
732752
except Exception as _e:
733753
log_exception(_e, response)
754+
raise Exception(f"Error: {response}")
734755

735756

736757
class ReplicateEmbed(Base):
@@ -777,6 +798,7 @@ def encode(self, texts: list, batch_size=16):
777798
)
778799
except Exception as _e:
779800
log_exception(_e, res)
801+
raise Exception(f"Error: {res}")
780802

781803
def encode_queries(self, text):
782804
res = self.client.do(model=self.model_name, texts=[text]).body
@@ -787,6 +809,7 @@ def encode_queries(self, text):
787809
)
788810
except Exception as _e:
789811
log_exception(_e, res)
812+
raise Exception(f"Error: {res}")
790813

791814

792815
class VoyageEmbed(Base):
@@ -809,6 +832,7 @@ def encode(self, texts: list):
809832
token_count += res.total_tokens
810833
except Exception as _e:
811834
log_exception(_e, res)
835+
raise Exception(f"Error: {res}")
812836
return np.array(ress), token_count
813837

814838
def encode_queries(self, text):
@@ -817,6 +841,7 @@ def encode_queries(self, text):
817841
return np.array(res.embeddings)[0], res.total_tokens
818842
except Exception as _e:
819843
log_exception(_e, res)
844+
raise Exception(f"Error: {res}")
820845

821846

822847
class HuggingFaceEmbed(Base):

0 commit comments

Comments
 (0)