
Commit b1dba87

fix: memory leak on chipper processor, beam search parameters, and bbox bug (#258)

This PR intends to solve the following issues:

* Memory leak in DonutProcessor when using large images in numpy format
* Use the right settings for beam search size > 1
* Solve a bug that in very rare cases made the last element predicted by Chipper have a bbox = None
1 parent 63eecdf commit b1dba87

File tree

3 files changed: +11 additions, -8 deletions

CHANGELOG.md

Lines changed: 6 additions & 0 deletions

@@ -1,3 +1,9 @@
+## 0.7.7
+
+* Fix a memory leak in DonutProcessor when using large images in numpy format
+* Set the right settings for beam search size > 1
+* Fix a bug that in very rare cases made the last element predicted by Chipper to have a bbox = None
+
 ## 0.7.6
 
 * fix a bug where invalid zoom factor lead to exceptions; now invalid zoom factors results in no scaling of the image
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "0.7.6" # pragma: no cover
+__version__ = "0.7.7" # pragma: no cover

unstructured_inference/models/chipper.py

Lines changed: 4 additions & 7 deletions
@@ -149,10 +149,7 @@ def predict_tokens(
         with torch.no_grad():
             encoder_outputs = self.model.encoder(
                 self.processor(
-                    np.array(
-                        image,
-                        np.float32,
-                    ),
+                    image,
                     return_tensors="pt",
                 ).pixel_values.to(self.device),
             )
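The removed `np.array(image, np.float32)` call forced a full float32 copy of the input image before handing it to the processor. A rough back-of-the-envelope sketch (hypothetical image dimensions, not from the PR) of why that intermediate copy is expensive for large pages:

```python
# Hypothetical illustration: converting a large uint8 image to float32
# quadruples its memory footprint; passing the image straight to the
# processor avoids allocating that intermediate copy at all.
h, w, c = 4000, 3000, 3            # a large scanned page (made-up dimensions)
uint8_bytes = h * w * c            # 1 byte per channel in the original image
float32_bytes = h * w * c * 4      # 4 bytes per channel after np.array(image, np.float32)
print(float32_bytes // uint8_bytes)  # the float32 copy is 4x larger
```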
@@ -177,9 +174,9 @@ def predict_tokens(
                 encoder_outputs=encoder_outputs,
                 input_ids=self.input_ids,
                 logits_processor=self.logits_processor,
-                do_sample=False,
+                do_sample=True,
                 no_repeat_ngram_size=0,
-                num_beams=5,
+                num_beams=3,
                 return_dict_in_generate=True,
                 output_attentions=True,
                 output_scores=True,
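With `num_beams=3`, generation keeps the three highest-scoring partial hypotheses at every decoding step. As a toy sketch of that core idea only (hypothetical scores and a simplified loop, not the Hugging Face implementation):

```python
import math

# Toy beam search sketch: keep the `num_beams` highest-scoring partial
# sequences at each step, scored by cumulative log-probability.
def beam_search(step_log_probs, num_beams):
    beams = [((), 0.0)]  # (token sequence, cumulative log-prob)
    for log_probs in step_log_probs:  # one {token: log_prob} dict per step
        candidates = [
            (seq + (tok,), score + lp)
            for seq, score in beams
            for tok, lp in log_probs.items()
        ]
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = candidates[:num_beams]  # prune to the best `num_beams` beams
    return beams

steps = [
    {"a": math.log(0.6), "b": math.log(0.4)},
    {"a": math.log(0.1), "b": math.log(0.9)},
]
best_seq, best_score = beam_search(steps, num_beams=3)[0]
print(best_seq)  # ('a', 'b'): 0.6 * 0.9 = 0.54 is the most probable path
```

Note how greedy decoding would also pick `a` first here, but beam search wins in general because it can recover paths whose first token looked locally worse.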
@@ -304,7 +301,7 @@ def postprocess(
             end = i
 
         # If exited before eos is achieved
-        if start != -1 and start < end and len(parents) > 0:
+        if start != -1 and start <= end and len(parents) > 0:
             slicing_end = end + 1
             string = self.tokenizer.decode(output_ids[start:slicing_end])
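The `start < end` to `start <= end` change matters when the final predicted element spans exactly one token, so that `start == end`. A minimal sketch with made-up token ids (hypothetical values, not from the model) showing how the old condition skipped that element, which is what produced the rare `bbox = None`:

```python
# Hypothetical off-by-one illustration: a single-token final element
# has start == end, so the old strict inequality dropped it entirely.
output_ids = [101, 7592, 102]  # toy token ids; last element spans one token
start, end = 2, 2              # single-token element at the end of the output

old_condition = start != -1 and start < end   # False: element never decoded
new_condition = start != -1 and start <= end  # True: element is decoded

print(old_condition, new_condition)            # False True
print(output_ids[start:end + 1])               # [102]: inclusive slice keeps the token
```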
