fix: linknet post-processing hyperparameters + demo for rotation model (#865)

charlesmindee · web-flow · commit 9d03085643da · 2022-03-22T10:49:19.000+01:00
* fix: linknet parameters

* feat: add demo rotation

* feat: add rotation in demo
diff --git a/demo/app.py b/demo/app.py
@@ -21,7 +21,7 @@
 from doctr.models import ocr_predictor
 from doctr.utils.visualization import visualize_page
 
-DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large"]
+DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large", "linknet_resnet18_rotation"]
 RECO_ARCHS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "master", "sar_resnet31"]
 
 
@@ -73,7 +73,10 @@ def main():
 
         else:
             with st.spinner('Loading model...'):
-                predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)
+                predictor = ocr_predictor(
+                    det_arch, reco_arch, pretrained=True,
+                    assume_straight_pages=(det_arch != "linknet_resnet18_rotation")
+                )
 
             with st.spinner('Analyzing...'):
 
@@ -97,8 +100,9 @@ def main():
 
                 # Page reconsitution under input page
                 page_export = out.pages[0].export()
-                img = out.pages[0].synthesize()
-                cols[3].image(img, clamp=True)
+                if det_arch != "linknet_resnet18_rotation":
+                    img = out.pages[0].synthesize()
+                    cols[3].image(img, clamp=True)
 
                 # Display JSON
                 st.markdown("\nHere are your analysis results in JSON format:")
diff --git a/doctr/models/detection/linknet/base.py b/doctr/models/detection/linknet/base.py
@@ -30,7 +30,7 @@ class LinkNetPostProcessor(DetectionPostProcessor):
     """
     def __init__(
         self,
-        bin_thresh: float = 0.5,
+        bin_thresh: float = 0.1,
         box_thresh: float = 0.1,
         assume_straight_pages: bool = True,
     ) -> None:
@@ -39,7 +39,7 @@ def __init__(
             bin_thresh,
             assume_straight_pages
         )
-        self.unclip_ratio = 1.5
+        self.unclip_ratio = 1.2
 
     def polygon_to_box(
         self,
@@ -103,13 +103,12 @@ def bitmap_to_boxes(
                 containing x, y, w, h, alpha, score for the box
         """
         height, width = bitmap.shape[:2]
-        min_size_box = 1 + int(height / 512)
         boxes = []
         # get contours from connected components on the bitmap
         contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         for contour in contours:
             # Check whether smallest enclosing bounding box is not too small
-            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < min_size_box):
+            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
                 continue
             # Compute objectness
             if self.assume_straight_pages:
diff --git a/doctr/utils/visualization.py b/doctr/utils/visualization.py
@@ -218,13 +218,16 @@ def visualize_page(
                             int(page['dimensions'][1] * word['geometry'][0][0]),
                             int(page['dimensions'][0] * word['geometry'][0][1])
                         )
-                    ax.text(
-                        *text_loc,
-                        word['value'],
-                        size=10,
-                        alpha=0.5,
-                        color=(0, 0, 1),
-                    )
+
+                    if len(word['geometry']) == 2:
+                        # Only draw the text label when the geometry is a straight box (2 points)
+                        ax.text(
+                            *text_loc,
+                            word['value'],
+                            size=10,
+                            alpha=0.5,
+                            color=(0, 0, 1),
+                        )
 
         if display_artefacts:
             for artefact in block['artefacts']:
@@ -251,7 +254,6 @@ def visualize_page(
 def synthesize_page(
     page: Dict[str, Any],
     draw_proba: bool = False,
-    font_size: int = 13,
     font_family: Optional[str] = None,
 ) -> np.ndarray:
     """Draw a the content of the element page (OCR response) on a blank page.