Commit fdbc4ed

Merge branch 'main' into hut101-19-jhwisdom
2 parents: 8367e8d + 87578a4

12 files changed: 75 additions & 81 deletions

.github/workflows/mr_ci.yml
Lines changed: 2 additions & 16 deletions

@@ -2,9 +2,8 @@
 name: Units Tests

 on:
-  pull_request:
-    branches:
-      - main
+  # Manual trigger only
+  workflow_dispatch:

 # Cancel existing tests on the same PR if a new commit is added to a pull request
 concurrency:
@@ -39,7 +38,6 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install ".[dev]"
-          python -m pip install 'git+https://github.com/rwood-97/piffle.git@iiif_dataclasses'
          python -m pip install pytest-cov

      - name: Quality Assurance
@@ -52,15 +50,3 @@ jobs:
      - name: Test with pytest
        run: |
          python -m pytest ./tests --ignore=tests/test_text_spotting/
-
-
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }}
-          directory: ./coverage/reports/
-          env_vars: OS,PYTHON
-          fail_ci_if_error: false
-          files: ./coverage.xml,!./cache
-          flags: unittests
-          name: codecov-umbrella

.github/workflows/mr_ci_text_spotting.yml
Lines changed: 21 additions & 24 deletions

@@ -16,7 +16,7 @@ jobs:
  all_tests:
    strategy:
      matrix:
-        os: [ubuntu-latest, windows-latest]
+        os: [ubuntu-latest]
    fail-fast: false
    env:
      # point datasets to ~/.torch so it's cached by CI
@@ -29,6 +29,16 @@ jobs:
        with:
          fetch-depth: 2

+      - name: Free disk space (Ubuntu)
+        if: runner.os == 'Linux'
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo apt-get clean
+          df -h
+
      - name: Set up Python 3.11
        uses: actions/setup-python@v2
        with:
@@ -46,16 +56,14 @@ jobs:

      - name: Install dependencies
        run: |
-          python -m pip install wheel
-          python -m pip install numpy==1.26.4 torch==2.2.2 torchvision==0.17.2 -f https://download.pytorch.org/whl/torch_stable.html
-          python -m pip install ".[dev]"
-          python -m pip install pytest-cov
-          python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
-          python -m pip install 'git+https://github.com/maps-as-data/DeepSolo.git'
-          python -m pip install 'git+https://github.com/maps-as-data/DPText-DETR.git'
-          python -m pip install 'git+https://github.com/maps-as-data/MapTextPipeline.git'
-          python -m pip install 'git+https://github.com/rwood-97/piffle.git@iiif_dataclasses'
-
+          python -m pip install --no-cache-dir wheel
+          python -m pip install --no-cache-dir numpy==1.26.4 torch==2.2.2 torchvision==0.17.2 -f https://download.pytorch.org/whl/torch_stable.html
+          python -m pip install --no-cache-dir ".[dev]"
+          python -m pip install --no-cache-dir pytest-cov
+          python -m pip install --no-cache-dir --no-build-isolation 'git+https://github.com/facebookresearch/detectron2.git'
+          python -m pip install --no-cache-dir --no-build-isolation 'git+https://github.com/maps-as-data/DeepSolo.git'
+          python -m pip install --no-cache-dir --no-build-isolation 'git+https://github.com/maps-as-data/DPText-DETR.git'
+          python -m pip install --no-cache-dir --no-build-isolation 'git+https://github.com/maps-as-data/MapTextPipeline.git'

      - name: Clone DPText-DETR
        run: |
@@ -71,22 +79,11 @@ jobs:

      - name: Hugging Face CLI
        run: |
-          pip install -U "huggingface_hub[cli]"
+          pip install -U "huggingface-hub[cli]>=0.30.0,<0.34.0"
          huggingface-cli download rwood-97/DPText_DETR_ArT_R_50_poly art_final.pth --local-dir .
          huggingface-cli download rwood-97/DeepSolo_ic15_res50 ic15_res50_finetune_synth-tt-mlt-13-15-textocr.pth --local-dir .
          huggingface-cli download rwood-97/MapTextPipeline_rumsey rumsey-finetune.pth --local-dir .

      - name: Test with pytest
        run: |
-          python -m pytest --cov=./ --cov-report=xml ./tests
-
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }}
-          directory: ./coverage/reports/
-          env_vars: OS,PYTHON
-          fail_ci_if_error: false
-          files: ./coverage.xml,!./cache
-          flags: unittests
-          name: codecov-umbrella
+          python -m pytest ./tests
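
The new bounded pin (`>=0.30.0,<0.34.0`) guards against breaking releases of `huggingface-hub` while still allowing patch and minor updates inside the window. How pip interprets such a range can be checked with the `packaging` library; this is a sketch, and the version numbers tested are arbitrary:

```python
from packaging.specifiers import SpecifierSet

# The same range used in the pip install line above.
spec = SpecifierSet(">=0.30.0,<0.34.0")

print("0.30.0" in spec)  # True: lower bound is inclusive
print("0.33.5" in spec)  # True: inside the range
print("0.34.0" in spec)  # False: upper bound is exclusive
```

An exclusive upper bound on the next minor version is a common way to accept bug fixes while shutting out releases that may change the CLI's behaviour.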

.github/workflows/mr_pip_ci.yml
Lines changed: 0 additions & 1 deletion

@@ -36,7 +36,6 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install mapreader[dev]
-          python -m pip install 'git+https://github.com/rwood-97/piffle.git@iiif_dataclasses'

      - name: Quality Assurance
        run: |

CHANGELOG.md
Lines changed: 7 additions & 0 deletions

@@ -14,9 +14,16 @@ The following table shows which versions of MapReader are compatible with which
 ## Pre-release
 _Add new changes here_

+### Fixed
+
+- Fixes the `model_summary` method in the `ClassifierContainer` class ([#574](https://github.com/maps-as-data/MapReader/pull/574))
+
+## [v1.8.2](https://github.com/Living-with-machines/MapReader/releases/tag/v1.8.2) (2025-12-19)
+
 ### Added

 - Added Hugging Face model support to ClassifierContainer
+- Added `piffle` package as dependency ([#575](https://github.com/maps-as-data/MapReader/pull/575))

 ## [v1.8.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.8.1) (2025-08-11)

docs/source/using-mapreader/step-by-step-guide/1-download.rst
Lines changed: 0 additions & 6 deletions

@@ -526,12 +526,6 @@ For more information on IIIF, see their documentation `here <https://iiif.io/>`_

 MapReader accepts any IIIF manifest which is compliant with the IIIF Presentation API (version `2 <https://iiif.io/api/presentation/2.1/>`__ or `3 <https://iiif.io/api/presentation/3.0/>`__).

-First, install piffle using the command below:
-
-.. code-block:: python
-
-    pip install piffle@git+https://github.com/rwood-97/piffle.git@iiif_dataclasses
-

 IIIFDownloader
 ~~~~~~~~~~~~~~~

mapreader/annotate/annotator.py
Lines changed: 6 additions & 1 deletion

@@ -512,7 +512,12 @@ def check_eligibility(row):
        queue_df["eligible"] = queue_df.apply(check_eligibility, axis=1)

        if self._sortby is not None:
-            queue_df.sort_values(self._sortby, ascending=self._ascending, inplace=True)
+            queue_df.sort_values(
+                by=[self._sortby, "min_y"],
+                ascending=[self._ascending, True],
+                kind="mergesort",
+                inplace=True,
+            )
            queue_df = queue_df[queue_df.eligible]
        else:
            queue_df = queue_df[queue_df.eligible].sample(frac=1)  # shuffle
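
The switch to `kind="mergesort"` in the annotator matters because pandas' default sort is not stable, so rows with equal sort keys could come back in a different order on each run; mergesort is stable, and the secondary `"min_y"` key breaks ties deterministically. A minimal sketch of the pattern (the `mean_pixel` column name and all values are illustrative, not taken from MapReader):

```python
import pandas as pd

# Toy annotation queue: patch_a and patch_c share the same primary key,
# so "min_y" decides their relative order.
queue_df = pd.DataFrame(
    {
        "mean_pixel": [0.2, 0.5, 0.2],  # hypothetical sortby column
        "min_y": [300, 100, 100],
    },
    index=["patch_a", "patch_b", "patch_c"],
)

# Stable multi-key sort: primary key first, then "min_y" ascending.
queue_df = queue_df.sort_values(
    by=["mean_pixel", "min_y"],
    ascending=[True, True],
    kind="mergesort",  # stable, unlike the default quicksort
)

print(list(queue_df.index))  # patch_c sorts before patch_a: equal mean_pixel, smaller min_y
```

Passing lists to `by=` and `ascending=` is what lets a single `sort_values` call handle the per-key sort directions.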

mapreader/classify/classifier.py
Lines changed: 1 addition & 1 deletion

@@ -590,7 +590,7 @@ def model_summary(
        col_names = ["output_size", "output_size", "num_params"]

        model_summary = summary(
-            self.model, input_size=input_size, col_names=col_names, **kwargs
+            self.model, input_size=input_size, col_names=col_names, device=self.device, **kwargs
        )
        print(model_summary)

setup.py
Lines changed: 1 addition & 1 deletion

@@ -63,7 +63,7 @@
        "folium>=0.12,<1.0.0",
        "mapclassify>=2.0.0,<3.0.0",
        "xyzservices==2024.9.0",
-        # "piffle @ git+https://github.com/rwood-97/piffle.git@iiif_dataclasses",
+        "piffle>=0.7.0",
        "lxml",
    ],
    extras_require={

tests/test_classify/test_classifier.py
Lines changed: 25 additions & 25 deletions

@@ -161,31 +161,31 @@ def test_init_resnet18_timm(inputs):
    assert classifier.dataloaders == {}


-@pytest.mark.dependency(name="timm_models", scope="session")
-def test_init_models_timm(inputs):
-    annots, dataloaders = inputs
-    for model2test in [
-        ["resnest50d_4s2x40d", timm.models.ResNet],
-        ["resnest101e", timm.models.ResNet],
-        ["resnext101_32x8d.fb_swsl_ig1b_ft_in1k", timm.models.ResNet],
-        ["resnet152", timm.models.ResNet],
-        ["tf_efficientnet_b3.ns_jft_in1k", timm.models.EfficientNet],
-        ["swin_base_patch4_window7_224", timm.models.swin_transformer.SwinTransformer],
-        ["vit_base_patch16_224", timm.models.vision_transformer.VisionTransformer],
-    ]:  # these are models from 2021 paper
-        model, model_type = model2test
-        my_model = timm.create_model(
-            model, pretrained=True, num_classes=len(annots.labels_map)
-        )
-        assert isinstance(my_model, model_type)
-        classifier = ClassifierContainer(
-            my_model, labels_map=annots.labels_map, dataloaders=dataloaders
-        )
-        assert isinstance(classifier.model, model_type)
-        assert all(k in classifier.dataloaders.keys() for k in ["train", "test", "val"])
-        classifier = ClassifierContainer(my_model, labels_map=annots.labels_map)
-        assert isinstance(classifier.model, model_type)
-        assert classifier.dataloaders == {}
+# @pytest.mark.dependency(name="timm_models", scope="session")
+# def test_init_models_timm(inputs):
+#     annots, dataloaders = inputs
+#     for model2test in [
+#         ["resnest50d_4s2x40d", timm.models.ResNet],
+#         ["resnest101e", timm.models.ResNet],
+#         ["resnext101_32x8d.fb_swsl_ig1b_ft_in1k", timm.models.ResNet],
+#         ["resnet152", timm.models.ResNet],
+#         ["tf_efficientnet_b3.ns_jft_in1k", timm.models.EfficientNet],
+#         ["swin_base_patch4_window7_224", timm.models.swin_transformer.SwinTransformer],
+#         ["vit_base_patch16_224", timm.models.vision_transformer.VisionTransformer],
+#     ]:  # these are models from 2021 paper
+#         model, model_type = model2test
+#         my_model = timm.create_model(
+#             model, pretrained=True, num_classes=len(annots.labels_map)
+#         )
+#         assert isinstance(my_model, model_type)
+#         classifier = ClassifierContainer(
+#             my_model, labels_map=annots.labels_map, dataloaders=dataloaders
+#         )
+#         assert isinstance(classifier.model, model_type)
+#         assert all(k in classifier.dataloaders.keys() for k in ["train", "test", "val"])
+#         classifier = ClassifierContainer(my_model, labels_map=annots.labels_map)
+#         assert isinstance(classifier.model, model_type)
+#         assert classifier.dataloaders == {}


 # test loading object from pickle file

tests/test_text_spotting/test_deepsolo_runner.py
Lines changed: 4 additions & 2 deletions

@@ -146,8 +146,10 @@ def test_deepsolo_init_tsv(init_dataframes, tmp_path):

 def test_deepsolo_init_geojson(init_dataframes, tmp_path, mock_response):
    parent_df, patch_df = init_dataframes
-    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
-    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    parent_df.to_file(
+        f"{tmp_path}/parent_df.geojson", driver="GeoJSON", engine="pyogrio"
+    )
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON", engine="pyogrio")
    runner = DeepSoloRunner(
        f"{tmp_path}/patch_df.geojson",
        parent_df=f"{tmp_path}/parent_df.geojson",
