Merge pull request #610 from Steinbeck-Lab/development

NishaSharma14 · web-flow · commit af7347f87461 · 2025-06-10T10:00:12.000+02:00
fix: decimer to work on low res images
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -46,7 +46,6 @@ jobs:
         pip3 install --no-cache-dir -r requirements.txt
         pip install git+https://github.com/Kohulan/DECIMER-Image-Segmentation.git@bbox --no-deps
         pip3 install --no-deps decimer
-        pip3 install --no-deps STOUT-pypi==2.0.5
         pip install flake8 pytest
         pip install pytest-cov
         wget -O surge "https://github.com/StructureGenerator/surge/releases/download/v1.0/surge-linux-v1.0"
diff --git a/Dockerfile b/Dockerfile
@@ -31,11 +31,11 @@ RUN conda install -c conda-forge python=${PYTHON_VERSION} sqlite --force-reinsta
     pip3 install --no-cache-dir -r requirements.txt && \
     # Install specific packages without dependencies
     pip3 install --no-cache-dir --no-deps \
-        decimer-segmentation==1.1.3 \
-        decimer==2.3.0 \
+        git+https://github.com/Kohulan/DECIMER-Image-Segmentation.git@bbox \
+        decimer==2.7.1 \
         chembl_structure_pipeline
 
 
 COPY ./app ./app
 
-CMD uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}
+CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}"]
diff --git a/Dockerfile.lite b/Dockerfile.lite
@@ -35,4 +35,4 @@ RUN conda install -c conda-forge python=${PYTHON_VERSION} sqlite --force-reinsta
 
 COPY ./app /code/app
 
-CMD uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}
+CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}"]
diff --git a/app/modules/decimer.py b/app/modules/decimer.py
@@ -106,21 +106,38 @@ def get_predicted_segments(path: str) -> str:
         return ".".join(smiles_predicted)
 
 
-def get_predicted_segments_from_file(content: any, filename: str) -> tuple:
-    """Takes an image file path and returns a set of paths and image names of.
+def get_predicted_segments_from_file(content: any, filename: str) -> str:
+    """Takes an image file content and filename, saves it temporarily, and returns SMILES prediction.
 
-    segmented images.
+    If either dimension (width or height) is below 500 pixels, uses predict_SMILES directly.
+    Otherwise, uses the segmentation approach.
 
     Args:
-        input_path (str): the path of an image.
+        content (any): The image file content.
+        filename (str): The filename to save the content to.
 
     Returns:
-        image_name (str): image file name.
-        segments (list): a set of segmented images.
+        str: Predicted SMILES string.
     """
 
+    # Write the content to file and ensure it's closed
     with open(filename, "wb") as f:
         f.write(content)
-        smiles = get_predicted_segments(filename)
-        os.remove(filename)
+
+    try:
+        # Check image dimensions
+        img = Image.open(filename)
+        width, height = img.size
+        img.close()  # Close the image to free resources
+
+        # If image is small (below 500 pixels in either dimension), use direct prediction
+        if width < 500 or height < 500:
+            smiles = predict_SMILES(filename)
+        else:
+            smiles = get_predicted_segments(filename)
+
         return smiles
+    finally:
+        # Ensure the temporary file is always removed
+        if os.path.exists(filename):
+            os.remove(filename)
diff --git a/requirements_lite.txt b/requirements_lite.txt
@@ -14,7 +14,8 @@ prometheus-fastapi-instrumentator
 pystow>=0.4.9
 python-multipart
 selfies>=2.1.1
-tensorflow==2.12.0
+tensorflow==2.15.1
+Keras-Preprocessing==1.1.2
 unicodedata2==15.0.0
 websockets==10.4
 mapchiral
diff --git a/tests/small_molecule.png b/tests/small_molecule.png
diff --git a/tests/test_decimer.py b/tests/test_decimer.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
 import os
+import tempfile
+from unittest.mock import patch
 
 import pytest
+from PIL import Image
 
 from app.modules.decimer import convert_image
 from app.modules.decimer import get_predicted_segments
@@ -29,34 +32,184 @@ def sample_image_path():
     return os.path.join(TEST_FILES_DIR, "segment_sample.png")
 
 
+@pytest.fixture(scope="module")
+def small_image_path():
+    """Small image (400x300) - should trigger direct prediction"""
+    return os.path.join(TEST_FILES_DIR, "small_molecule.png")
+
+
+@pytest.fixture(scope="module")
+def tiny_image_path():
+    """Tiny image (200x150) - should trigger direct prediction"""
+    return os.path.join(TEST_FILES_DIR, "tiny_molecule.png")
+
+
+@pytest.fixture(scope="module")
+def caffeine_image_path():
+    """Caffeine image for testing"""
+    return os.path.join(TEST_FILES_DIR, "caffeine.png")
+
+
 # Test the convert_image function
 def test_convert_image(sample_gif_path, sample_png_path):
     converted_path = convert_image(sample_gif_path)
     assert os.path.isfile(converted_path)
     assert converted_path == sample_png_path
+    # Clean up the converted file
+    if os.path.exists(converted_path):
+        os.remove(converted_path)
 
 
-# Test the get_segments function
-def test_get_segments(sample_gif_path):
+# Test the get_segments function with GIF
+def test_get_segments_gif(sample_gif_path):
     image_name, segments = get_segments(sample_gif_path)
     assert image_name == "segment_sample.gif"
-    assert len(segments) > 0
+    assert isinstance(segments, list)
+
+
+# Test the get_segments function with PNG
+def test_get_segments_png(sample_png_path):
+    image_name, segments = get_segments(sample_png_path)
+    assert image_name == "segment_sample.png"
+    assert isinstance(segments, list)
 
 
 # Test the get_predicted_segments function
-def test_get_predicted_segments(sample_gif_path):
-    predicted_smiles = get_predicted_segments(sample_gif_path)
+@patch("app.modules.decimer.predict_SMILES")
+def test_get_predicted_segments(mock_predict_smiles, sample_png_path):
+    mock_predict_smiles.return_value = "CCO"
+    predicted_smiles = get_predicted_segments(sample_png_path)
+    assert isinstance(predicted_smiles, str)
+    assert len(predicted_smiles) > 0
+
+
+# Test get_predicted_segments_from_file with large image (should use segmentation)
+@patch("app.modules.decimer.get_predicted_segments")
+def test_get_predicted_segments_from_file_large_image(
+    mock_get_predicted_segments, caffeine_image_path
+):
+    """Test that large images (>=500 pixels) use segmentation approach"""
+    mock_get_predicted_segments.return_value = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
+
+    with open(caffeine_image_path, "rb") as f:
+        content = f.read()
+
+    predicted_smiles = get_predicted_segments_from_file(content, "test_large.png")
+
     assert isinstance(predicted_smiles, str)
     assert len(predicted_smiles) > 0
+    mock_get_predicted_segments.assert_called_once()
 
 
-# Test the get_predicted_segments_from_file function
-def test_get_predicted_segments_from_file(sample_image_path):
-    with open(sample_image_path, "rb") as f:
+# Test get_predicted_segments_from_file with small image (should use direct prediction)
+@patch("app.modules.decimer.predict_SMILES")
+def test_get_predicted_segments_from_file_small_image(
+    mock_predict_smiles, small_image_path
+):
+    """Test that small images (<500 pixels) use direct prediction"""
+    mock_predict_smiles.return_value = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
+
+    with open(small_image_path, "rb") as f:
         content = f.read()
-    predicted_smiles = get_predicted_segments_from_file(
-        content,
-        "caffeine.png",
-    )
+
+    predicted_smiles = get_predicted_segments_from_file(content, "test_small.png")
+
+    assert isinstance(predicted_smiles, str)
+    assert len(predicted_smiles) > 0
+    mock_predict_smiles.assert_called_once()
+
+
+# Test get_predicted_segments_from_file with tiny image (should use direct prediction)
+@patch("app.modules.decimer.predict_SMILES")
+def test_get_predicted_segments_from_file_tiny_image(
+    mock_predict_smiles, tiny_image_path
+):
+    """Test that tiny images (<500 pixels) use direct prediction"""
+    mock_predict_smiles.return_value = "C1CCC1"
+
+    with open(tiny_image_path, "rb") as f:
+        content = f.read()
+
+    predicted_smiles = get_predicted_segments_from_file(content, "test_tiny.png")
+
     assert isinstance(predicted_smiles, str)
     assert len(predicted_smiles) > 0
+    mock_predict_smiles.assert_called_once()
+
+
+# Test error handling in get_predicted_segments_from_file
+def test_get_predicted_segments_from_file_cleanup():
+    """Test that temporary files are always cleaned up, even on errors"""
+    test_content = b"invalid image content"
+    test_filename = "test_cleanup.png"
+
+    # This should fail but still clean up the file
+    try:
+        get_predicted_segments_from_file(test_content, test_filename)
+    except Exception:
+        pass  # Expected to fail with invalid image content
+
+    # File should not exist after function completes
+    assert not os.path.exists(test_filename)
+
+
+# Test image size detection logic
+def test_image_size_detection():
+    """Test that the image size detection works correctly"""
+    # Create temporary images with known sizes
+    with tempfile.NamedTemporaryFile(
+        suffix=".png", delete=False
+    ) as tmp_large, tempfile.NamedTemporaryFile(
+        suffix=".png", delete=False
+    ) as tmp_small:
+
+        try:
+            # Create large image (600x600)
+            large_img = Image.new("RGB", (600, 600), "white")
+            large_img.save(tmp_large.name)
+
+            # Create small image (300x300)
+            small_img = Image.new("RGB", (300, 300), "white")
+            small_img.save(tmp_small.name)
+
+            # Read the large image content as bytes
+            with open(tmp_large.name, "rb") as f:
+                large_content = f.read()
+
+            # Read the small image content as bytes
+            with open(tmp_small.name, "rb") as f:
+                small_content = f.read()
+
+            # Mock the prediction functions to verify which path is taken
+            with patch("app.modules.decimer.predict_SMILES") as mock_direct, patch(
+                "app.modules.decimer.get_predicted_segments"
+            ) as mock_segment:
+
+                mock_direct.return_value = "direct_prediction"
+                mock_segment.return_value = "segmented_prediction"
+
+                # Test large image uses segmentation
+                result_large = get_predicted_segments_from_file(
+                    large_content, "test_large_600x600.png"
+                )
+                assert result_large == "segmented_prediction"
+                mock_segment.assert_called()
+                mock_direct.assert_not_called()
+
+                # Reset mocks
+                mock_direct.reset_mock()
+                mock_segment.reset_mock()
+
+                # Test small image uses direct prediction
+                result_small = get_predicted_segments_from_file(
+                    small_content, "test_small_300x300.png"
+                )
+                assert result_small == "direct_prediction"
+                mock_direct.assert_called()
+                mock_segment.assert_not_called()
+
+        finally:
+            # Clean up temporary files
+            for tmp_file in [tmp_large.name, tmp_small.name]:
+                if os.path.exists(tmp_file):
+                    os.remove(tmp_file)
diff --git a/tests/test_deeplearningtools.py b/tests/test_deeplearningtools.py
@@ -3,29 +3,13 @@
 import pytest
 from DECIMER import predict_SMILES
 from rdkit import Chem
-from STOUT import translate_forward
-from STOUT import translate_reverse
 
 
 @pytest.fixture
 def test_smiles():
     return "CN1C(=O)C2=C(N=CN2C)N(C)C1=O"
 
 
-def test_smilestoiupac(test_smiles):
-    smiles = test_smiles
-    expected_result = "1,3,7-trimethylpurine-2,6-dione"
-    actual_result = translate_forward(smiles)
-    assert expected_result == actual_result
-
-
-def test_iupactosmiles(test_smiles):
-    iupac_name = "1,3,7-trimethylpurine-2,6-dione"
-    expected_result = "CN1C=NC2=C1C(=O)N(C)C(=O)N2C"
-    actual_result = translate_reverse(iupac_name)
-    assert expected_result == actual_result
-
-
 def test_imagetosmiles(test_smiles):
     img_path = "tests/caffeine.png"
     expected_result = test_smiles
diff --git a/tests/tiny_molecule.png b/tests/tiny_molecule.png

Original file line number	Diff line number	Diff line change
`@@ -35,4 +35,4 @@ RUN conda install -c conda-forge python=${PYTHON_VERSION} sqlite --force-reinsta`
`35`	`35`
`36`	`36`	`COPY ./app /code/app`
`37`	`37`
`38`		`-CMD uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}`
	`38`	`+CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port 80 --workers ${WORKERS}"]`