bgruening · SaimMomin12 · May 12, 2026 · May 6, 2026 · May 6, 2026 · May 7, 2026
diff --git a/tools/logan_proteins/.shed.yml b/tools/logan_proteins/.shed.yml
@@ -13,7 +13,7 @@ long_description: |
     are validated and aligned with MMseqs2.  The approach scales sub-linearly
     with the number of queries, making it practical for large-scale metagenomic
     and comparative-proteomics workflows.
-remote_repository_url: https://github.com/bgruening/galaxytools/tree/master/tools/search_proteins
+remote_repository_url: https://github.com/bgruening/galaxytools/tree/master/tools/logan_proteins
 type: unrestricted
 auto_tool_repositories:
   name_template: "{{ tool_id }}"

diff --git a/tools/logan_proteins/embed_query.xml b/tools/logan_proteins/embed_query.xml
@@ -9,10 +9,11 @@
             FORCE_CPU="--force_cpu"
         else
             FORCE_CPU="";
-        fi &&        
+        fi &&
+        ln -s '$query_sequences' query_sequences.fasta &&    
         mkdir -p results &&
         python /app/embed_query.py
-            --query_sequences '$query_sequences'
+            --query_sequences query_sequences.fasta
             --output results
             -F
             \$FORCE_CPU
@@ -50,9 +51,6 @@ This is the first step in a two-step protein search pipeline:
 
 - **Query sequences**: A FASTA file containing protein sequences to be embedded
 
-**Parameters**
-
-- **Force CPU usage**: By default, CPU will be used. Disable this option to use GPU instead if available.
 
 **Outputs**
 

diff --git a/tools/logan_proteins/macros.xml b/tools/logan_proteins/macros.xml
@@ -1,10 +1,10 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.2.0</token>
+    <token name="@TOOL_VERSION@">1.2.2</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">25.0</token>
     <xml name="requirements">
         <requirements>
-            <container type="docker">quay.io/bgruening/logan-protein:1.2.0</container>
+            <container type="docker">quay.io/bgruening/logan-protein:1.2.2</container>
         </requirements>
     </xml>
     <xml name="citations">

diff --git a/tools/logan_proteins/search_database.xml b/tools/logan_proteins/search_database.xml
@@ -9,11 +9,14 @@
         mkdir -p results/intermediate_files &&       
         ln -s '$query_embeddings_npy' results/intermediate_files/query_embeddings.npy &&
         ln -s '$query_embeddings_names' results/intermediate_files/query_embeddings.names.txt &&
+        ln -s '$query_sequences' query_sequences.fasta &&
         python /app/search_database.py
             --database '$database.fields.path'
+            $deep_search
             --output results/
-            --query_sequences '$query_sequences'
+            --query_sequences query_sequences.fasta
             --num_threads \${GALAXY_SLOTS:-8}
+            --memory \${GALAXY_MEMORY_MB:-16000}
             #if $outfmt:
                 --outfmt '$outfmt'
             #end if
@@ -36,6 +39,7 @@
             <option value="4">SAM (4)</option>
             <option value="5">Taxonomic classification (5)</option>
         </param>
+        <param argument="--deep-search" type="boolean" truevalue="--deep-search" falsevalue="" label="Deep search mode" help="If enabled, extract proteins from all search results instead of only aligned centroids, then align everything with MMseqs2."/>
     </inputs>
 
     <outputs>
@@ -49,6 +53,7 @@
             <param name="query_sequences" value="queries.fasta"/>
             <param name="query_embeddings_npy" value="query_embeddings.npy"/>
             <param name="query_embeddings_names" value="query_embeddings.names.txt"/>
+            <param name="deep_search" value="false"/>
             <param name="database" value="faiss-demo-db-20260203"/>
             <param name="outfmt" value="0"/>
             <assert_stdout>
@@ -74,6 +79,7 @@ with MMseqs2. It is the second step in the protein search pipeline.
 - **Original query sequences**: The same FASTA file used for embedding (required for MMseqs2 alignment)
 - **Query embeddings (npy)**: Output from the Embed Query tool
 - **Query embedding names**: Text file with sequence names from the Embed Query tool
+- **Deep search mode**: Optional flag to enable deep search, which extracts proteins from all search results instead of only aligned centroids, then aligns everything with MMseqs2
 
 **Parameters**
 

diff --git a/tools/logan_proteins/test-data/faiss_database.loc b/tools/logan_proteins/test-data/faiss_database.loc
@@ -3,4 +3,4 @@
 # - db-name
 # - version
 # - /path/to/data 
-faiss-demo-db-20260203	FAISS Test Database	1.2.0	${__HERE__}/test-db/
+faiss-demo-db-20260203	FAISS Test Database	1.2.2	${__HERE__}/test-db/
diff --git a/tools/logan_proteins/tool-data/faiss_database.loc.sample b/tools/logan_proteins/tool-data/faiss_database.loc.sample
@@ -1,5 +1,5 @@
 #This is a sample file distributed with Galaxy that enables tools
-#to use a directory of metagenomics files.  
+#to use a directory of FAISS database files.  
 #file has this format (white space characters are TAB characters)
 # - db-build-version-date
 # - db-name