Molmed
diff --git a/‎.github/workflows/run_tests.yml‎
Lines changed: 48 additions & 0 deletions b/‎.github/workflows/run_tests.yml‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 31 additions & 1 deletion b/‎README.md‎
Lines changed: 31 additions & 1 deletion
diff --git a/‎bin/get_metadata.py‎
Lines changed: 37 additions & 25 deletions b/‎bin/get_metadata.py‎
Lines changed: 37 additions & 25 deletions
diff --git a/‎bin/get_qc_config.py‎
Lines changed: 52 additions & 26 deletions b/‎bin/get_qc_config.py‎
Lines changed: 52 additions & 26 deletions
diff --git a/‎config/compute_resources.config‎ renamed to ‎config/nextflow_config/compute_resources.config‎ b/‎config/compute_resources.config‎ renamed to ‎config/nextflow_config/compute_resources.config‎
diff --git a/‎config/nextflow_config/singularity.config‎
Lines changed: 29 additions & 0 deletions b/‎config/nextflow_config/singularity.config‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎config/nextflow_config/test.config‎
Lines changed: 19 additions & 0 deletions b/‎config/nextflow_config/test.config‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎config/fastq_screen.conf‎ renamed to ‎config/tool_config/fastq_screen.conf‎ b/‎config/fastq_screen.conf‎ renamed to ‎config/tool_config/fastq_screen.conf‎
diff --git a/‎config/multiqc_flowcell_config.yaml‎ renamed to ‎config/tool_config/multiqc_flowcell_config.yaml‎ b/‎config/multiqc_flowcell_config.yaml‎ renamed to ‎config/tool_config/multiqc_flowcell_config.yaml‎
@@ -0,0 +1,48 @@
+name: Run tests
+on: [push]  # triggered by every push
+jobs:
+  run-tests:
+    runs-on: ubuntu-20.04
+    env:
+      NXF_VER: 21.04.1  # Nextflow release used by the launcher
+      NXF_ANSI_LOG: 'false'  # env values are strings; plain (non-ANSI) log output
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v2
+
+      - name: Cache singularity images
+        uses: actions/cache@v2
+        with:
+          path: work/singularity
+          # cache is keyed on the file that lists the container URLs
+          key: singularity-${{ hashFiles('config/nextflow_config/singularity.config') }}
+          restore-keys: singularity-
+      - name: Install Singularity
+        uses: eWaterCycle/setup-singularity@v7
+        with:
+          singularity-version: 3.8.3
+
+      - name: Install Nextflow
+        env:
+          CAPSULE_LOG: none
+        run: |
+          curl -s https://get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+
+      - name: Make Nextflow binary executable
+        run: chmod +x /usr/local/bin/nextflow
+
+      - name: Set up python
+        uses: actions/setup-python@v2
+        with:
+          # quoted so the version is never parsed as a float (e.g. 3.10 -> 3.1)
+          python-version: '3.9'
+          architecture: x64
+      - name: Install test requirements
+        run: pip install -r requirements-dev.txt
+
+      - name: Run tests
+        run: pytest tests
+
+      - name: Run Black code formatting check
+        run: black --check .
@@ -9,3 +9,5 @@ resources
 *.simg
 *.img
 FastQ_Screen_Genomes
+venv
+__pycache__
@@ -29,6 +29,7 @@ These are the primary config profiles:
 - `irma`:         Uppmax slurm profile for use on the cluster `irma` (note: The parameter `params.project` must be supplied).
 - `snpseq`:       Run locally with greater memory available than `dev`.
 - `singularity`:  Enables singularity and provides container URLs.
+- `test`:         Run the pipeline using test data.
 
 Additional profiles:
 - `debug`: prints out the `env` properties before executing processes.
@@ -52,6 +53,35 @@ There are two primary branches of this project:
 - `master`: The stable release branch
 - `dev`: The development and test branch, to which pull requests should be made.
 
-### Known issues:
+Tests are run through GitHub Actions when pushing code to the repo. See the instructions below on how to run them locally.
+
+To keep the python parts of the project nice and tidy, we enforce that code should be formatted according to [black](https://github.com/psf/black).
+To re-format your code with black, simply run:
+```
+black .
+```
+
+### Running tests locally
+
+Assuming you have installed all prerequisites (except the FastQ Screen database: the test data comes with a minimal version of it), you can run the tests locally by following these steps:
+
+```
+# create virtual environment 
+virtualenv -p python3.9 venv/   
+
+# activate venv
+source venv/bin/activate
+
+# install dependencies
+pip install -r requirements-dev.txt
+
+# run tests
+pytest tests/
+
+# perform black formatter check
+black --check .
+```
+
+## Known issues:
 
 - Unable to download genome indices using `fastq_screen --get_genomes` as wget within the container does not resolve the address correctly. Fastq Screen must be installed separately (e.g. with conda) and the genomes downloaded prior to running the workflow. The path to the databases must then be given using the `params.fastqscreen_databases` parameter.
@@ -8,21 +8,24 @@
 import json
 
 
-class RunfolderInfo():
-
+class RunfolderInfo:
     def __init__(self, runfolder, bcl2fastq_outdir):
         self.runfolder = runfolder
         self.run_parameters = self.read_run_parameters()
         self.stats_json = self.read_stats_json(bcl2fastq_outdir)
         self.description_and_identifier = OrderedDict()
-        self.run_parameters_tags = \
-            {'RunId': 'Run ID', 'RunID': 'Run ID',
-             'ApplicationName': 'Control software', 'Application': 'Control software',
-             'ApplicationVersion': 'Control software version',
-             'Flowcell': 'Flowcell type', 'FlowCellMode': 'Flowcell type',
-             'ReagentKitVersion': 'Reagent kit version',
-             'RTAVersion': 'RTA Version', 'RtaVersion': 'RTA Version',
-             }
+        self.run_parameters_tags = {
+            "RunId": "Run ID",
+            "RunID": "Run ID",
+            "ApplicationName": "Control software",
+            "Application": "Control software",
+            "ApplicationVersion": "Control software version",
+            "Flowcell": "Flowcell type",
+            "FlowCellMode": "Flowcell type",
+            "ReagentKitVersion": "Reagent kit version",
+            "RTAVersion": "RTA Version",
+            "RtaVersion": "RTA Version",
+        }
 
     def find(self, d, tag):
         if tag in d:
@@ -45,7 +48,8 @@ def read_run_parameters(self):
 
     def read_stats_json(self, bcl2fastq_outdir):
         stats_json_path = os.path.join(
-            self.runfolder, bcl2fastq_outdir, "Stats/Stats.json")
+            self.runfolder, bcl2fastq_outdir, "Stats/Stats.json"
+        )
         if os.path.exists(stats_json_path):
             with open(stats_json_path) as f:
                 return json.load(f)
@@ -72,10 +76,14 @@ def get_read_cycles(self):
         try:
             for read_info in self.stats_json["ReadInfosForLanes"][0]["ReadInfos"]:
                 if read_info["IsIndexedRead"]:
-                    read_and_cycles[f"Index {index_counter} (bp)"] = read_info["NumCycles"]
+                    read_and_cycles[f"Index {index_counter} (bp)"] = read_info[
+                        "NumCycles"
+                    ]
                     index_counter += 1
                 else:
-                    read_and_cycles[f"Read {read_counter} (bp)"] = read_info["NumCycles"]
+                    read_and_cycles[f"Read {read_counter} (bp)"] = read_info[
+                        "NumCycles"
+                    ]
                     read_counter += 1
             return read_and_cycles
         except TypeError:
@@ -85,19 +93,21 @@ def get_info(self):
         results = self.get_read_cycles()
         results.update(self.get_run_parameters())
         if os.path.exists(os.path.join(self.runfolder, "bcl2fastq_version")):
-            results['bcl2fastq version'] = self.get_bcl2fastq_version(
-                self.runfolder)
+            results["bcl2fastq version"] = self.get_bcl2fastq_version(self.runfolder)
         return results
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description='Dumps a metadata yaml for MultiQC')
-    parser.add_argument('--runfolder', type=str,
-                        required=True, help='Path to runfolder')
-    parser.add_argument('--bcl2fastq-outdir', type=str,
-                        default='Data/Intensities/BaseCalls',
-                        help='Path to bcl2fastq output folder relative to the runfolder')
+    parser = argparse.ArgumentParser(description="Dumps a metadata yaml for MultiQC")
+    parser.add_argument(
+        "--runfolder", type=str, required=True, help="Path to runfolder"
+    )
+    parser.add_argument(
+        "--bcl2fastq-outdir",
+        type=str,
+        default="Data/Intensities/BaseCalls",
+        help="Path to bcl2fastq output folder relative to the runfolder",
+    )
 
     args = parser.parse_args()
     runfolder = args.runfolder
@@ -106,14 +116,16 @@ def get_info(self):
     runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir)
     results = runfolder_info.get_info()
 
-    print ('''
+    print(
+        """
 id: 'sequencing_metadata'
 section_name: 'Sequencing Metadata'
 plot_type: 'html'
 description: 'regarding the sequencing run'
 data: |
     <dl class="dl-horizontal">
-''')
+"""
+    )
     for k, v in results.items():
         print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k, v))
-    print ("    </dl>")
+    print("    </dl>")
@@ -13,20 +13,30 @@ def __init__(self, handler_name, multiqc_mapping, compare_direction):
         self.compare_direction = compare_direction
 
 
-class HandlerMapper():
+class HandlerMapper:
     def __init__(self):
-        self._mapper_list = [ValueHandlerMapper(handler_name = 'ClusterPFHandler',
-                                                multiqc_mapping = 'total',
-                                                compare_direction = 'lt'),
-                             ValueHandlerMapper(handler_name = 'ErrorRateHandler',
-                                                multiqc_mapping = 'Error',
-                                                compare_direction = 'gt'),
-                             ValueHandlerMapper(handler_name = 'Q30Handler',
-                                                multiqc_mapping = 'percent_Q30',
-                                                compare_direction = 'lt'),
-                             ValueHandlerMapper(handler_name = 'ReadsPerSampleHandler',
-                                                multiqc_mapping = 'mqc-generalstats-bcl2fastq-total',
-                                                compare_direction = 'lt')]
+        self._mapper_list = [
+            ValueHandlerMapper(
+                handler_name="ClusterPFHandler",
+                multiqc_mapping="total",
+                compare_direction="lt",
+            ),
+            ValueHandlerMapper(
+                handler_name="ErrorRateHandler",
+                multiqc_mapping="Error",
+                compare_direction="gt",
+            ),
+            ValueHandlerMapper(
+                handler_name="Q30Handler",
+                multiqc_mapping="percent_Q30",
+                compare_direction="lt",
+            ),
+            ValueHandlerMapper(
+                handler_name="ReadsPerSampleHandler",
+                multiqc_mapping="mqc-generalstats-bcl2fastq-total",
+                compare_direction="lt",
+            ),
+        ]
 
         self.mapping = self._convert_to_mappings(self._mapper_list)
 
@@ -36,33 +46,45 @@ def _convert_to_mappings(self, mapper_list):
             mapper_dict[mapper.handler_name] = mapper
         return mapper_dict
 
+
 def convert_to_multiqc_config(checkqc_config_dict):
     multiqc_config_format = {}
     handler_mapper = HandlerMapper()
     for mapper_name, mapper in handler_mapper.mapping.items():
         qc_criteria = checkqc_config_dict.get(mapper.handler_name)
         multiqc_config_value = {mapper.multiqc_mapping: {}}
-        if not qc_criteria['warning'] == 'unknown':
-            multiqc_config_value[mapper.multiqc_mapping]['warn'] = [{mapper.compare_direction: qc_criteria['warning']}]
-        if not qc_criteria['error'] == 'unknown':
-            multiqc_config_value[mapper.multiqc_mapping]['fail'] = [{mapper.compare_direction: qc_criteria['error']}]
+        if not qc_criteria["warning"] == "unknown":
+            multiqc_config_value[mapper.multiqc_mapping]["warn"] = [
+                {mapper.compare_direction: qc_criteria["warning"]}
+            ]
+        if not qc_criteria["error"] == "unknown":
+            multiqc_config_value[mapper.multiqc_mapping]["fail"] = [
+                {mapper.compare_direction: qc_criteria["error"]}
+            ]
+
+        multiqc_config_format[mapper.multiqc_mapping] = multiqc_config_value[
+            mapper.multiqc_mapping
+        ]
 
-        multiqc_config_format[mapper.multiqc_mapping] = multiqc_config_value[mapper.multiqc_mapping]
+    return {"table_cond_formatting_rules": multiqc_config_format}
 
-    return {'table_cond_formatting_rules': multiqc_config_format}
 
 def convert_to_dict(checkqc_config):
     checkqc_config_dict = {}
     for qc_handler in checkqc_config:
-        checkqc_config_dict[qc_handler['name']] = qc_handler
+        checkqc_config_dict[qc_handler["name"]] = qc_handler
 
     return checkqc_config_dict
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Converts CheckQC tresholds to MultiQC conditional format')
-    parser.add_argument('--runfolder', type=str, required=True, help='Path to runfolder')
-    parser.add_argument('--config', type=str, help='Path to checkQC config')
+    parser = argparse.ArgumentParser(
+        description="Converts CheckQC tresholds to MultiQC conditional format"
+    )
+    parser.add_argument(
+        "--runfolder", type=str, required=True, help="Path to runfolder"
+    )
+    parser.add_argument("--config", type=str, help="Path to checkQC config")
 
     args = parser.parse_args()
     runfolder = args.runfolder
@@ -71,12 +93,16 @@ def convert_to_dict(checkqc_config):
     run_type_recognizer = RunTypeRecognizer(runfolder)
     config = ConfigFactory.from_config_path(config)
 
-    instrument_and_reagent_version = run_type_recognizer.instrument_and_reagent_version()
+    instrument_and_reagent_version = (
+        run_type_recognizer.instrument_and_reagent_version()
+    )
     both_read_lengths = run_type_recognizer.read_length()
     read_length = int(both_read_lengths.split("-")[0])
-    checkqc_config = config.get_handler_configs(instrument_and_reagent_version, read_length)
+    checkqc_config = config.get_handler_configs(
+        instrument_and_reagent_version, read_length
+    )
     checkqc_config_dict = convert_to_dict(checkqc_config)
     multiqc_config = convert_to_multiqc_config(checkqc_config_dict)
 
-    with open('qc_thresholds.yaml', 'w') as outfile:
+    with open("qc_thresholds.yaml", "w") as outfile:
         yaml.dump(multiqc_config, outfile)
@@ -0,0 +1,29 @@
+singularity {  // enable Singularity container execution
+    enabled = true
+    autoMounts = true
+}
+
+process {  // container image per process, selected by process name
+    withName: 'FASTQC' {
+        container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1'
+    }
+    withName: 'FASTQ_SCREEN' {
+        container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1'
+    }
+    withName: 'GET_QC_THRESHOLDS' {  // same checkqc image as GET_METADATA
+        container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+    }
+    withName: 'GET_METADATA' {  // same checkqc image as GET_QC_THRESHOLDS
+        container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+    }
+    withName: 'INTEROP_SUMMARY' {
+        container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.1.23--h1b792b2_0'
+    }
+    withName: 'MULTIQC_PER_FLOWCELL' {  // both MultiQC processes share one image
+        container = 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0'
+    }
+    withName: 'MULTIQC_PER_PROJECT' {
+        container = 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0'
+    }
+}
+
@@ -0,0 +1,19 @@
+/*
+========================================================================================
+    Nextflow config file for running minimal tests
+========================================================================================
+    Defines input files and everything required to run a fast and simple pipeline test.
+    Use as follows:
+        nextflow run main.nf -profile dev,test,singularity
+
+
+    This config takes inspiration from https://github.com/nf-core/rnaseq
+----------------------------------------------------------------------------------------
+*/
+
+params {  // test inputs; every path below lives under $baseDir/test_data
+    run_folder = "$baseDir/test_data/210510_M03910_0104_000000000-JHGJL"
+    fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"
+    checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
+    config_dir = "$baseDir/test_data/test_config"
+}