diff --git a/bio/reference/inphared-db/environment.yaml b/bio/reference/inphared-db/environment.yaml
new file mode 100644
index 00000000000..3f8f6dadf4a
--- /dev/null
+++ b/bio/reference/inphared-db/environment.yaml
@@ -0,0 +1,5 @@
+channels:
+ - conda-forge
+ - nodefaults
+dependencies:
+ - curl
diff --git a/bio/reference/inphared-db/meta.yaml b/bio/reference/inphared-db/meta.yaml
new file mode 100644
index 00000000000..48abf517578
--- /dev/null
+++ b/bio/reference/inphared-db/meta.yaml
@@ -0,0 +1,4 @@
+name: inphared-db
+description: Download a sequence file from the INPHARED database (https://github.com/RyanCook94/inphared/blob/main/README.md) and store it as a single .fasta file. Please check which database versions are currently available at the above link and adjust the config file accordingly.
+authors:
+ - Noriko A. Cassman
diff --git a/bio/reference/inphared-db/old_wrapper.py b/bio/reference/inphared-db/old_wrapper.py
new file mode 100644
index 00000000000..50ea7d46b96
--- /dev/null
+++ b/bio/reference/inphared-db/old_wrapper.py
@@ -0,0 +1,83 @@
+__author__ = "Johannes Köster"
+__copyright__ = "Copyright 2019, Johannes Köster"
+__email__ = "johannes.koester@uni-due.de"
+__license__ = "MIT"
+
+import subprocess as sp
+import sys
+from itertools import product
+from snakemake.shell import shell
+
+# Download one Ensembl FASTA file, selected via the rule's params (species,
+# release, build, datatype and optionally chromosome/branch), and write it
+# decompressed to the first output file.
+
+species = snakemake.params.species.lower()
+release = int(snakemake.params.release)
+build = snakemake.params.build
+
+branch = ""
+if release >= 81 and build == "GRCh37":
+    # use the special grch37 branch for new releases
+    branch = "grch37/"
+elif snakemake.params.get("branch"):
+    branch = snakemake.params.branch + "/"
+
+log = snakemake.log_fmt_shell(stdout=False, stderr=True)
+
+# The file name carries the release number only for releases up to 75.
+spec = f"{build}" if release > 75 else f"{build}.{release}"
+
+datatype = snakemake.params.get("datatype", "")
+chromosome = snakemake.params.get("chromosome", "")
+
+# Candidate file suffixes for the datatype; they are tried in order below.
+if datatype == "dna":
+    if chromosome:
+        suffixes = [f"dna.chromosome.{chromosome}.fa.gz"]
+    else:
+        suffixes = ["dna.primary_assembly.fa.gz", "dna.toplevel.fa.gz"]
+elif datatype == "cdna":
+    suffixes = ["cdna.all.fa.gz"]
+elif datatype == "cds":
+    suffixes = ["cds.all.fa.gz"]
+elif datatype == "ncrna":
+    suffixes = ["ncrna.fa.gz"]
+elif datatype == "pep":
+    suffixes = ["pep.all.fa.gz"]
+else:
+    raise ValueError("invalid datatype, must be one of dna, cdna, cds, ncrna, pep")
+
+if chromosome and datatype != "dna":
+    raise ValueError(
+        "invalid datatype, to select a single chromosome the datatype must be dna"
+    )
+
+url_prefix = f"ftp://ftp.ensembl.org/pub/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}"
+
+success = False
+for suffix in suffixes:
+    url = f"{url_prefix}.{suffix}"
+
+    try:
+        # Probe the URL (output discarded); on failure try the next suffix.
+        shell("curl -sSf {url} > /dev/null 2> /dev/null")
+    except sp.CalledProcessError:
+        continue
+
+    shell("(curl -L {url} | gzip -d > {snakemake.output[0]}) {log}")
+    success = True
+    break
+
+if not success:
+    if len(suffixes) > 1:
+        url = f"{url_prefix}.[{'|'.join(suffixes)}]"
+    else:
+        url = f"{url_prefix}.{suffixes[0]}"
+    print(
+        f"Unable to download requested sequence data from Ensembl ({url}). "
+        "Please check whether above URL is currently available (might be a temporary server issue). "
+        "Apart from that, did you check that this combination of species, build, and release is actually provided?",
+        file=sys.stderr,
+    )
+    sys.exit(1)
diff --git a/bio/reference/inphared-db/test/Snakefile b/bio/reference/inphared-db/test/Snakefile
new file mode 100644
index 00000000000..0c09c919190
--- /dev/null
+++ b/bio/reference/inphared-db/test/Snakefile
@@ -0,0 +1,12 @@
+configfile: "config.yaml"
+
+rule get_inphareddb:  # test rule: fetch one INPHARED sequence file through the inphared-db wrapper
+ output:
+ expand("{date}{suffix}", date=config["date"], suffix=config["suffix"])  # target file name: config date immediately followed by config suffix
+ params:
+ prefix = config["prefix"],  # base URL; wrapper.py concatenates prefix, date and suffix for curl
+ date = config["date"],
+ suffix = config["suffix"]
+ wrapper:
+ "master/bio/reference/inphared-db"
+
diff --git a/bio/reference/inphared-db/test/config.yaml b/bio/reference/inphared-db/test/config.yaml
new file mode 100644
index 00000000000..0f3ca1d9ef8
--- /dev/null
+++ b/bio/reference/inphared-db/test/config.yaml
@@ -0,0 +1,9 @@
+date:
+ "2Jul2023"
+
+suffix:
+ "_refseq_genomes.fa"
+ #"_genomes_excluding_refseq.fa"
+
+prefix:
+ "https://millardlab-inphared.s3.climb.ac.uk/"
diff --git a/bio/reference/inphared-db/test/old_release.smk b/bio/reference/inphared-db/test/old_release.smk
new file mode 100644
index 00000000000..a698b982d04
--- /dev/null
+++ b/bio/reference/inphared-db/test/old_release.smk
@@ -0,0 +1,29 @@
+rule get_genome:  # whole-genome download test via the ensembl-sequence wrapper
+ output:
+ "refs/genome.fasta",
+ params:
+ species="saccharomyces_cerevisiae",
+ datatype="dna",
+ build="R64-1-1",
+ release="75",  # old_wrapper.py in this diff names files "{build}.{release}" for releases <= 75; presumably ensembl-sequence does the same (confirm)
+ log:
+ "logs/get_genome.log",
+ cache: "omit-software" # save space and time with between workflow caching (see docs)
+ wrapper:
+ "master/bio/reference/ensembl-sequence"
+
+
+rule get_chromosome:  # single-chromosome download test via the ensembl-sequence wrapper
+    output:
+        "refs/old_release.chr1.fasta",
+    params:
+        species="saccharomyces_cerevisiae",
+        datatype="dna",
+        build="R64-1-1",
+        release="75",
+        chromosome="I",
+    log:
+        "logs/get_chromosome.log",  # own log file so get_genome's log is not overwritten
+    cache: "omit-software"  # save space and time with between-workflow caching (see docs)
+    wrapper:
+        "master/bio/reference/ensembl-sequence"
diff --git a/bio/reference/inphared-db/test/old_snakefile.smk b/bio/reference/inphared-db/test/old_snakefile.smk
new file mode 100644
index 00000000000..79f249a0d3b
--- /dev/null
+++ b/bio/reference/inphared-db/test/old_snakefile.smk
@@ -0,0 +1,30 @@
+rule get_genome:  # whole-genome download test via the ensembl-sequence wrapper
+    output:
+        "refs/genome.fasta",
+    params:
+        species="saccharomyces_cerevisiae",
+        datatype="dna",
+        build="R64-1-1",
+        release="98",
+    log:
+        "logs/get_genome.log",
+    cache: "omit-software"  # save space and time with between-workflow caching (see docs)
+    wrapper:
+        "master/bio/reference/ensembl-sequence"
+
+
+rule get_chromosome:  # single-chromosome download test via the ensembl-sequence wrapper
+    output:
+        "refs/chr1.fasta",
+    params:
+        species="saccharomyces_cerevisiae",
+        datatype="dna",
+        build="R64-1-1",
+        release="101",
+        chromosome="I",  # optional: restrict to chromosome
+        # branch="plants",  # optional: specify branch
+    log:
+        "logs/get_chromosome.log",  # own log file so get_genome's log is not overwritten
+    cache: "omit-software"  # save space and time with between-workflow caching (see docs)
+    wrapper:
+        "master/bio/reference/ensembl-sequence"
diff --git a/bio/reference/inphared-db/wrapper.py b/bio/reference/inphared-db/wrapper.py
new file mode 100644
index 00000000000..be912f72f31
--- /dev/null
+++ b/bio/reference/inphared-db/wrapper.py
@@ -0,0 +1,16 @@
+__author__ = "Noriko A. Cassman"
+__copyright__ = "Copyright 2023, Noriko A. Cassman"
+__email__ = "noriko.cassman@gmail.com"
+__license__ = "MIT"
+
+from snakemake.shell import shell
+
+# Redirect curl's stderr to the rule's log file, if one is defined.
+log = snakemake.log_fmt_shell(stdout=False, stderr=True)
+
+# The source URL is the concatenation of the prefix, date and suffix params;
+# the download is written to the rule's first declared output file.
+shell(
+    "curl -L -f {snakemake.params.prefix}{snakemake.params.date}{snakemake.params.suffix}"
+    " -o {snakemake.output[0]} {log}"
+)