Skip to content

Commit d66703a

Browse files
authored
v1.3.4 update (#122)
1 parent aa321be commit d66703a

18 files changed

+82
-45
lines changed

README.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ github_pages_url <- description$GITHUB_PAGES
2626

2727
<p style="font-size: 16px;"><em>Public Database Submission Pipeline</em></p>
2828

29-
**Beta Version**: v1.3.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!
29+
**Beta Version**: v1.3.4. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!
3030

3131
**General Disclaimer**: This repository was created for use by CDC programs to collaborate on public health related projects in support of the [CDC mission](https://www.cdc.gov/about/organization/mission.htm). GitHub is not hosted by the CDC, but is a third party website used by CDC and its partners to share information and collaborate on software. CDC use of GitHub does not imply an endorsement of any one particular service, product, or enterprise.
3232

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
<!-- ![GitHub last commit](https://img.shields.io/github/last-commit/montilab/cadra) -->
1111

12-
**Beta Version**: 1.3.3. This pipeline is currently in Beta testing, and
12+
**Beta Version**: 1.3.4. This pipeline is currently in Beta testing, and
1313
issues could appear during submission. Please use it at your own risk.
1414
Feedback and suggestions are welcome!
1515

argument_handler.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def args_parser():
2323
config_file_parser = argparse.ArgumentParser(add_help=False)
2424
file_parser = argparse.ArgumentParser(add_help=False)
2525
test_parser = argparse.ArgumentParser(add_help=False)
26-
26+
publication_parser = argparse.ArgumentParser(add_help=False)
2727
database_parser.add_argument("--biosample", "-b",
2828
help="Create/Submit BioSample data.",
2929
action="store_const",
@@ -87,6 +87,15 @@ def args_parser():
8787
action="store_const",
8888
default=False,
8989
const=True)
90+
publication_parser.add_argument("--publication_title",
91+
help="Publication Title associated with sample submission. For GenBank only, overwrites value given via config file.",
92+
required=False,
93+
default=None)
94+
publication_parser.add_argument("--publication_status",
95+
help="Status of publication associated with sample submission. For GenBank only, overwrites value given via config file.",
96+
required=False,
97+
default=None,
98+
choices=["Unpublished", "In-press", "Published"])
9099

91100
# Create the submodule commands
92101
subparser_modules = parser.add_subparsers(dest="command")
@@ -96,15 +105,15 @@ def args_parser():
96105
"prep",
97106
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
98107
description="Generate all files required to submit to databases selected.",
99-
parents=[database_parser, organism_parser, submission_name_parser, submission_dir_parser, config_file_parser, file_parser, validate_parser]
108+
parents=[database_parser, organism_parser, submission_name_parser, submission_dir_parser, config_file_parser, file_parser, validate_parser, publication_parser]
100109
)
101110

102111
# submit command
103112
submit_module = subparser_modules.add_parser(
104113
"submit",
105114
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
106115
description="Generate all files required and begin the submission process to databases selected.",
107-
parents=[database_parser, organism_parser, submission_name_parser, submission_dir_parser, config_file_parser, file_parser, test_parser, validate_parser]
116+
parents=[database_parser, organism_parser, submission_name_parser, submission_dir_parser, config_file_parser, file_parser, test_parser, validate_parser, publication_parser]
108117
)
109118

110119
# check_submission_status command

biosample_sra_handler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def check_raw_read_files(submission_name: str, submission_dir: str, metadata: pd
2222
raw_reads_path_default = os.path.join(os.path.split(os.path.split(submission_dir)[0])[0], "raw_reads")
2323
# Separate samples stored in local and cloud
2424
local_df = metadata[metadata["sra-file_location"] == "local"]
25-
file_columns = [col for col in local_df.columns if re.match("sra-file_[1-9]\d*", col)]
25+
file_columns = [col for col in local_df.columns if re.match(r"sra-file_[1-9]\d*", col)]
2626
validated_files = set()
2727
invalid_raw_files = []
2828
for index, row in local_df.iterrows():
@@ -58,7 +58,7 @@ def create_manual_submission_files(database: str, submission_dir: str, metadata:
5858
column_ordered = ["sample_name","library_ID"]
5959
prefix = "sra-"
6060
# Create SRA specific fields
61-
filename_cols = [col for col in metadata.columns.tolist() if re.match("sra-file_[1-9]\d*", col)]
61+
filename_cols = [col for col in metadata.columns.tolist() if re.match(r"sra-file_[1-9]\d*", col)]
6262
# Correct index for filename column
6363
for col in filename_cols:
6464
# Remove 0 index
@@ -176,7 +176,7 @@ def create_submission_xml(organism: str, database: str, submission_name: str, co
176176
if "SRA" in database:
177177
database_df = metadata.filter(regex=(SRA_REGEX)).copy()
178178
database_df = database_df.drop_duplicates()
179-
file_columns = [col for col in database_df.columns if re.match("sra-file_[1-9]\d*", col)]
179+
file_columns = [col for col in database_df.columns if re.match(r"sra-file_[1-9]\d*", col)]
180180
for index, row in database_df.iterrows():
181181
action = etree.SubElement(root, "Action")
182182
addfiles = etree.SubElement(action, "AddFiles", target_db="SRA")
@@ -197,7 +197,7 @@ def create_submission_xml(organism: str, database: str, submission_name: str, co
197197
datatype = etree.SubElement(file, "DataType")
198198
datatype.text = "generic-data"
199199
# Remove columns with sra- prefix that are not attributes
200-
sra_cols = [col for col in database_df.columns.tolist() if col.startswith('sra-') and not re.match("(sra-sample_name|sra-title|sra-comment|sra-file_location|sra-file_\d*)", col)]
200+
sra_cols = [col for col in database_df.columns.tolist() if col.startswith('sra-') and not re.match(r"(sra-sample_name|sra-title|sra-comment|sra-file_location|sra-file_\d*)", col)]
201201
for col in sra_cols:
202202
attribute_value = row[col]
203203
if pd.notnull(attribute_value) and attribute_value.strip() != "":

config/seqsender/config_file/gisaid_schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,11 @@
203203
'type': 'integer',
204204
'allowed': [1, 2],
205205
'nullable': True
206+
},
207+
'CLI_Path': {
208+
'required': False,
209+
'type': 'string',
210+
'nullable': True
206211
}
207212
}
208213
}

config/seqsender/config_file/ncbi_gisaid_schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@
185185
'type': ['integer', 'string'],
186186
'allowed': [1, 2, 'None'],
187187
'nullable': True
188+
},
189+
'CLI_Path': {
190+
'required': False,
191+
'type': 'string',
192+
'nullable': True
188193
}
189194
}
190195
}

config/seqsender/config_file/ncbi_schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@
188188
'type': 'integer',
189189
'allowed': [1, 2],
190190
'nullable': True
191+
},
192+
'CLI_Path': {
193+
'required': False,
194+
'type': 'string',
195+
'nullable': True
191196
}
192197
}
193198
}

config/seqsender/upload_log_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"Organism": Column(
1818
dtype="object",
1919
checks=[
20-
Check.isin(["FLU", "COV", "POX", "ARBO", "OTHER"]),
20+
Check.isin(["FLU", "COV", "POX", "ARBO", "RSV", "OTHER"]),
2121
],
2222
nullable=False,
2323
unique=False,

docs/app.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

file_handler.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import sys
88
import os
9-
from typing import Dict
9+
from typing import Dict, Any
1010
import pandas as pd
1111
from Bio import SeqIO
1212
from Bio.Seq import Seq
@@ -33,7 +33,7 @@ def validate_directory(name: str, path: str):
3333
sys.exit(1)
3434

3535
# Validate gisaid cli exists or error out
36-
def validate_gisaid_installer(submission_dir: str, organism: str) -> str:
36+
def validate_gisaid_installer(submission_dir: str, organism: str, config_dict: Dict[str, Any]) -> str:
3737
# /<submission_dir>/gisaid_cli/<organism>_CLI
3838
gisaid_cli_path_option_one = os.path.join(submission_dir, "gisaid_cli", organism.lower()+"CLI")
3939
# /seqsender/gisaid_cli/<organism>_CLI
@@ -42,7 +42,10 @@ def validate_gisaid_installer(submission_dir: str, organism: str) -> str:
4242
gisaid_cli_path_option_three = os.path.join(submission_dir, "gisaid_cli", organism.lower()+"CLI", organism.lower()+"CLI")
4343
# /seqsender/gisaid_cli/<organism>_CLI/<organism>_CLI
4444
gisaid_cli_path_option_four = os.path.join(PROG_DIR, "gisaid_cli", organism.lower()+"CLI", organism.lower()+"CLI")
45-
if os.path.isfile(gisaid_cli_path_option_one):
45+
# gisaid cli path provided by config file
46+
if "CLI_Path" in config_dict and config_dict["CLI_Path"] is not None and config_dict["CLI_Path"].strip() != "" and os.path.isfile(config_dict["CLI_Path"].strip()):
47+
return config_dict["CLI_Path"].strip()
48+
elif os.path.isfile(gisaid_cli_path_option_one):
4649
return gisaid_cli_path_option_one
4750
elif os.path.isfile(gisaid_cli_path_option_two):
4851
return gisaid_cli_path_option_two
@@ -51,6 +54,9 @@ def validate_gisaid_installer(submission_dir: str, organism: str) -> str:
5154
elif os.path.isfile(gisaid_cli_path_option_four):
5255
return gisaid_cli_path_option_four
5356
else:
57+
if "CLI_Path" in config_dict and config_dict["CLI_Path"] is not None and config_dict["CLI_Path"].strip() != "":
58+
cli_path_error = config_dict["CLI_Path"]
59+
print(f"Error: There is not a GISAID CLI for {organism} provided via config file at: '{cli_path_error}'", file=sys.stderr)
5460
print(f"Error: There is not a GISAID CLI for {organism} located at: '{gisaid_cli_path_option_one}' or '{gisaid_cli_path_option_two}'", file=sys.stderr)
5561
print(f"Download the GISAID CLI for {organism} from \"https://gisaid.org/\".", file=sys.stderr)
5662
print(f"Extract the zip file and place the CLI binary at either: '{gisaid_cli_path_option_one}' or '{gisaid_cli_path_option_two}'", file=sys.stderr)

0 commit comments

Comments
 (0)