Skip to content
24 changes: 16 additions & 8 deletions BRB/ET.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,26 @@ def RNA(config, outputDir, baseDict, sample2lib):


def sendToParkour(config, msg):
FCID = config.get("Options", "runID").split("_")[3][1:]
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
d['sequences'] = json.dumps(msg)
basePath= config.get("Paths","baseData")
aviti_check= glob.glob(f"{basePath}/*/RunManifest.csv")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not directly from the folder name ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the notice! opted for this approach.

if aviti_check:
FCID = config.get("Options", "runID").split("_")[2]
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
d['sequences'] = json.dumps(msg)
else:
FCID = config.get("Options", "runID").split("_")[3][1:]
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
d['sequences'] = json.dumps(msg)
log.info(f"sendToParkour: Sending {d} to Parkour")
res = requests.post(config.get("Parkour", "ResultsURL"), auth=(config.get("Parkour", "user"), config.get("Parkour", "password")), data=d, verify=config.get("Parkour", "cert"))
log.info(f"sendToParkour return {res}")
return res





def phoneHome(config, outputDir, pipeline, samples_tuples, organism, project, libType):
"""
Return metrics to Parkour, the results are in outputDir and pipeline needs to be run on them
Expand Down
50 changes: 36 additions & 14 deletions BRB/PushButton.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ def relinkFiles(config, group, project, org_label, libraryType, tuples):
log.info(f"no multiqc report under {mqcf}.")


def getsambaPath(lane_dir,Sequencer):
if Sequencer == "Aviti":
current_year = str(lane_dir)[0:4]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("AVITI24_" + current_year)
else:
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
return current_year, year_postfix


def copyCellRanger(config, d):
'''
copy Cellranger web_summaries to sequencing facility lane subdirectory & bioinfocore qc directory.
Expand All @@ -102,12 +112,13 @@ def copyCellRanger(config, d):
'''

files = glob.glob(os.path.join(d, '*/outs/', 'web_summary.html'))

sequencing_type= config.get("Options", "sequencerType")


# /data/xxx/yyyy_lanes_1/Analysis_2526_zzzz/RNA-Seqsinglecell_mouse ->
# yyyy_lanes_1
lane_dir = Path(d).parents[1].stem
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
current_year, year_postfix = getsambaPath(lane_dir,sequencing_type)
for fname in files:
# to seqfac dir.
nname = fname.split('/')
Expand Down Expand Up @@ -140,12 +151,12 @@ def copyRELACS(config, d):
'''

files = glob.glob(os.path.join(d, "RELACS_demultiplexing", 'Sample*/', '*_fig.png')) + glob.glob(os.path.join(d, "multiQC", '*html'))
sequencing_type=config.get("Options", "sequencerType")

# /data/xxx/yyyy_lanes_1/Analysis_2526_zzzz/ChIP-Seq_mouse/RELACS_demultiplexing ->
# Sequence_Quality_yyyy/Illumina_yyyy/yyyy_lanes_1
lane_dir = Path(d).parents[1].stem
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
current_year, year_postfix = getsambaPath(lane_dir, sequencing_type)
log.info(f"copyRELACS - copying over RELACS files to samba path {year_postfix}")
for fname in files:
# to seqfac dir.
Expand Down Expand Up @@ -238,18 +249,26 @@ def RELACS(config, group, project, organism, libraryType, tuples):
There better not be any duplicate RELACS sample names!
"""
runID = config.get('Options', 'runID').split("_lanes")[0]
sequencerType = config.get('Options', 'sequencerType')
org_name, org_label, org_yaml = organism
outputDir = createPath(config, group, BRB.misc.pacifier(project), org_label, libraryType, tuples)
if os.path.exists(os.path.join(outputDir, "analysis.done")):
return outputDir, 0, True

sampleSheet = "/dont_touch_this/short_runs/{}/RELACS_Project_{}.txt".format(runID, BRB.misc.pacifier(project))
project = BRB.misc.pacifier(project)

if sequencerType == "Aviti":
matches = glob.glob(f"/dont_touch_this/short_runs/AV*/{runID}/RELACS_Project_{project}.txt")
sampleSheet = matches[0] if matches else None
else:
sampleSheet = f"/dont_touch_this/short_runs/{runID}/RELACS_Project_{project}.txt"

# Fallback if exact path doesn't exist
if not os.path.exists(sampleSheet) and not os.path.exists(os.path.join(outputDir, "RELACS_sampleSheet.txt")):
log.critical("RELACS: wrong samplesheet name: {}".format(sampleSheet))
print("wrong samplesheet name!", sampleSheet)
return None, 1, False

project = BRB.misc.pacifier(project)

baseDir = "{}/{}/{}/{}/Project_{}".format(config.get('Paths', 'groupData'),
BRB.misc.pacifier(group),
BRB.misc.getLatestSeqdir(config.get('Paths','groupData'), group),
Expand Down Expand Up @@ -300,7 +319,7 @@ def RELACS(config, group, project, organism, libraryType, tuples):

# Back to the normal DNA pipeline
CMD = "PATH={}/bin:$PATH".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
CMD = [CMD, 'DNAmapping', '--DAG', '--trim', '--UMIDedup', '--mapq', '3', '-i', outputDir, '-o', outputDir, org_yaml]
CMD = [CMD, 'DNAmapping', '--DAG', '--trim', r"--trimmerOptions '-a AGATCGGAAGAG -A AGATCGGAAGAG'", '--UMIDedup', '--mapq', '3', '-i', outputDir, '-o', outputDir, org_yaml]
log.info(f"RELACS DNA wf CMD: {CMD}")
try:
subprocess.check_call(' '.join(CMD), shell=True)
Expand Down Expand Up @@ -451,7 +470,8 @@ def scRNAseq(config, group, project, organism, libraryType, tuples):
if 'GRCh38' in org_yaml:
org_yaml = 'GRCh38'
PE = linkFiles(config, group, project, outputDir, tuples)
CMD = [config.get('10x', 'RNA'), outputDir, outputDir, org_yaml]
snakeMakePath= "{}/bin".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not reflected in the ini file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for noticing!
Path updated in the ini file

CMD = [config.get('10x', 'RNA'), outputDir, outputDir, org_yaml, " --snakemakePath ", snakeMakePath]
log.info(f"scRNA wf CMD: {' '.join(CMD)}")
try:
subprocess.check_call(' '.join(CMD), shell=True)
Expand Down Expand Up @@ -544,7 +564,7 @@ def scATAC(config, group, project, organism, libraryType, tuples):
"""
scATAC 10x
"""

project = BRB.misc.pacifier(project)
org_name, org_label, org_yaml = organism
outputDir = createPath(config, group, project, org_label, libraryType, tuples)
Expand All @@ -563,10 +583,14 @@ def scATAC(config, group, project, organism, libraryType, tuples):
BRB.misc.getLatestSeqdir(config.get('Paths','groupData'), group),
config.get('Options', 'runID'),
BRB.misc.pacifier(project))

snakeMakePath= "{}/bin".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
CMD = config.get('10x', 'ATAC')+" -i "+inDir
CMD += " -o "+outputDir
CMD += " "+org_yaml
CMD += " --projectID "+project+" --samples "+samples
CMD += " --projectID "+project+" --samples "+ samples
CMD += " --snakemakePath "+snakeMakePath

log.info(f"scATAC wf CMD: {CMD}")
try:
subprocess.check_call(CMD, shell=True)
Expand Down Expand Up @@ -612,7 +636,6 @@ def GetResults(config, project, libraries):
)
log.info(f"Processing {dataPath}")
except:
print("external data")
ignore = True
validLibraryTypes = {v: i for i, v in enumerate(config.get('Options', 'validLibraryTypes').split(','))}
pipelines = config.get('Options', 'pipelines').split(',')
Expand All @@ -621,7 +644,6 @@ def GetResults(config, project, libraries):
skipList = []
external_skipList = []
org_dict = {}
print(libraries)
for library, v in libraries.items():
sampleName, libraryType, libraryProtocol, organism, indexType, requestDepth = v
org_name, org_label, org_yaml = organism
Expand Down
40 changes: 33 additions & 7 deletions BRB/findFinishedFlowCells.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,51 @@ def markFinished(config):


def queryParkour(config):
FCID = config.get("Options", "runID").split("_")[3][1:] # C605HACXX from 150416_SN7001180_0196_BC605HACXX
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
basePath= config.get("Paths","baseData")
sequencer_type = config.get("Options", "sequencerType")
if sequencer_type == "Aviti":
FCID = config.get("Options", "runID").split("_")[2]
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
else:
FCID = config.get("Options", "runID").split("_")[3][1:] # C605HACXX from 150416_SN7001180_0196_BC605HACXX
if '-' in FCID:
FCID = FCID.split('-')[-1]
d = {'flowcell_id': FCID}
res = requests.get(config.get("Parkour", "QueryURL"), auth=(config.get("Parkour", "user"), config.get("Parkour", "password")), params=d, verify=config.get("Parkour", "cert"))
if res.status_code == 200:
return res.json()
return dict()


def detect_sequencer_type(base_path: str) -> str:
"""
Detect whether a sequencing run is Aviti or Illumina
based on the presence of the Aviti-specific RunManifest.csv file.
"""
aviti_check = glob.glob(f"{base_path}/*/RunManifest.csv")
if aviti_check:
return "Aviti"
else:
return "Illumina"


def newFlowCell(config):
dirs = glob.glob("{}/*/fastq.made".format(config.get("Paths","baseData")))
for d in dirs :
#Get the flow cell ID (e.g., 150416_SN7001180_0196_BC605HACXX)
config.set('Options','runID',d.split("/")[-2])

run_id = Path(d).parents[0].name
config.set('Options','runID',run_id)

if config.get("Options","runID")[:4] < "1804":
continue

# Detect sequencer type
base_path = str(Path(d).parents[0])
seq_type = detect_sequencer_type(base_path)
config.set("Options", "sequencerType", seq_type)

if not flowCellProcessed(config):
print(f'Found new flow cell: [green]{config.get("Options","runID")}[/green]')
# Query parkour to see if there's anything to be done for this
Expand All @@ -55,4 +81,4 @@ def newFlowCell(config):
continue
return config, ParkourDict
print("No new flow cells found...")
return config, None
return config, None
2 changes: 1 addition & 1 deletion BigRedButton.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ validOrganisms=human,mouse,drosophila
#An ordered list of the organism names used in the pipeline (the order matches validOrganisms)
organismNames=hg38,mm10,dm6
#Snakemake workflow base directory
snakemakeWorkflowBaseDir=/package/anaconda3/envs/snakePipes-1.0.0_alpha2/bin/RNA-seq
snakemakeWorkflowBaseDir=/path/anaconda3/envs/snakePipes-V.x.y/
#Don't set these, they're just for passing info around
runID=

Expand Down
84 changes: 79 additions & 5 deletions tests/test_findFinishedFlowCells.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
import pytest
import configparser
import urllib3
from BRB.findFinishedFlowCells import flowCellProcessed, markFinished, queryParkour

from BRB.findFinishedFlowCells import flowCellProcessed, markFinished, queryParkour, detect_sequencer_type
from unittest.mock import patch, Mock

@pytest.fixture(scope='session')
def ifs(tmp_path_factory):
fp = tmp_path_factory.mktemp("flowcells")
Path(fp,"fc_fin").mkdir()
Path(fp, "fc_fin", "analysis.done").touch()
Path(fp,"fc_unfin").mkdir()
(fp / "aviti_run" / "flowcellXXX" / "RunManifest.csv").parent.mkdir(parents=True, exist_ok=True)
(fp / "aviti_run" / "flowcellXXX" / "RunManifest.csv").touch()

return fp


Expand All @@ -28,9 +31,17 @@ def create_conf(l = []):
"password": "123",
"cert": ""
}
config['Options'] = {'runID': '150416_SN7001180_0196_BC605HACXX'}

for _l in l:
config[_l[0]] = {_l[1]: _l[2]}

if ( config.get("Options", "sequencerType", fallback="Illumina") != "Aviti" ):
if not config.has_option("Options", "runID"):
config.set("Options", "runID", "150416_SN7001180_0196_BC605HACXX")
else:
if not config.has_option("Options", "runID"):
config.set("Options", "runID", "20250901_AV25XXX9_250443KMND")

return config

class TestfindFinishedFlowCells:
Expand All @@ -56,5 +67,68 @@ def test_markFinished(self, ifs):

def test_queryParkour(self):
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
config = create_conf()
assert len(queryParkour(config)) == 0
config = create_conf([
['Paths', 'baseData', ifs],
['Options', 'sequencerType', 'Illumina']])
assert len(queryParkour(config)) == 0

## Sequencing type detection
def test_typedetection_aviti(self, ifs):
basedir_path = Path(ifs, "aviti_run")
seq_type = detect_sequencer_type(str(basedir_path))
assert seq_type == "Aviti"

def test_typedetection_illumina(self, ifs):
basedir_path = Path(ifs, "fc_fin")
seq_type = detect_sequencer_type(str(basedir_path))
assert seq_type == "Illumina"

## query Parkour flow cell id extraction test
@patch("BRB.findFinishedFlowCells.requests.get")
def test_queryParkour_illumina(self, mock_get, ifs):
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

mock_resp = Mock()
mock_resp.status_code = 200
mock_resp.json.return_value = {}
mock_get.return_value = mock_resp

config = create_conf([
['Paths', 'baseData', ifs],
['Options', 'sequencerType', 'Illumina']
])
result = queryParkour(config)

FCID = "C605HACXX"
mock_get.assert_called_once_with(
"https://parkour-demo.ie-freiburg.mpg.de/nonext_api",
auth=("jefke", "123"),
params={'flowcell_id': FCID},
verify=""
)

assert result == {}

@patch("BRB.findFinishedFlowCells.requests.get")
def test_queryParkour_aviti(self, mock_get, ifs):
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

mock_resp = Mock()
mock_resp.status_code = 200
mock_resp.json.return_value = {"some": "data"}
mock_get.return_value = mock_resp

config = create_conf([
['Paths', 'baseData', ifs],
['Options', 'sequencerType', 'Aviti']
])
result = queryParkour(config)

FCID = "250443KMND"
mock_get.assert_called_once_with(
"https://parkour-demo.ie-freiburg.mpg.de/nonext_api",
auth=("jefke", "123"),
params={'flowcell_id': FCID},
verify=""
)
assert result == {"some": "data"}