Skip to content
24 changes: 16 additions & 8 deletions BRB/ET.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,26 @@ def RNA(config, outputDir, baseDict, sample2lib):


def sendToParkour(config, msg):
    """
    Post the collected results for the current run's flow cell to Parkour.

    The flow cell ID is derived from the run folder name stored in the
    "runID" option of the "Options" section:

      * AVITI runs (second "_"-separated field starts with "AV"): the third
        field is the flow cell ID.
      * Illumina runs: the fourth field without its first character,
        e.g. C605HACXX from 150416_SN7001180_0196_BC605HACXX.

    In both cases only the part after the last "-" is kept.

    Parameters
    ----------
    config : configparser-like object
        Must provide Options/runID and Parkour/ResultsURL, user, password
        and cert.
    msg : object
        Payload serialised with json.dumps and sent as the "sequences" field.

    Returns
    -------
    The response object returned by requests.post.

    Raises
    ------
    IndexError
        If the run ID has fewer "_"-separated fields than its platform needs.
    """
    fields = config.get("Options", "runID").split("_")

    # Decide per run, from its own folder name. Globbing for
    # <baseData>/*/RunManifest.csv is true as soon as *any* AVITI run exists
    # under baseData, which would make Illumina runs be parsed with the AVITI
    # field index. The "AV" prefix is the same rule used when choosing the
    # Sequence_Quality sub-folder for copied results.
    if len(fields) > 1 and fields[1].startswith("AV"):
        FCID = fields[2]
    else:
        # Drop the leading character of the Illumina flow cell field.
        FCID = fields[3][1:]

    # Keep only what follows the last dash; a no-op when there is no dash.
    FCID = FCID.split("-")[-1]

    d = {"flowcell_id": FCID, "sequences": json.dumps(msg)}
    log.info(f"sendToParkour: Sending {d} to Parkour")
    res = requests.post(
        config.get("Parkour", "ResultsURL"),
        auth=(config.get("Parkour", "user"), config.get("Parkour", "password")),
        data=d,
        verify=config.get("Parkour", "cert"),
    )
    log.info(f"sendToParkour return {res}")
    return res





def phoneHome(config, outputDir, pipeline, samples_tuples, organism, project, libType):
"""
Return metrics to Parkour, the results are in outputDir and pipeline needs to be run on them
Expand Down
43 changes: 31 additions & 12 deletions BRB/PushButton.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,13 @@ def copyCellRanger(config, d):
# /data/xxx/yyyy_lanes_1/Analysis_2526_zzzz/RNA-Seqsinglecell_mouse ->
# yyyy_lanes_1
lane_dir = Path(d).parents[1].stem
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
sequencing_type=lane_dir.split("_")[1]
if sequencing_type.startswith("AV"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now a second way of discriminating AVITI runs from Illumina runs; the detection should be consistent across the code base.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

implemented!

current_year = str(lane_dir)[0:4]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("AVITI24_" + current_year)
else:
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
for fname in files:
# to seqfac dir.
nname = fname.split('/')
Expand Down Expand Up @@ -144,8 +149,14 @@ def copyRELACS(config, d):
# /data/xxx/yyyy_lanes_1/Analysis_2526_zzzz/ChIP-Seq_mouse/RELACS_demultiplexing ->
# Sequence_Quality_yyyy/Illumina_yyyy/yyyy_lanes_1
lane_dir = Path(d).parents[1].stem
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)
sequencing_type=lane_dir.split("_")[1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be great to get rid of the duplicated code here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed duplicated part

if sequencing_type.startswith("AV"):
current_year = str(lane_dir)[0:4]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("AVITI24_" + current_year)
else:
current_year = "20" + str(lane_dir)[0:2]
year_postfix = Path("Sequence_Quality_" + current_year) / Path("Illumina_" + current_year)

log.info(f"copyRELACS - copying over RELACS files to samba path {year_postfix}")
for fname in files:
# to seqfac dir.
Expand Down Expand Up @@ -243,13 +254,19 @@ def RELACS(config, group, project, organism, libraryType, tuples):
if os.path.exists(os.path.join(outputDir, "analysis.done")):
return outputDir, 0, True

sampleSheet = "/dont_touch_this/short_runs/{}/RELACS_Project_{}.txt".format(runID, BRB.misc.pacifier(project))
project = BRB.misc.pacifier(project)
sampleSheet = f"/dont_touch_this/short_runs/{runID}/RELACS_Project_{project}.txt"

# Fallback if exact path doesn't exist
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And a third way.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented! Took care of the redundancies.

if not os.path.exists(sampleSheet):
matches = glob.glob(f"/dont_touch_this/short_runs/AV*/{runID}/RELACS_Project_{project}.txt")
sampleSheet = matches[0] if matches else None

if not os.path.exists(sampleSheet) and not os.path.exists(os.path.join(outputDir, "RELACS_sampleSheet.txt")):
log.critical("RELACS: wrong samplesheet name: {}".format(sampleSheet))
print("wrong samplesheet name!", sampleSheet)
return None, 1, False

project = BRB.misc.pacifier(project)

baseDir = "{}/{}/{}/{}/Project_{}".format(config.get('Paths', 'groupData'),
BRB.misc.pacifier(group),
BRB.misc.getLatestSeqdir(config.get('Paths','groupData'), group),
Expand Down Expand Up @@ -300,7 +317,7 @@ def RELACS(config, group, project, organism, libraryType, tuples):

# Back to the normal DNA pipeline
CMD = "PATH={}/bin:$PATH".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
CMD = [CMD, 'DNAmapping', '--DAG', '--trim', '--UMIDedup', '--mapq', '3', '-i', outputDir, '-o', outputDir, org_yaml]
CMD = [CMD, 'DNAmapping', '--DAG', '--trim', r"--trimmerOptions '-a AGATCGGAAGAG -A AGATCGGAAGAG'", '--UMIDedup', '--mapq', '3', '-i', outputDir, '-o', outputDir, org_yaml]
log.info(f"RELACS DNA wf CMD: {CMD}")
try:
subprocess.check_call(' '.join(CMD), shell=True)
Expand Down Expand Up @@ -451,7 +468,8 @@ def scRNAseq(config, group, project, organism, libraryType, tuples):
if 'GRCh38' in org_yaml:
org_yaml = 'GRCh38'
PE = linkFiles(config, group, project, outputDir, tuples)
CMD = [config.get('10x', 'RNA'), outputDir, outputDir, org_yaml]
snakeMakePath= "{}/bin".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not reflected in the ini file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for noticing!
Path updated in the ini file

CMD = [config.get('10x', 'RNA'), outputDir, outputDir, org_yaml, " --snakemakePath ", snakeMakePath]
log.info(f"scRNA wf CMD: {' '.join(CMD)}")
try:
subprocess.check_call(' '.join(CMD), shell=True)
Expand Down Expand Up @@ -544,7 +562,7 @@ def scATAC(config, group, project, organism, libraryType, tuples):
"""
scATAC 10x
"""

project = BRB.misc.pacifier(project)
org_name, org_label, org_yaml = organism
outputDir = createPath(config, group, project, org_label, libraryType, tuples)
Expand All @@ -563,10 +581,13 @@ def scATAC(config, group, project, organism, libraryType, tuples):
BRB.misc.getLatestSeqdir(config.get('Paths','groupData'), group),
config.get('Options', 'runID'),
BRB.misc.pacifier(project))
snakeMakePath= "{}/bin".format(os.path.join(config.get('Options', 'snakemakeWorkflowBaseDir')))
CMD = config.get('10x', 'ATAC')+" -i "+inDir
CMD += " -o "+outputDir
CMD += " "+org_yaml
CMD += " --projectID "+project+" --samples "+samples
CMD += " --snakemakePath "+snakeMakePath

log.info(f"scATAC wf CMD: {CMD}")
try:
subprocess.check_call(CMD, shell=True)
Expand Down Expand Up @@ -612,7 +633,6 @@ def GetResults(config, project, libraries):
)
log.info(f"Processing {dataPath}")
except:
print("external data")
ignore = True
validLibraryTypes = {v: i for i, v in enumerate(config.get('Options', 'validLibraryTypes').split(','))}
pipelines = config.get('Options', 'pipelines').split(',')
Expand All @@ -621,7 +641,6 @@ def GetResults(config, project, libraries):
skipList = []
external_skipList = []
org_dict = {}
print(libraries)
for library, v in libraries.items():
sampleName, libraryType, libraryProtocol, organism, indexType, requestDepth = v
org_name, org_label, org_yaml = organism
Expand Down
19 changes: 13 additions & 6 deletions BRB/findFinishedFlowCells.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,29 @@ def markFinished(config):


def queryParkour(config):
    """
    Ask Parkour what it knows about the flow cell of the current run.

    The flow cell ID is derived from the run folder name stored in the
    "runID" option of the "Options" section:

      * AVITI runs (second "_"-separated field starts with "AV"): the third
        field is the flow cell ID.
      * Illumina runs: the fourth field without its first character,
        e.g. C605HACXX from 150416_SN7001180_0196_BC605HACXX.

    In both cases only the part after the last "-" is kept.

    Parameters
    ----------
    config : configparser-like object
        Must provide Options/runID and Parkour/QueryURL, user, password
        and cert.

    Returns
    -------
    The decoded JSON body when Parkour answers with HTTP 200, otherwise an
    empty dict.

    Raises
    ------
    IndexError
        If the run ID has fewer "_"-separated fields than its platform needs.
    """
    fields = config.get("Options", "runID").split("_")

    # Decide per run, from its own folder name. Globbing for
    # <baseData>/*/RunManifest.csv is true as soon as *any* AVITI run exists
    # under baseData, which would make Illumina runs be parsed with the AVITI
    # field index.
    if len(fields) > 1 and fields[1].startswith("AV"):
        FCID = fields[2]
    else:
        # Drop the leading character of the Illumina flow cell field.
        FCID = fields[3][1:]

    # Keep only what follows the last dash; a no-op when there is no dash.
    FCID = FCID.split("-")[-1]

    d = {"flowcell_id": FCID}
    res = requests.get(
        config.get("Parkour", "QueryURL"),
        auth=(config.get("Parkour", "user"), config.get("Parkour", "password")),
        params=d,
        verify=config.get("Parkour", "cert"),
    )
    if res.status_code == 200:
        return res.json()
    return dict()


def newFlowCell(config):
dirs = glob.glob("{}/*/fastq.made".format(config.get("Paths","baseData")))
for d in dirs :
#Get the flow cell ID (e.g., 150416_SN7001180_0196_BC605HACXX)
config.set('Options','runID',d.split("/")[-2])

if config.get("Options","runID")[:4] < "1804":
continue

Expand Down
Loading