Skip to content

Commit 675be6f

Browse files
committed
Add support for OCM platform in create_library_files.py
1 parent 74c1f4c commit 675be6f

1 file changed

Lines changed: 60 additions & 28 deletions

File tree

scripts/fb/create_library_files.py

Lines changed: 60 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -21,42 +21,74 @@ def main(raw_args=None):
2121
parser.add_argument('fastqs', metavar="outs/fastq_path/HLGW3DRXX",
2222
nargs='?', action="store", type=str,
2323
help="Full path to FASTQ files, used to help fill in new library files. If missing will just use values in the libraries.csv file. Multiple paths should be comma delimited")
24+
# parser.add_argument('--ocm', action='store_true', default=False,
25+
# help="Use this flag if OCM platform is used")
2426

2527
args = parser.parse_args(raw_args)
2628
if args.fastqs != None:
2729
fastqs = args.fastqs.split(',')
2830

29-
30-
with open(args.file_name) as f:
31-
headers = next(f).strip().split(',')
32-
#print(headers)
33-
samples = dict()
34-
for line in f:
35-
line = line.strip().split(',')
36-
if line[0] in samples:
37-
samples[line[0]].append(line[1:])
38-
else:
39-
samples[line[0]] = [line[1:]]
40-
41-
for sample in samples:
42-
text = []
43-
for values in samples[sample]:
44-
if args.fastqs != None:
45-
runs = [path for path in fastqs if values[0] in path]
46-
if len(runs) != 1:
47-
sys.exit("Problems finding unique match for %s in %s" % (values[0], args.fastqs))
31+
## Check if the input file is in OCM format
32+
import pandas as pd
33+
df = pd.read_csv(args.lib, header=0)
34+
if 'ocm_barcode_ids' in df.columns:
35+
if df['ocm_barcode_ids'].any():
36+
print("OCM platform detected, using OCM specific libraries file format")
37+
args.ocm = True
38+
else:
39+
args.ocm = False
40+
if args.ocm == False:
41+
with open(args.file_name) as f:
42+
headers = next(f).strip().split(',')
43+
#print(headers)
44+
samples = dict()
45+
for line in f:
46+
line = line.strip().split(',')
47+
if line[0] in samples:
48+
samples[line[0]].append(line[1:])
4849
else:
49-
if values[2] != values[-1]:
50-
text.append(",".join([runs[0], values[1], values[2], values[-1]]))
51-
else:
52-
text.append(",".join([runs[0], values[1], values[2]]))
53-
else:
54-
text.append(",".join(values))
50+
samples[line[0]] = [line[1:]]
5551

56-
with open('%s_libraries.csv' % sample, 'w') as f:
57-
f.write('fastqs,sample,library_type\n')
58-
f.write('\n'.join(text))
52+
for sample in samples:
53+
text = []
54+
for values in samples[sample]:
55+
if args.fastqs != None:
56+
runs = [path for path in fastqs if values[0] in path]
57+
if len(runs) != 1:
58+
sys.exit("Problems finding unique match for %s in %s" % (values[0], args.fastqs))
59+
else:
60+
if values[2] != values[-1]:
61+
text.append(",".join([runs[0], values[1], values[2], values[-1]]))
62+
else:
63+
text.append(",".join([runs[0], values[1], values[2]]))
64+
else:
65+
text.append(",".join(values))
5966

67+
with open('%s_libraries.csv' % sample, 'w') as f:
68+
f.write('fastqs,sample,library_type\n')
69+
f.write('\n'.join(text))
70+
else:
71+
import pandas as pd
72+
df = pd.read_csv(args.file_name, header=0)
73+
samples = df['Sample'].unique()
74+
for sample in samples:
75+
subdf = df[df['Sample'] == sample].copy()
76+
# If fastqs is provided, try to match each row's Flowcell to a fastq path
77+
if args.fastqs is not None:
78+
matched_fastqs = []
79+
for idx, row in subdf.iterrows():
80+
# Try to match Flowcell in fastqs path
81+
matches = [fq for fq in fastqs if str(row['Flowcell']) in fq]
82+
if len(matches) == 1:
83+
matched_fastqs.append(matches[0])
84+
elif len(matches) > 1:
85+
sys.exit(f"Multiple matches for Flowcell '{row['Flowcell']}' in fastqs: {matches}")
86+
else:
87+
matched_fastqs.append("") # Or handle as needed
88+
subdf.insert(0, 'fastqs', matched_fastqs)
89+
# Write all columns for this sample to a new CSV
90+
out_file = f"{sample}_libraries.csv"
91+
subdf.to_csv(out_file, index=False)
6092
#print(samples)
6193

6294

0 commit comments

Comments
 (0)