@@ -21,42 +21,74 @@ def main(raw_args=None):
2121 parser .add_argument ('fastqs' , metavar = "outs/fastq_path/HLGW3DRXX" ,
2222 nargs = '?' , action = "store" , type = str ,
2323 help = "Full path to FASTQ files, used to help fill in new library files. If missing will just use values in the libraries.csv file. Multiple paths should be comma delimited" )
24+ # parser.add_argument('--ocm', action='store_true', default=False,
25+ # help="Use this flag if OCM platform is used")
2426
2527 args = parser .parse_args (raw_args )
2628 if args .fastqs != None :
2729 fastqs = args .fastqs .split (',' )
2830
29-
30- with open (args .file_name ) as f :
31- headers = next (f ).strip ().split (',' )
32- #print(headers)
33- samples = dict ()
34- for line in f :
35- line = line .strip ().split (',' )
36- if line [0 ] in samples :
37- samples [line [0 ]].append (line [1 :])
38- else :
39- samples [line [0 ]] = [line [1 :]]
40-
41- for sample in samples :
42- text = []
43- for values in samples [sample ]:
44- if args .fastqs != None :
45- runs = [path for path in fastqs if values [0 ] in path ]
46- if len (runs ) != 1 :
47- sys .exit ("Problems finding unique match for %s in %s" % (values [0 ], args .fastqs ))
31+ ## Check if the input file is in OCM format
32+ import pandas as pd
33+ df = pd .read_csv (args .lib , header = 0 )
34+ if 'ocm_barcode_ids' in df .columns :
35+ if df ['ocm_barcode_ids' ].any ():
36+ print ("OCM platform detected, using OCM specific libraries file format" )
37+ args .ocm = True
38+ else :
39+ args .ocm = False
40+ if args .ocm == False :
41+ with open (args .file_name ) as f :
42+ headers = next (f ).strip ().split (',' )
43+ #print(headers)
44+ samples = dict ()
45+ for line in f :
46+ line = line .strip ().split (',' )
47+ if line [0 ] in samples :
48+ samples [line [0 ]].append (line [1 :])
4849 else :
49- if values [2 ] != values [- 1 ]:
50- text .append ("," .join ([runs [0 ], values [1 ], values [2 ], values [- 1 ]]))
51- else :
52- text .append ("," .join ([runs [0 ], values [1 ], values [2 ]]))
53- else :
54- text .append ("," .join (values ))
50+ samples [line [0 ]] = [line [1 :]]
5551
56- with open ('%s_libraries.csv' % sample , 'w' ) as f :
57- f .write ('fastqs,sample,library_type\n ' )
58- f .write ('\n ' .join (text ))
52+ for sample in samples :
53+ text = []
54+ for values in samples [sample ]:
55+ if args .fastqs != None :
56+ runs = [path for path in fastqs if values [0 ] in path ]
57+ if len (runs ) != 1 :
58+ sys .exit ("Problems finding unique match for %s in %s" % (values [0 ], args .fastqs ))
59+ else :
60+ if values [2 ] != values [- 1 ]:
61+ text .append ("," .join ([runs [0 ], values [1 ], values [2 ], values [- 1 ]]))
62+ else :
63+ text .append ("," .join ([runs [0 ], values [1 ], values [2 ]]))
64+ else :
65+ text .append ("," .join (values ))
5966
67+ with open ('%s_libraries.csv' % sample , 'w' ) as f :
68+ f .write ('fastqs,sample,library_type\n ' )
69+ f .write ('\n ' .join (text ))
70+ else :
71+ import pandas as pd
72+ df = pd .read_csv (args .file_name , header = 0 )
73+ samples = df ['Sample' ].unique ()
74+ for sample in samples :
75+ subdf = df [df ['Sample' ] == sample ].copy ()
76+ # If fastqs is provided, try to match each row's Flowcell to a fastq path
77+ if args .fastqs is not None :
78+ matched_fastqs = []
79+ for idx , row in subdf .iterrows ():
80+ # Try to match Flowcell in fastqs path
81+ matches = [fq for fq in fastqs if str (row ['Flowcell' ]) in fq ]
82+ if len (matches ) == 1 :
83+ matched_fastqs .append (matches [0 ])
84+ elif len (matches ) > 1 :
85+ sys .exit (f"Multiple matches for Flowcell '{ row ['Flowcell' ]} ' in fastqs: { matches } " )
86+ else :
87+ matched_fastqs .append ("" ) # Or handle as needed
88+ subdf .insert (0 , 'fastqs' , matched_fastqs )
89+ # Write all columns for this sample to a new CSV
90+ out_file = f"{ sample } _libraries.csv"
91+ subdf .to_csv (out_file , index = False )
6092 #print(samples)
6193
6294
0 commit comments