66import matplotlib .pyplot as plt
77import argparse
88import os
9+ import logging
10+ import sys
11+
12+ # Configure logging to stderr
13+ logging .basicConfig (
14+ level = logging .INFO ,
15+ format = '%(asctime)s - %(levelname)s - %(message)s' ,
16+ stream = sys .stderr
17+ )
918
1019def main ():
1120 parser = argparse .ArgumentParser ()
@@ -30,66 +39,97 @@ def main():
3039 )
3140 args = parser .parse_args ()
3241
33- # Load the data generated by pod5_to_df.py
34- if not os .path .exists (args .csv ):
35- print (f"Error: CSV file { args .csv } not found." )
36- return
42+ try :
43+ logging .info (f"Starting visualization for CSV: { args .csv } " )
44+
45+ # Load the data generated by pod5_to_df.py
46+ if not os .path .exists (args .csv ):
47+ logging .error (f"CSV file { args .csv } not found." )
48+ sys .exit (1 )
3749
38- raw_signal_df = pd .read_csv (args .csv )
50+ try :
51+ raw_signal_df = pd .read_csv (args .csv )
52+ logging .info (f"Successfully loaded CSV file with { len (raw_signal_df )} data points" )
53+ except Exception as e :
54+ logging .error (f"Failed to load CSV file { args .csv } : { e } " )
55+ raise
3956
40- # Plotting configuration
41- sns .set (font_scale = 1 )
42- sns .set_style ("white" )
43- fig , ax = plt .subplots (1 , 1 , figsize = (args .figwidth , args .figheight ))
44- x_feature , y_feature = 'time' , 'signal'
57+ # Plotting configuration
58+ try :
59+ sns .set (font_scale = 1 )
60+ sns .set_style ("white" )
61+ fig , ax = plt .subplots (1 , 1 , figsize = (args .figwidth , args .figheight ))
62+ x_feature , y_feature = 'time' , 'signal'
63+ logging .info ("Initialized plotting figure" )
64+ except Exception as e :
65+ logging .error (f"Failed to initialize plot: { e } " )
66+ raise
4567
46-
47- # Check if 'ann' column exists (requires Dorado move tags)
48- if 'ann' in raw_signal_df .columns :
49- # 1. Unannotated part (ann == -2)
50- df_neg2 = raw_signal_df [raw_signal_df ['ann' ] == - 2 ]
51- if not df_neg2 .empty :
52- sns .scatterplot (data = df_neg2 , x = x_feature , y = y_feature , color = 'blue' ,
53- label = 'unannotated part' , s = 50 , zorder = 4 , ax = ax )
68+
69+ # Check if 'ann' column exists (requires Dorado move tags)
70+ try :
71+ if 'ann' in raw_signal_df .columns :
72+ # 1. Unannotated part (ann == -2)
73+ df_neg2 = raw_signal_df [raw_signal_df ['ann' ] == - 2 ]
74+ if not df_neg2 .empty :
75+ sns .scatterplot (data = df_neg2 , x = x_feature , y = y_feature , color = 'blue' ,
76+ label = 'unannotated part' , s = 50 , zorder = 4 , ax = ax )
5477
55- # 2. Trimmed primer/adapter (ann == -1)
56- df_neg1 = raw_signal_df [raw_signal_df ['ann' ] == - 1 ]
57- if not df_neg1 .empty :
58- sns .lineplot (data = df_neg1 , x = x_feature , y = y_feature , color = 'green' ,
59- label = 'trimmed primer and adapter' , zorder = 2 , ax = ax )
78+ # 2. Trimmed primer/adapter (ann == -1)
79+ df_neg1 = raw_signal_df [raw_signal_df ['ann' ] == - 1 ]
80+ if not df_neg1 .empty :
81+ sns .lineplot (data = df_neg1 , x = x_feature , y = y_feature , color = 'green' ,
82+ label = 'trimmed primer and adapter' , zorder = 2 , ax = ax )
6083
61- # 3. Basecalled region (ann is 0 or 1)
62- df_base = raw_signal_df [raw_signal_df ['ann' ].isin ([0 , 1 ])]
63- if not df_base .empty :
64- sns .lineplot (data = df_base , x = x_feature , y = y_feature , color = 'orange' ,
65- label = 'basecalled region' , zorder = 3 , ax = ax )
84+ # 3. Basecalled region (ann is 0 or 1)
85+ df_base = raw_signal_df [raw_signal_df ['ann' ].isin ([0 , 1 ])]
86+ if not df_base .empty :
87+ sns .lineplot (data = df_base , x = x_feature , y = y_feature , color = 'orange' ,
88+ label = 'basecalled region' , zorder = 3 , ax = ax )
6689
67- # 5. Samples that emit bases (ann == 1) - Red Circles
68- df_emit = raw_signal_df [raw_signal_df ['ann' ] == 1 ]
69- if not df_emit .empty :
70- sns .scatterplot (data = df_emit , x = x_feature , y = y_feature , color = 'red' ,
71- label = 'samples that emit bases' , s = 50 , fc = "none" , ec = 'red' , zorder = 6 , ax = ax )
72- else :
73- # Fallback: Plot raw signal if 'ann' is missing (Move tags were likely missing in BAM)
74- print (f"Warning: 'ann' column missing in { args .csv } . Plotting raw signal only." )
75- sns .lineplot (data = raw_signal_df , x = x_feature , y = y_feature , color = 'grey' ,
76- alpha = 0.5 , label = 'raw signal (unannotated)' , ax = ax )
90+ # 5. Samples that emit bases (ann == 1) - Red Circles
91+ df_emit = raw_signal_df [raw_signal_df ['ann' ] == 1 ]
92+ if not df_emit .empty :
93+ sns .scatterplot (data = df_emit , x = x_feature , y = y_feature , color = 'red' ,
94+ label = 'samples that emit bases' , s = 50 , fc = "none" , ec = 'red' , zorder = 6 , ax = ax )
95+ logging .info ("Plotted annotated signal regions" )
96+ else :
97+ # Fallback: Plot raw signal if 'ann' is missing (Move tags were likely missing in BAM)
98+ logging .warning (f"'ann' column missing in { args .csv } . Plotting raw signal only." )
99+ sns .lineplot (data = raw_signal_df , x = x_feature , y = y_feature , color = 'grey' ,
100+ alpha = 0.5 , label = 'raw signal (unannotated)' , ax = ax )
101+ except Exception as e :
102+ logging .error (f"Error plotting signal regions: { e } " )
103+ raise
77104
78- # 4. Poly-A Tail Region (independent check for 'polyA' column)
79- if 'polyA' in raw_signal_df .columns :
80- df_polya = raw_signal_df [raw_signal_df ['polyA' ] > - 1 ]
81- if not df_polya .empty :
82- sns .lineplot (data = df_polya , x = x_feature , y = y_feature , color = 'magenta' ,
83- label = 'polyA-tail region' , zorder = 5 , linewidth = 2 , ax = ax )
105+ # 4. Poly-A Tail Region (independent check for 'polyA' column)
106+ try :
107+ if 'polyA' in raw_signal_df .columns :
108+ df_polya = raw_signal_df [raw_signal_df ['polyA' ] > - 1 ]
109+ if not df_polya .empty :
110+ sns .lineplot (data = df_polya , x = x_feature , y = y_feature , color = 'magenta' ,
111+ label = 'polyA-tail region' , zorder = 5 , linewidth = 2 , ax = ax )
112+ logging .info ("Plotted Poly-A tail region" )
113+ except Exception as e :
114+ logging .error (f"Error plotting Poly-A tail region: { e } " )
115+ raise
84116
85- ax .set (title = args .title )
86-
87- # Legend placement
88- ax .legend (bbox_to_anchor = (1.05 , 1 ), loc = 2 , borderaxespad = 0.0 )
89- plt .tight_layout ()
90- plt .savefig (args .output , dpi = 300 )
91- plt .close ()
92- print (f"Successfully saved plot to { args .output } " )
117+ try :
118+ ax .set (title = args .title )
119+
120+ # Legend placement
121+ ax .legend (bbox_to_anchor = (1.05 , 1 ), loc = 2 , borderaxespad = 0.0 )
122+ plt .tight_layout ()
123+ plt .savefig (args .output , dpi = 300 )
124+ plt .close ()
125+ logging .info (f"Successfully saved plot to { args .output } " )
126+ except Exception as e :
127+ logging .error (f"Failed to save plot to { args .output } : { e } " )
128+ raise
129+
130+ except Exception as e :
131+ logging .error (f"Fatal error in visualization: { e } " )
132+ sys .exit (1 )
93133
94134if __name__ == "__main__" :
95135 main ()
0 commit comments