-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcum_hist_line.py
More file actions
executable file
·174 lines (153 loc) · 6.49 KB
/
cum_hist_line.py
File metadata and controls
executable file
·174 lines (153 loc) · 6.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python3
"""Plot a line plot version of a cumulative histogram.
Accepts a column name with a min/max/step.
Data is found via TSV config file.
Each line in the config file has the format:
<statistics TSV>\t<format index>\t<label>
Format indexes 0-7 use COLORS with linestyle -
Next 8-15 use linestyle --, 16-23 :
Don't use more than that, seriously
When running this script outside of the Snakemake workflow,
the config file may instead have four columns, where the
second is a string interpretable by Matplotlib as a color,
the third is a Matplotlib linestyle, and the fourth is the label.
There are also a bunch of options to customize dimensions
in the final plot, which the Snakemake does not use, but
those options are meant for ad hoc runs.
"""
import argparse # for command-line argument parsing
import csv # for reading TSV files
import numpy as np # for numerical operations
import matplotlib.pyplot as plt # for plotting
from typing import List # for type hints
# Eight-color palette taken from https://davidmathlogic.com/colorblind/
# read_config() picks an entry with ``format_index % 8``, so the order
# and the length of this list both matter.
COLORS = [
    '#E69F00',
    '#56B4E9',
    '#009E73',
    '#0072B2',
    '#D55E00',
    '#CC79A7',
    '#F0E442',
    '#000000',
]
"""Default color palette"""
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    The column name, bin minimum/maximum/step and output file are
    required: the script performs arithmetic on the bin values and
    saves to the output file, so leaving any of them out would
    otherwise surface later as an error on a ``None`` value instead
    of a usage message.

    Returns
    -------
    argparse.Namespace
        Parsed arguments with the attributes ``column_name``,
        ``min_val``, ``max_val``, ``step``, ``title``, ``output_file``,
        ``width``, ``height``, ``legend_shrink``, ``legend_x``,
        ``legend_y`` and ``config``.
    """
    parser = argparse.ArgumentParser(
        description="Plot cumulative histograms as line plots")

    # Options the script cannot run without.
    parser.add_argument("--column-name", "-c", type=str, required=True,
                        help="Column name to plot")
    parser.add_argument("--min-val", "-m", type=float, required=True,
                        help="Minimum value for histogram bins")
    parser.add_argument("--max-val", "-M", type=float, required=True,
                        help="Maximum value for histogram bins")
    parser.add_argument("--step", "-s", type=float, required=True,
                        help="Step size for histogram bins")
    parser.add_argument("--output-file", "-o", type=str, required=True,
                        help="Output file")

    # Purely cosmetic options; all of them may be omitted.
    parser.add_argument("--title", "-t", type=str,
                        help="Title for the plot")
    parser.add_argument("--width", type=float, default=8,
                        help="Figure width")
    parser.add_argument("--height", type=float, default=4.5,
                        help="Figure height")
    parser.add_argument("--legend-shrink", type=float, default=0.5,
                        help="How much to shrink the plot for legend")
    parser.add_argument("--legend-x", type=float, default=1.02,
                        help="Legend x coordinate")
    parser.add_argument("--legend-y", type=float, default=1.02,
                        help="Legend y coordinate")

    parser.add_argument("config", type=str, help="Path to the config file")
    return parser.parse_args()
def read_config(config_file: str, column_name: str) -> List[tuple]:
    """Read the config file and load the values it points to.

    Every non-blank line of the config file is tab-separated and has
    either three columns (statistics TSV, format index, label) or four
    columns (statistics TSV, color, linestyle, label). Blank lines,
    such as a trailing empty line, are skipped.

    For three-column rows the format index selects the line style
    (0-7 ``-``, 8-15 ``--``, 16-23 ``:``) and the color
    (``COLORS[format_index % 8]``). For four-column rows the color and
    line style are taken verbatim from the row.

    Parameters
    ----------
    config_file : str
        Path to the config file.
    column_name : str
        Name of the column to read from the TSV files.

    Returns
    -------
    List[tuple]
        Each tuple contains ([values], color, linestyle, label), in the
        order the rows appear in the config file.

    Raises
    ------
    ValueError
        If a row does not have 3 or 4 columns, if a format index is not
        an integer in 0-23, or if a value in the requested column
        cannot be converted to ``float``.
    KeyError
        If a statistics TSV has no column called ``column_name``.
    """
    config = []
    with open(config_file, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no data; skipping them keeps a
                # trailing empty line from being reported as a bad row.
                continue

            parts = stripped.split('\t')
            if len(parts) == 3:
                # Interpret format index (see file docstring)
                stats_path, raw_index, label = parts
                try:
                    format_index = int(raw_index)
                except ValueError as err:
                    raise ValueError(
                        'Format index must be an integer 0-23') from err
                if not 0 <= format_index <= 23:
                    raise ValueError('Format index must be an integer 0-23')
                # Each run of 8 indexes shares one line style.
                linestyle = ('-', '--', ':')[format_index // 8]
                color = COLORS[format_index % 8]
            elif len(parts) == 4:
                # Color & linestyle are explicit
                stats_path, color, linestyle, label = parts
            else:
                raise ValueError("Each row must have either 3 or 4 columns.")

            # Only open the statistics file once the row is known to be
            # well-formed.
            with open(stats_path) as tsv:
                values = [float(row[column_name])
                          for row in csv.DictReader(tsv, delimiter='\t')]
            config.append((values, color, linestyle, label))
    return config
def plot_cumulative_line(to_plot: List[tuple], column_name: str,
                         bins: np.ndarray, title: str, output_file: str,
                         width: float, height: float, legend_shrink: float,
                         legend_x: float, legend_y: float) -> None:
    """Plot a cumulative histogram as a line plot.

    For every entry in ``to_plot`` one line is drawn showing, per bin,
    the percentage of values that are greater than or equal to the
    bin's left edge. Values above the last bin edge are included in
    that percentage; values below the first edge only count towards
    the total. Entries without values are skipped with a warning on
    standard output.

    Parameters
    ----------
    to_plot : List[tuple]
        List of tuples containing ([values], color, linestyle, label).
    column_name : str
        Name of column, used as the x-axis label.
    bins : np.ndarray
        Histogram bin edges.
    title : str
        Title for the plot.
    output_file : str
        Output file name.
    width : float
        Figure width.
    height : float
        Figure height.
    legend_shrink : float
        Right edge of the plot area (passed to ``subplots_adjust``),
        which leaves room for the legend.
    legend_x : float
        X-coordinate of legend
    legend_y : float
        Y-coordinate of legend
    """
    plt.figure(figsize=(width, height))
    try:
        # NOTE(review): points are drawn at the bin centers although each
        # percentage is counted from the bin's left edge; kept as is.
        bin_centers = (bins[:-1] + bins[1:]) / 2
        for values, color, linestyle, label in to_plot:
            if len(values) == 0:
                print(f"Warning: No values to plot for label '{label}'")
                continue
            data = np.asarray(values)
            counts, _ = np.histogram(data, bins=bins)
            # np.histogram ignores values beyond the last edge, yet those
            # values are ">=" every bin, so add them back to each count.
            overflow = np.count_nonzero(data > bins[-1])
            # Reverse cumulative sum: number of values in this bin or any
            # later one.
            at_least = np.cumsum(counts[::-1])[::-1] + overflow
            cum_counts = at_least / len(data) * 100
            plt.plot(bin_centers, cum_counts, color=color,
                     linestyle=linestyle, label=label)
        plt.grid()
        plt.title(title)
        plt.xlabel(column_name, fontsize=13)
        plt.ylabel('% reads ≥', fontsize=13)
        # Shrink plot area to make room for legend
        plt.subplots_adjust(right=legend_shrink)
        plt.legend(bbox_to_anchor=(legend_x, legend_y), loc='upper left')
        plt.savefig(output_file)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()
if __name__ == '__main__':
    args = parse_args()
    if args.step <= 0:
        raise SystemExit("--step must be greater than 0")
    # Build the edges min, min + step, ... up to the first edge that
    # reaches max. The number of bins is computed as an integer first
    # because np.arange with a floating-point step and a "max + step"
    # stop can produce one edge too many (e.g. 0..1 in steps of 0.1).
    # The small tolerance absorbs rounding error in the division.
    n_bins = int(np.ceil((args.max_val - args.min_val) / args.step - 1e-9))
    bins = args.min_val + args.step * np.arange(n_bins + 1)
    config = read_config(args.config, args.column_name)
    plot_cumulative_line(config, args.column_name, bins, args.title,
                         args.output_file, args.width, args.height,
                         args.legend_shrink, args.legend_x, args.legend_y)