-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBinnedPlotter.py
More file actions
136 lines (131 loc) · 6.93 KB
/
BinnedPlotter.py
File metadata and controls
136 lines (131 loc) · 6.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#Import necessary libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
""" BinnedIntensityPlotter class for processing and plotting binned intensity data made by Suyash v 26/05/2025
This class reads intensity data from CSV files, bins the data based on specified bin edges, and plots the binned data.
It also allows for treatment-specific data handling and visualization.
The class includes methods for:
- Initializing with file paths, bin edges, and treatment mapping
- Processing data from CSV files to extract and bin intensity values
- Importing binned data from a DataFrame
- Plotting the binned intensity data with treatment differentiation
- Getting treatment names based on position numbers
- Averaging different experiments and treatments to create a binned intensity DataFrame
- Changing the parameter for plotting (e.g., Mean, Median, etc.) is also possible as long as it is a Series vs Value function
"""
class BinnedIntensityPlotter:
def __init__(self, intensity_files, bin_edges, save_folder, treatment_map,timeframe : float):
"""
intensity_files: list of csv file paths
bin_edges: array-like, bin edges for time binning
save_folder: str, folder to save plots
treatment_map: dict, e.g. {"Control": [3, 5], "K4K8MO": [6, 7, 9]}
"""
self.intensity_files = intensity_files
self.bin_edges = bin_edges
self.save_folder = save_folder
self.treatment_map = treatment_map
self.intensity_data = pd.DataFrame()
self.binned_intensity_data = pd.DataFrame()
self.treatment_indices = {k: [] for k in treatment_map}
self.binned_intensity_data["Time (hpf)"] = []
self.timeframe = timeframe
def get_treatment(self, pos_number):
"""
Returns the treatment name for a given position number.
"""
for treatment, pos_list in self.treatment_map.items():
if pos_number in pos_list:
return treatment
return None
def process_data(self,plotparam:str="Mean"):
"""
Reads intensity files, assigns treatments, bins data, and stores results.
"""
bin_centers = 0.5 * (self.bin_edges[1:] + self.bin_edges[:-1])
if "Time (hpf)" not in self.binned_intensity_data.columns:
self.binned_intensity_data["Time (hpf)"] = bin_centers
for file in self.intensity_files:
date=file.split("/")[-1].split("_")[0]
posstr=file.split("Pos")[1][0:3]
posnumber = int(posstr)
treatment = self.get_treatment(posnumber)
if treatment is None:
print(f"Treatment not found for position {posnumber} in file {file}. Skipping.")
continue
data= pd.read_csv(file)
data["Label"] = f"{date}_Pos{posstr}"
data["Treatment"] = treatment
# Change the first " " column to time in hours post fertilization (hpf) [timeframe in seconds +4.5 hrs]
data["Time"] = [(x - 1) * self.timeframe/60/60+4 for x in data[" "]]
plt.plot(data["Time"], data[plotparam], label=posstr, alpha=0.5)
plt.legend()
self.intensity_data = pd.concat([self.intensity_data, data])
# Bin the data
bin_averages = [
np.mean(data["Mean"][(data["Time"] > self.bin_edges[i]) & (data["Time"] < self.bin_edges[i + 1])])
for i in range(len(bin_centers))
]
col_name = f"{treatment}_{plotparam}_{date}_Pos{posstr}"
#check if the column is new
if col_name not in self.binned_intensity_data.columns:
self.binned_intensity_data[col_name] = bin_averages
self.treatment_indices[treatment].append(col_name)
else:
print(f"Column {col_name} already exists in binned intensity data. Skipping.")
# Ensure "Time (hpf)" is the first column
self.binned_intensity_data = self.binned_intensity_data[["Time (hpf)"] + [col for col in self.binned_intensity_data.columns if col != "Time (hpf)"]]
#export binned intensity data
self.binned_intensity_data.to_csv(os.path.join(self.save_folder, "binned_intensity_data.csv"), index=False)
return self.binned_intensity_data
def import_data(self, binned_intdf):
""" Imports binned intensity data from a DataFrame.
binned_intdf: DataFrame containing binned intensity data
"""
self.binned_intensity_data = binned_intdf
# Extract treatment indices from the DataFrame columns
for col in self.binned_intensity_data.columns:
if col.startswith("Control_"):
self.treatment_indices["Control"].append(col)
elif col.startswith("K4K8MO_"):
self.treatment_indices["K4K8MO"].append(col)
# Ensure "Time (hpf)" is the first column
if "Time (hpf)" not in self.binned_intensity_data.columns:
raise ValueError("DataFrame must contain 'Time (hpf)' column.")
def plot_data(self):
fig, ax = plt.subplots(figsize=(7, 5.3))
plt.rcParams['figure.dpi'] = 100
plt.rcParams['font.size'] = 24
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'
plt.gca().spines['right'].set_color('none')
plt.gca().spines['top'].set_color('none')
ax.set_xlim(4.0, 10.0)
ax.set_xticks(np.arange(4.0, 10.0, 0.5),minor=True)
#ax.set_xticklabels(["4.5", "", "", "", "5.5", "", "", "", "6.5", "", "", "", "7.5", "", "", "", "8.5", ""])
ax.set_yticks(np.arange(0, 470, 25), minor=True)
#split self.binned_intensity_data into control and k4k8
control_cols = self.treatment_indices.get("Control", [])
k4k8_cols = self.treatment_indices.get("K4K8MO", [])
if control_cols:
control_data = self.binned_intensity_data[control_cols]
ax.plot(self.binned_intensity_data["Time (hpf)"], control_data.mean(axis=1), color="#83bb03", linewidth=2)
ax.fill_between(
self.binned_intensity_data["Time (hpf)"],
control_data.mean(axis=1) - control_data.sem(axis=1),
control_data.mean(axis=1) + control_data.sem(axis=1),
color="#83bb03", alpha=0.3
)
if k4k8_cols:
k4k8_data = self.binned_intensity_data[k4k8_cols]
ax.plot(self.binned_intensity_data["Time (hpf)"], k4k8_data.mean(axis=1), color="#ff7f00", linewidth=2)
ax.fill_between(
self.binned_intensity_data["Time (hpf)"],
k4k8_data.mean(axis=1) - k4k8_data.sem(axis=1),
k4k8_data.mean(axis=1) + k4k8_data.sem(axis=1),
color="#ff7f00", alpha=0.3
)
plt.show()