-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimpute.py
41 lines (28 loc) · 1.09 KB
/
impute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import pandas as pd
def impute_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Impute missing values with the minimum observed value of each column.

    The minimum is taken column-wise, so "per protein" imputation assumes
    that every numeric column holds the measurements of one protein
    (TODO confirm orientation against the caller's table layout).
    Non-numeric columns (e.g. identifier or annotation columns) are left
    untouched: a minimum over them is either undefined or meaningless.
    A column with no observed values has no minimum and stays missing.

    Parameters:
    df (pandas.DataFrame): DataFrame with raw protein expression data.
        It is not modified.

    Returns:
    df_imputed (pandas.DataFrame): New DataFrame in which missing entries of
        numeric columns are replaced by that column's minimum.
    """
    # Compute the minimum of each numeric column only; including text columns
    # would either fail on mixed str/NaN values or yield a "smallest label".
    min_values = df.min(numeric_only=True)

    # fillna with a Series fills each column with the value stored under its
    # label; columns that are absent from min_values are not filled.
    df_imputed = df.fillna(min_values)
    return df_imputed
if __name__ == "__main__":
    # Ad-hoc driver: load a counts table, filter it, and run the imputation.
    # pandas is already imported at module level as `pd`, so it is not
    # imported again here.

    # The input is parsed as tab-separated text.
    df = pd.read_csv('read_counts.csv', sep='\t')

    # Imported inside the guard so that importing this module as a library
    # does not require quality_control to be importable.
    from quality_control import filter_low_counts

    df_filtered = filter_low_counts(df)
    print(df)
    print(df_filtered)

    df_imputed = impute_missing_values(df_filtered)
    # NOTE: df_imputed is not consumed further in this script; the intended
    # next step is to pass it on to the normalization function.