-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
63 lines (54 loc) · 2.35 KB
/
example.py
File metadata and controls
63 lines (54 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from simulation import generate_data
from confounder_analyzer import ConfounderAnalyzer
import pandas as pd
def main():
    """
    Run the confounder analysis demonstration end to end.

    Steps:
    1. Generate a synthetic data set with ``generate_data``.
    2. Build a ``ConfounderAnalyzer`` over the candidates ``Z1`` .. ``Z6``.
    3. Rank the candidates and print the ranking table.
    4. Render the text report and write it to
       ``confounder_analysis_report.txt`` in the current working directory.

    Errors raised during data generation, ranking, or report creation are
    reported on stdout and end the run early; they are not re-raised.
    Exceptions raised while constructing the analyzer are not caught.

    Returns:
        None.
    """
    # Single source of truth: the number of simulated features and the
    # candidate names Z1..Zn are derived from the same value so they cannot
    # drift apart.
    n_candidates = 6
    report_filename = "confounder_analysis_report.txt"

    print("Step 1: Generating synthetic data...")
    # Generate data with 5000 samples and `n_candidates` potential confounders
    try:
        df = generate_data(n_samples=5000, n_features=n_candidates)
        n_clusters = len(df['cluster'].unique())
        print("Data generation complete.")
        print(f"Generated {len(df)} samples and {n_clusters} clusters.")
        print("True confounders in simulation: Z1, Z2 (linear)")
        print("Variable with non-linear effect on Y: Z3")
        print("-" * 30)
    except Exception as e:
        # Top-level demo boundary: report the problem and stop.
        print(f"Failed to generate data. Error: {e}")
        return

    # Define the list of potential confounders to be analyzed
    confounder_candidates = [f'Z{i}' for i in range(1, n_candidates + 1)]

    print("\nStep 2: Initializing and running ConfounderAnalyzer...")
    # Initialize the analyzer with the list of candidates
    analyzer = ConfounderAnalyzer(
        confounder_candidates=confounder_candidates,
        n_boot=100,  # Using 100 bootstrap samples for reasonable speed
        pc_alpha=0.05,
        random_state=42,
    )

    # Run the full analysis
    try:
        final_ranking_df = analyzer.rank_confounders(df)
        print("\nStep 3: Confounder Analysis Results")
        print("-" * 30)
        print("Final Confounder Ranking (higher score is stronger evidence):")
        # Scope the display tweaks to this print only, so the process-wide
        # pandas options are restored afterwards.
        with pd.option_context('display.width', 100,
                               'display.max_columns', 10):
            print(final_ranking_df)
        print("\nAnalysis complete. Z1 and Z2 are expected to rank highest.")
    except Exception as e:
        print(f"An error occurred during confounder analysis. Error: {e}")
        return

    # Generate and save the text report. Handled separately from the analysis
    # so that a rendering or file-system failure is not reported as an
    # analysis failure.
    try:
        print("\nStep 4: Generating and saving report...")
        report_text = analyzer.generate_report(final_ranking_df)
        # Explicit encoding: the default depends on the platform locale.
        with open(report_filename, "w", encoding="utf-8") as f:
            f.write(report_text)
        print(f"Report saved to {report_filename}")
    except Exception as e:
        print(f"Failed to generate or save the report. Error: {e}")
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()