Statistical analysis tools for root trait data from SLEAP Roots.
# Clone the repository
git clone https://github.com/talmolab/sleap-roots-analyze.git
cd sleap-roots-analyze
# Install with uv
uv sync --group dev # Includes development dependencies

from sleap_roots_analyze.data_cleanup import (
load_trait_data,
get_trait_columns,
remove_nan_samples,
)
# Load data
df = load_trait_data("path/to/traits.csv")
# Get trait columns (excludes metadata automatically)
trait_cols = get_trait_columns(df)
# Remove samples with >20% missing data
df_clean, df_removed, stats = remove_nan_samples(
df, trait_cols, max_nan_fraction=0.2
)

from sleap_roots_analyze.statistics import calculate_heritability_estimates
# Calculate heritability for all traits
h2_results = calculate_heritability_estimates(
df_clean,
trait_cols,
genotype_col="geno",
replicate_col="rep"
)
# Filter low heritability traits
h2_results, df_filtered, removed, details = calculate_heritability_estimates(
df_clean,
trait_cols,
remove_low_h2=True,
h2_threshold=0.3
)

from sleap_roots_analyze.pca import perform_pca_analysis
# Run PCA with automatic component selection
result = perform_pca_analysis(
df_filtered,
standardize=True,
explained_variance_threshold=0.95
)
# Access results
pca_model = result['pca']
transformed_data = result['transformed_data']
loadings = result['loadings']

from sleap_roots_analyze.outlier_detection import (
detect_outliers_mahalanobis,
detect_outliers_isolation_forest,
remove_outliers_from_data
)
# Detect outliers using Mahalanobis distance
outliers_maha = detect_outliers_mahalanobis(
df_filtered[trait_cols],
use_robust=True
)
# Or use Isolation Forest for complex patterns
outliers_iso = detect_outliers_isolation_forest(
df_filtered[trait_cols],
contamination=0.1
)
# Remove outliers from data
df_clean, df_outliers = remove_outliers_from_data(
df_filtered,
outliers_maha['outlier_indices'],
return_outliers=True
)

from sleap_roots_analyze.visualization import (
create_heritability_plot,
create_pca_biplot,
create_feature_contribution_heatmap,
create_phenotype_variation_plot,
save_publication_figure
)
# Create heritability plot
fig = create_heritability_plot(h2_results, threshold=0.3)
# Create PCA biplot
fig_biplot = create_pca_biplot(
pca_result,
color_by="geno",
metadata_df=df_filtered[["Barcode", "geno"]]
)
# Create feature contribution heatmap
fig_heatmap = create_feature_contribution_heatmap(
pca_result['feature_contributions'],
n_components=5
)
# Save in publication format
save_publication_figure(fig, "heritability", formats=["pdf", "png"])

from sleap_roots_analyze.pca import run_pca_and_export_artifacts
# Run comprehensive PCA analysis with CSV exports
results = run_pca_and_export_artifacts(
df_filtered,
trait_cols=trait_cols,
analysis_dir="pca_results",
n_components=10,
save_csv=True,
save_prefix="experiment1_"
)
# Access results DataFrames
loadings_df = results['loadings_df']
pc_scores_df = results['pc_scores_df']
variance_df = results['variance_explained_df']
contributions_df = results['trait_variance_contributions_df']

from sleap_roots_analyze.interactive_visualization import (
create_interactive_pca_with_images,
create_interactive_umap_with_hover_highlight,
create_trait_explorer_dashboard,
create_interactive_image_gallery
)
# Create interactive PCA with sample images
fig = create_interactive_pca_with_images(
pca_result,
image_paths, # Dict mapping sample IDs to image paths
show_images=True,
metadata_df=df_filtered[["Barcode", "geno"]]
)
# Interactive UMAP with hover highlights
fig_umap = create_interactive_umap_with_hover_highlight(
umap_result,
highlight_on_hover=True,
size=8
)
# Create comprehensive trait explorer dashboard
dashboard = create_trait_explorer_dashboard(
df_filtered,
trait_cols,
groupby_col="geno"
)
# Generate interactive HTML gallery with images
html = create_interactive_image_gallery(
image_paths,
metadata_df=df_filtered[["Barcode", "geno", "trait1"]],
images_per_row=4,
image_width=200
)

- Data Cleaning: Automatic metadata detection, NaN handling, zero-inflated trait removal
- Statistical Analysis: Broad-sense heritability (H²), ANOVA, trait statistics
- PCA Analysis: Dimensionality reduction with automatic component selection, comprehensive export artifacts
- Outlier Detection: Mahalanobis, PCA reconstruction, and Isolation Forest methods
- Visualization: Publication-ready plots for heritability, PCA, outliers, and phenotype variation
- Interactive Visualization: Plotly-based interactive plots with image integration and hover effects
- UMAP Analysis: Non-linear dimensionality reduction for complex trait relationships
- Cross-Experiment Analysis: Compare and correlate data across multiple experiments
Expected CSV structure:
Barcode,geno,rep,trait1,trait2,trait3,...
BC001,Genotype1,1,100.5,200.3,50.2,...
BC002,Genotype1,2,102.3,195.8,48.9,...
Required columns:
- Genotype: `geno` (configurable)
- Replicate: `rep` (configurable)
- Sample ID: `Barcode` (configurable)
- Traits: Any numeric columns

# Run tests
uv run pytest
# Format code
uv run black src tests
# Lint code
uv run ruff check src tests
# Coverage report
uv run pytest --cov --cov-branch

sleap-roots-analyze/
├── src/sleap_roots_analyze/
│ ├── data_cleanup.py # Data loading and cleaning
│ ├── statistics.py # Statistical analysis
│ ├── pca.py # PCA analysis
│ ├── outlier_detection.py # Outlier detection
│ ├── visualization.py # Plotting and visualization
│ ├── outlier_visualization.py # Outlier-specific plots
│ ├── interactive_visualization.py # Interactive Plotly visualizations
│ ├── cross_experiment_analysis.py # Cross-experiment comparisons
│ ├── umap.py # UMAP dimensionality reduction
│ └── data_utils.py # Utility functions
├── tests/ # Test suite (150+ tests)
├── docs/ # Documentation
└── pyproject.toml # Project configuration
GNU General Public License v3.0 - see LICENSE file.
@software{sleap_roots_analyze,
title = {SLEAP Roots Analyze},
author = {Elizabeth Berrigan},
year = {2025},
url = {https://github.com/talmolab/sleap-roots-analyze}
}