Complete API documentation for programmatic use.
pip install raptor-rnaseq
import raptor
Profile RNA-seq count data for pipeline recommendation.
from raptor import RNAseqDataProfiler
import pandas as pd
# Load data
counts = pd.read_csv('counts.csv', index_col=0)
metadata = pd.read_csv('metadata.csv')
# Create profiler
profiler = RNAseqDataProfiler(counts, metadata)
# Run profiling
profile = profiler.profile()
# Access results
print(f"BCV: {profile['bcv']:.3f}")
print(f"Mean depth: {profile['mean_depth']:.0f}")
print(f"Samples: {profile['n_samples']}")
Methods:
- profile(): Run complete profiling, returns dict
- calculate_bcv(): Calculate biological variation
- assess_depth(): Analyze sequencing depth
- detect_outliers(): Identify problematic samples
- generate_plots(): Create diagnostic plots
Profile Output:
{
'n_samples': 12,
'n_genes': 15234,
'bcv': 0.42,
'bcv_category': 'medium',
'mean_depth': 25000000,
'depth_category': 'high',
'zero_inflation': 0.15,
'library_size_cv': 0.18,
'outliers': [],
'quality_flags': []
}
Recommend optimal pipelines based on data profile.
from raptor import PipelineRecommender
recommender = PipelineRecommender()
# Get recommendations
recommendations = recommender.recommend(profile)
# Top recommendation
top = recommendations[0]
print(f"Pipeline: {top['pipeline_name']}")
print(f"Score: {top['score']:.2f}")
print(f"Reasoning: {top['reasoning']}")
Methods:
- recommend(profile, n=3): Get top N recommendations
- score_pipeline(pipeline_id, profile): Score specific pipeline
- compare_pipelines(profile): Compare all pipelines
Recommendation Output:
[
{
'pipeline_id': 3,
'pipeline_name': 'Salmon-edgeR',
'score': 0.88,
'confidence': 'high',
'reasoning': 'Excellent balance...',
'expected_runtime': 1.2,
'expected_memory': 10,
'expected_accuracy': 0.90
},
...
]
Run and benchmark multiple pipelines.
from raptor import PipelineBenchmark
benchmark = PipelineBenchmark(
data_dir='fastq/',
output_dir='results/',
threads=8,
memory='32G'
)
# Run pipelines
results = benchmark.run_pipelines([1, 3, 4])
# Save results
benchmark.save_results(results)
Methods:
- run_pipelines(pipeline_ids): Run specified pipelines
- run_single_pipeline(pipeline_id): Run one pipeline
- save_results(results): Save to JSON
- load_results(): Load from JSON
Generate simulated RNA-seq data.
from raptor import DataSimulator
simulator = DataSimulator(
n_genes=2000,
n_samples=6,
n_de=400,
fold_changes=[0.5, 2.0],
seed=42
)
summary = simulator.generate_data('sim_data/')
Methods:
- generate_data(output_dir): Create simulated data
- Quick presets:
quick_simulate(output_dir, size='small')
Generate HTML/PDF reports.
from raptor import ReportGenerator
generator = ReportGenerator()
# Profile report
generator.generate_profile_report(
profile,
recommendations,
output='report.html'
)
# Benchmark report
generator.generate_benchmark_report(
results,
output='comparison.html'
)
Methods:
- generate_profile_report(): Profiling + recommendations
- generate_benchmark_report(): Pipeline comparisons
- generate_full_report(): Complete analysis
from raptor.utils import *
# File operations
ensure_dir('output/')
check_file_exists('data.csv')
# System checks
check_command_exists('STAR')
check_required_tools(['STAR', 'salmon', 'kallisto'])
get_available_memory() # Returns GB
get_cpu_count() # Returns cores
# Data validation
validate_count_matrix(counts)
validate_metadata(metadata, counts)
# Configuration
config = load_config('config.yaml')
save_config(config, 'my_config.yaml')
# Formatting
format_time(3665) # "1h 1m 5s"
format_bytes(1536000) # "1.5 MB"
import pandas as pd
from raptor import (
RNAseqDataProfiler,
PipelineRecommender,
PipelineBenchmark,
ReportGenerator
)
# 1. Load data
counts = pd.read_csv('counts.csv', index_col=0)
metadata = pd.read_csv('metadata.csv')
# 2. Profile data
profiler = RNAseqDataProfiler(counts, metadata)
profile = profiler.profile()
# 3. Get recommendations
recommender = PipelineRecommender()
recommendations = recommender.recommend(profile, n=3)
# 4. Generate report
report_gen = ReportGenerator()
report_gen.generate_profile_report(
profile,
recommendations,
output='my_recommendation.html'
)
# 5. Optional: Run benchmark
top_3 = [r['pipeline_id'] for r in recommendations]
benchmark = PipelineBenchmark(
data_dir='fastq/',
output_dir='benchmark_results/'
)
results = benchmark.run_pipelines(top_3)
# 6. Generate comparison report
report_gen.generate_benchmark_report(
results,
output='my_comparison.html'
)
Load and modify configuration:
from raptor.utils import load_config, save_config
# Load config
config = load_config('config/config.yaml')
# Modify settings
config['resources']['default_threads'] = 16
config['statistics']['fdr_threshold'] = 0.01
# Save
save_config(config, 'my_custom_config.yaml')
# Use in profiler
profiler = RNAseqDataProfiler(
counts,
metadata,
config_file='my_custom_config.yaml'
)
from raptor import RNAseqDataProfiler
from raptor.utils import validate_count_matrix
try:
    # Validate first
    validate_count_matrix(counts)
    # Then profile
    profiler = RNAseqDataProfiler(counts, metadata)
    profile = profiler.profile()
except FileNotFoundError as e:
    print(f"File not found: {e}")
except ValueError as e:
    print(f"Invalid data: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
from raptor.utils import setup_logging
import logging
# Setup logging
setup_logging(level='DEBUG', log_file='raptor.log')
# Use logger
logger = logging.getLogger('raptor')
logger.info("Starting analysis...")
import raptor
print(raptor.__version__)
print(raptor.__author__)
print(raptor.__email__)
# Get environment info
from raptor.utils import get_environment_info
info = get_environment_info()
print(info)
All classes use type hints:
from typing import Dict, List
from pathlib import Path
def my_analysis(
    counts: pd.DataFrame,
    metadata: pd.DataFrame,
    output_dir: Path
) -> Dict[str, float]:
    profiler = RNAseqDataProfiler(counts, metadata)
    return profiler.profile()
See source code for complete type annotations.