-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathtest_process_BESS_table_performance.py
More file actions
107 lines (85 loc) · 3.49 KB
/
test_process_BESS_table_performance.py
File metadata and controls
107 lines (85 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""
Performance test script for process_BESS_table function.
This script loads a sample of data from the ECOv002 calibration/validation
inputs CSV and runs it through process_BESS_table to analyze performance.
"""
import logging
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from pytictoc import TicToc
# Configure logging to show timing information
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Import BESS-JPL package
from BESS_JPL.process_BESS_table import process_BESS_table
def main():
"""Run performance test on process_BESS_table with sample data."""
timer = TicToc()
# Path to input CSV file
csv_path = Path(__file__).parent / "BESS_JPL" / "ECOv002-cal-val-BESS-JPL-inputs.csv"
if not csv_path.exists():
print(f"Error: Input CSV file not found at {csv_path}")
sys.exit(1)
print("=" * 80)
print("BESS-JPL process_BESS_table Performance Test")
print("=" * 80)
# Load the full dataset
print(f"\nLoading data from: {csv_path}")
timer.tic()
base_df = pd.read_csv(csv_path)
elapsed = timer.tocvalue()
print(f"Loaded {len(base_df)} rows in {elapsed:.2f} seconds")
# Duplicate the data 10 times for larger volume testing
print(f"\nDuplicating data 10 times for larger volume testing...")
timer.tic()
input_df = pd.concat([base_df] * 10, ignore_index=True)
elapsed = timer.tocvalue()
print(f"Created duplicated dataset with {len(input_df)} rows in {elapsed:.2f} seconds")
print(f"\nInput data shape: {input_df.shape}")
print(f"Columns: {len(input_df.columns)}")
# Display sample info
if 'geometry' in input_df.columns:
print(f"Geometry column present: Yes")
if 'time_UTC' in input_df.columns:
print(f"Time range: {input_df['time_UTC'].min()} to {input_df['time_UTC'].max()}")
print(f"Unique times: {input_df['time_UTC'].nunique()}")
# Run process_BESS_table with timing
print(f"\nRunning process_BESS_table on full dataset (offline mode)...")
timer.tic()
try:
output_df = process_BESS_table(
input_df,
GEOS5FP_connection=None,
verbose=True,
offline_mode=True # Use offline mode to avoid external data fetches
)
total_elapsed = timer.tocvalue()
print(f"\n{'=' * 80}")
print(f"COMPLETED in {total_elapsed:.2f} seconds")
print(f"{'=' * 80}")
print(f"Output shape: {output_df.shape}")
print(f"Processing rate: {len(input_df) / total_elapsed:.2f} rows/second")
print(f"Time per row: {total_elapsed / len(input_df) * 1000:.2f} milliseconds")
# Show some output columns
new_columns = set(output_df.columns) - set(input_df.columns)
if new_columns:
print(f"\nNew columns added ({len(new_columns)}): {sorted(list(new_columns))[:10]}...")
# Check for any issues
null_counts = output_df.isnull().sum()
if null_counts.sum() > 0:
print(f"\nWarning: Found null values in output")
print(null_counts[null_counts > 0].head())
except Exception as e:
print(f"\nERROR: {type(e).__name__}: {e}")
import traceback
traceback.print_exc()
print(f"\n{'=' * 80}")
print("Performance test complete")
print(f"{'=' * 80}\n")
if __name__ == "__main__":
main()