-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze_pokemon_accuracy.py
More file actions
70 lines (58 loc) · 2.79 KB
/
analyze_pokemon_accuracy.py
File metadata and controls
70 lines (58 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import argparse
from collections import defaultdict
# Command-line report of per-Pokémon recognition accuracy.
#
# Reads a JSON results file, counts correct/total responses for each Pokémon
# across every provider and model, then prints a full ranking followed by the
# top 10 and bottom 10 entries.


def extract_providers(data):
    """Return the provider mapping from either supported file layout.

    The new layout nests providers under ``data['default']['providers']``;
    the old layout is the provider mapping itself.
    """
    if 'default' in data and 'providers' in data['default']:
        return data['default']['providers']
    return data


def load_providers(json_path):
    """Read the JSON results file at *json_path* and return its providers."""
    # JSON text is UTF-8 and Pokémon names can be non-ASCII, so do not rely
    # on the platform's default locale encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return extract_providers(data)


def tally_results(providers):
    """Count correct and total responses per Pokémon.

    *providers* maps provider -> model name -> model data, where each model
    data dict has a ``'responses'`` list of dicts carrying ``'pokemon'`` and
    ``'correct'`` keys. Returns a mapping of Pokémon name to
    ``{'correct': int, 'total': int}``.
    """
    pokemon_results = defaultdict(lambda: {'correct': 0, 'total': 0})
    for provider_models in providers.values():
        for model_data in provider_models.values():
            for response in model_data['responses']:
                counts = pokemon_results[response['pokemon']]
                counts['total'] += 1
                if response['correct']:
                    counts['correct'] += 1
    return pokemon_results


def rank_by_accuracy(pokemon_results):
    """Return one row per Pokémon, best first.

    Each row is a dict with ``'pokemon'``, ``'correct'``, ``'total'`` and
    ``'accuracy'`` (a percentage). Rows are sorted by accuracy descending,
    with ties broken by total attempts descending.
    """
    pokemon_accuracy = [
        {
            'pokemon': pokemon,
            'correct': results['correct'],
            'total': results['total'],
            'accuracy': (
                (results['correct'] / results['total']) * 100
                if results['total'] > 0 else 0
            ),
        }
        for pokemon, results in pokemon_results.items()
    ]
    pokemon_accuracy.sort(key=lambda x: (x['accuracy'], x['total']), reverse=True)
    return pokemon_accuracy


def bottom_ranked(pokemon_accuracy, count=10):
    """Return ``(rank, row)`` pairs for the last *count* rows.

    Ranks are 1-based positions in the full list. The starting rank is
    derived from the number of rows actually taken, so a list shorter than
    *count* is numbered from 1 instead of from zero or a negative number.
    """
    if count <= 0:
        return []
    tail = pokemon_accuracy[-count:]
    first_rank = len(pokemon_accuracy) - len(tail) + 1
    return list(enumerate(tail, first_rank))


def format_table_row(rank, pokemon_data):
    """Format one line of the full ranking table."""
    # Format the percentage as a single token so the '%' sits directly after
    # the number rather than after column padding.
    return (
        f"{rank:<4} {pokemon_data['pokemon']:<15} {pokemon_data['correct']:<7} "
        f"{pokemon_data['total']:<5} {pokemon_data['accuracy']:.1f}%"
    )


def format_summary_line(rank, pokemon_data):
    """Format one line of the top-10 / bottom-10 listings."""
    return (
        f"{rank}. {pokemon_data['pokemon']} - "
        f"{pokemon_data['correct']}/{pokemon_data['total']} "
        f"({pokemon_data['accuracy']:.1f}%)"
    )


def print_report(pokemon_accuracy):
    """Print the full ranking, then the top 10 and bottom 10 rows."""
    print("Pokémon Recognition Accuracy (sorted by success rate)")
    print("=" * 60)
    print(f"{'Rank':<4} {'Pokémon':<15} {'Correct':<7} {'Total':<5} {'Accuracy':<8}")
    print("-" * 60)
    for i, pokemon_data in enumerate(pokemon_accuracy, 1):
        print(format_table_row(i, pokemon_data))

    # Also show top 10 and bottom 10
    print("\n" + "=" * 60)
    print("TOP 10 MOST RECOGNIZABLE POKÉMON:")
    print("=" * 60)
    for i, pokemon_data in enumerate(pokemon_accuracy[:10], 1):
        print(format_summary_line(i, pokemon_data))

    print("\n" + "=" * 60)
    print("BOTTOM 10 LEAST RECOGNIZABLE POKÉMON:")
    print("=" * 60)
    for i, pokemon_data in bottom_ranked(pokemon_accuracy, 10):
        print(format_summary_line(i, pokemon_data))


def main(argv=None):
    """Parse the command line, analyze the results file and print the report.

    *argv* is the argument list to parse; ``None`` makes argparse read
    ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Analyze Pokémon recognition accuracy from LLM results')
    parser.add_argument('json_file', help='Path to the JSON results file')
    args = parser.parse_args(argv)

    providers = load_providers(args.json_file)
    print_report(rank_by_accuracy(tally_results(providers)))


if __name__ == '__main__':
    main()