Kilo-Org
diff --git a/‎src/commands/models.py‎
Lines changed: 0 additions & 635 deletions b/‎src/commands/models.py‎
Lines changed: 0 additions & 635 deletions
diff --git a/‎src/commands/models/__init__.py‎
Lines changed: 30 additions & 0 deletions b/‎src/commands/models/__init__.py‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎src/commands/models/cache.py‎
Lines changed: 76 additions & 0 deletions b/‎src/commands/models/cache.py‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎src/commands/models/costs.py‎
Lines changed: 117 additions & 0 deletions b/‎src/commands/models/costs.py‎
Lines changed: 117 additions & 0 deletions
diff --git a/‎src/commands/models/info.py‎
Lines changed: 137 additions & 0 deletions b/‎src/commands/models/info.py‎
Lines changed: 137 additions & 0 deletions
@@ -0,0 +1,30 @@
+"""
+Models Command Group
+
+Model management commands for alex-treBENCH.
+"""
+
+import click
+from .list import models_list
+from .search import models_search
+from .info import models_info
+from .refresh import models_refresh
+from .cache import models_cache
+from .test import models_test
+from .costs import models_costs
+
+
+@click.group()
+def models():
+    """Model management commands."""
+    pass
+
+
+# Register all subcommands
+models.add_command(models_list, name='list')
+models.add_command(models_search, name='search')
+models.add_command(models_info, name='info')
+models.add_command(models_refresh, name='refresh')
+models.add_command(models_cache, name='cache')
+models.add_command(models_test, name='test')
+models.add_command(models_costs, name='costs')
@@ -0,0 +1,76 @@
+"""
+Models Cache Command
+
+Manage model cache.
+"""
+
+import click
+from rich.console import Console
+from rich.table import Table
+
+from src.utils.logging import get_logger
+
+console = Console()
+logger = get_logger(__name__)
+
+
+@click.command()
+@click.option('--clear', is_flag=True, help='Clear the model cache')
+@click.option('--info', is_flag=True, help='Show detailed cache information', default=True)
+@click.pass_context
+def models_cache(ctx, clear, info):
+    """Manage model cache."""
+    try:
+        from src.models.model_cache import get_model_cache
+        
+        cache = get_model_cache()
+        
+        if clear:
+            if cache.clear_cache():
+                console.print("[green]✓ Model cache cleared[/green]")
+            else:
+                console.print("[red]✗ Failed to clear cache[/red]")
+            return
+        
+        if info:
+            cache_info = cache.get_cache_info()
+            
+            # Cache status table
+            status_table = Table(title="Model Cache Status")
+            status_table.add_column("Property", style="cyan")
+            status_table.add_column("Value", style="green")
+            
+            status_table.add_row("Cache Path", cache_info['cache_path'])
+            status_table.add_row("Exists", "✓ Yes" if cache_info['exists'] else "✗ No")
+            status_table.add_row("Valid", "✓ Yes" if cache_info['valid'] else "✗ No")
+            status_table.add_row("TTL", f"{cache_info['ttl_seconds']} seconds")
+            
+            if cache_info['exists']:
+                status_table.add_row("Size", f"{cache_info['size_bytes']:,} bytes")
+                status_table.add_row("Model Count", str(cache_info['model_count']))
+                
+                if cache_info['cached_at']:
+                    status_table.add_row("Cached At", cache_info['cached_at'])
+                
+                if cache_info['age_seconds'] is not None:
+                    age_mins = cache_info['age_seconds'] / 60
+                    age_hours = age_mins / 60
+                    if age_hours > 1:
+                        age_str = f"{age_hours:.1f} hours"
+                    else:
+                        age_str = f"{age_mins:.1f} minutes"
+                    status_table.add_row("Age", age_str)
+            
+            console.print(status_table)
+            
+            # Cache recommendations
+            if not cache_info['exists']:
+                console.print("\n[yellow]💡 Run 'models refresh' to populate the cache[/yellow]")
+            elif not cache_info['valid']:
+                console.print("\n[yellow]💡 Cache has expired. Run 'models refresh' to update[/yellow]")
+            else:
+                console.print("\n[green]💡 Cache is up to date[/green]")
+            
+    except Exception as e:
+        console.print(f"[red]Error managing cache: {str(e)}[/red]")
+        logger.exception("Cache management failed")
@@ -0,0 +1,117 @@
+"""
+Models Costs Command
+
+Estimate costs for running benchmarks with a model.
+"""
+
+import asyncio
+import click
+from rich.console import Console
+from rich.table import Table
+
+from src.utils.logging import get_logger
+
+console = Console()
+logger = get_logger(__name__)
+
+
+@click.command()
+@click.option('--model', '-m', required=True, help='Model ID to estimate costs for')
+@click.option('--questions', '-q', type=int, default=100, help='Number of questions')
+@click.option('--input-tokens', type=int, help='Average input tokens per question')
+@click.option('--output-tokens', type=int, help='Average output tokens per question')
+@click.pass_context
+def models_costs(ctx, model, questions, input_tokens, output_tokens):
+    """Estimate costs for running benchmarks with a model."""
+    
+    async def calculate_costs_async():
+        try:
+            from src.models.model_registry import model_registry
+            from src.models.cost_calculator import CostCalculator
+            
+            # Validate model using dynamic system
+            models = await model_registry.get_available_models()
+            model_info = None
+            
+            for m in models:
+                if m.get('id', '').lower() == model.lower():
+                    model_info = m
+                    break
+            
+            if not model_info:
+                console.print(f"[red]Model not found: {model}[/red]")
+                console.print("[dim]Use 'models list' or 'models search' to find available models[/dim]")
+                return
+            
+            # Use defaults if not specified - fix variable scoping
+            default_input_tokens = 100
+            default_output_tokens = 50
+            
+            config = ctx.obj.get('config') if ctx.obj else None
+            if config and hasattr(config, 'costs') and hasattr(config.costs, 'estimation'):
+                try:
+                    default_input_tokens = getattr(config.costs.estimation, 'default_input_tokens_per_question', 100)
+                    default_output_tokens = getattr(config.costs.estimation, 'default_output_tokens_per_question', 50)
+                except AttributeError:
+                    pass  # Use defaults
+            
+            # Apply the values - use different variable names to avoid shadowing
+            actual_input_tokens = input_tokens if input_tokens is not None else default_input_tokens
+            actual_output_tokens = output_tokens if output_tokens is not None else default_output_tokens
+            
+            # Calculate costs using the proper ModelRegistry method
+            total_input_tokens = questions * actual_input_tokens
+            total_output_tokens = questions * actual_output_tokens
+            total_tokens = total_input_tokens + total_output_tokens
+            
+            # Use ModelRegistry.estimate_cost for proper cost calculation
+            from src.models.model_registry import ModelRegistry
+            total_cost = ModelRegistry.estimate_cost(model, total_input_tokens, total_output_tokens)
+            input_cost = ModelRegistry.estimate_cost(model, total_input_tokens, 0)
+            output_cost = ModelRegistry.estimate_cost(model, 0, total_output_tokens)
+            cost_per_question = total_cost / questions if questions > 0 else 0
+            
+            # Get pricing information for display purposes
+            pricing = model_info.get('pricing', {})
+            input_cost_per_1m = pricing.get('input_cost_per_1m_tokens', 0)
+            output_cost_per_1m = pricing.get('output_cost_per_1m_tokens', 0)
+            
+            # If not found in dynamic model info, try static config
+            if input_cost_per_1m == 0 and output_cost_per_1m == 0:
+                static_config = ModelRegistry.get_model_config(model)
+                if static_config:
+                    input_cost_per_1m = static_config.input_cost_per_1m_tokens
+                    output_cost_per_1m = static_config.output_cost_per_1m_tokens
+            
+            # Display estimate
+            table = Table(title=f"Cost Estimate: {model_info.get('name', model)}")
+            table.add_column("Parameter", style="cyan")
+            table.add_column("Value", style="green")
+            
+            table.add_row("Model ID", model)
+            table.add_row("Model Name", model_info.get('name', 'N/A'))
+            table.add_row("Provider", (model_info.get('provider', 'Unknown')).title())
+            table.add_row("Questions", f"{questions:,}")
+            table.add_row("Input Tokens per Question", f"{actual_input_tokens:,}")
+            table.add_row("Output Tokens per Question", f"{actual_output_tokens:,}")
+            table.add_row("Total Input Tokens", f"{total_input_tokens:,}")
+            table.add_row("Total Output Tokens", f"{total_output_tokens:,}")
+            table.add_row("Total Tokens", f"{total_tokens:,}")
+            table.add_row("Input Cost", f"${input_cost:.6f}")
+            table.add_row("Output Cost", f"${output_cost:.6f}")
+            table.add_row("Total Cost", f"${total_cost:.4f}")
+            table.add_row("Cost per Question", f"${cost_per_question:.6f}")
+            
+            console.print(table)
+            
+            # Add context about pricing
+            if input_cost_per_1m == 0 and output_cost_per_1m == 0:
+                console.print("\n[yellow]⚠️  No pricing information available for this model[/yellow]")
+            else:
+                console.print(f"\n[dim]Based on: ${input_cost_per_1m:.2f}/${output_cost_per_1m:.2f} per 1M input/output tokens[/dim]")
+            
+        except Exception as e:
+            console.print(f"[red]Error calculating costs: {str(e)}[/red]")
+            logger.exception("Cost calculation failed")
+    
+    asyncio.run(calculate_costs_async())
@@ -0,0 +1,137 @@
+"""
+Models Info Command
+
+Show detailed information about a specific model.
+"""
+
+import asyncio
+import click
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+
+from src.utils.logging import get_logger
+
+console = Console()
+logger = get_logger(__name__)
+
+
+@click.command()
+@click.argument('model_id', required=True)
+@click.pass_context
+def models_info(ctx, model_id):
+    """Show detailed information about a specific model."""
+    
+    async def show_model_info_async():
+        try:
+            from src.models.model_registry import model_registry
+            
+            console.print(f"[blue]Getting information for model: {model_id}[/blue]")
+            
+            # Get all models and find the specific one
+            models = await model_registry.get_available_models()
+            model_info = None
+            
+            for model in models:
+                if model.get('id', '').lower() == model_id.lower():
+                    model_info = model
+                    break
+            
+            if not model_info:
+                console.print(f"[red]Model not found: {model_id}[/red]")
+                console.print("[dim]Use 'models list' or 'models search' to find available models[/dim]")
+                
+                # Show similar models
+                similar = model_registry.search_models(model_id.split('/')[-1], models)[:5]
+                if similar:
+                    console.print(f"\n[yellow]Similar models:[/yellow]")
+                    for sim in similar:
+                        console.print(f"  • {sim.get('id', 'N/A')}")
+                return
+            
+            # Display detailed information
+            console.print(Panel.fit(
+                f"[bold blue]{model_info.get('name', 'N/A')}[/bold blue]\n"
+                f"[dim]{model_info.get('description', 'No description available')}[/dim]",
+                title="Model Information",
+                border_style="blue"
+            ))
+            
+            # Basic details table
+            details_table = Table(title="Model Details")
+            details_table.add_column("Property", style="cyan")
+            details_table.add_column("Value", style="green")
+            
+            details_table.add_row("Model ID", model_info.get('id', 'N/A'))
+            details_table.add_row("Provider", (model_info.get('provider', 'Unknown')).title())
+            details_table.add_row("Context Length", f"{model_info.get('context_length', 0):,} tokens")
+            details_table.add_row("Available", "✓ Yes" if model_info.get('available', True) else "✗ No")
+            details_table.add_row("Modality", (model_info.get('modality', 'text')).title())
+            
+            # Add architecture info if available
+            architecture = model_info.get('architecture', {})
+            if architecture:
+                if 'tokenizer' in architecture:
+                    details_table.add_row("Tokenizer", architecture['tokenizer'])
+                if 'instruct_type' in architecture:
+                    details_table.add_row("Instruction Type", architecture['instruct_type'])
+            
+            console.print(details_table)
+            
+            # Pricing table
+            pricing = model_info.get('pricing', {})
+            if pricing:
+                pricing_table = Table(title="Pricing Information")
+                pricing_table.add_column("Type", style="cyan")
+                pricing_table.add_column("Cost per 1M tokens", style="yellow")
+                
+                input_cost = pricing.get('input_cost_per_1m_tokens', 0)
+                output_cost = pricing.get('output_cost_per_1m_tokens', 0)
+                
+                # Format costs properly, handling scientific notation
+                def format_cost(cost):
+                    if cost == 0:
+                        return "$0"
+                    # Check if values are already per-million-tokens (larger values) or per-token (very small values)
+                    if cost < 0.01:
+                        # Values are per-token, convert to per-million-tokens
+                        price_per_million = cost * 1_000_000
+                    else:
+                        # Values are already per-million-tokens
+                        price_per_million = cost
+                    
+                    if price_per_million < 0.01:
+                        # For very small values, show more decimal places
+                        return f"${price_per_million:.4f}"
+                    elif price_per_million < 1:
+                        return f"${price_per_million:.2f}"
+                    else:
+                        return f"${price_per_million:.0f}"
+                
+                pricing_table.add_row("Input", format_cost(input_cost))
+                pricing_table.add_row("Output", format_cost(output_cost))
+                pricing_table.add_row("Combined", format_cost(input_cost + output_cost))
+                
+                console.print(pricing_table)
+            
+            # Top provider info
+            top_provider = model_info.get('top_provider', {})
+            if top_provider:
+                console.print(f"\n[bold]Top Provider:[/bold]")
+                console.print(f"• Max completion tokens: {top_provider.get('max_completion_tokens', 'N/A')}")
+                console.print(f"• Max throughput: {top_provider.get('max_throughput_tokens_per_minute', 'N/A')} tokens/min")
+            
+            # Per-request limits
+            limits = model_info.get('per_request_limits', {})
+            if limits:
+                console.print(f"\n[bold]Request Limits:[/bold]");
+                if 'prompt_tokens' in limits:
+                    console.print(f"• Max prompt tokens: {limits['prompt_tokens']:,}")
+                if 'completion_tokens' in limits:
+                    console.print(f"• Max completion tokens: {limits['completion_tokens']:,}")
+            
+        except Exception as e:
+            console.print(f"[red]Error getting model info: {str(e)}[/red]")
+            logger.exception("Model info retrieval failed")
+    
+    asyncio.run(show_model_info_async())