calumbell
diff --git a/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Pipfile‎
Lines changed: 3 additions & 0 deletions b/‎Pipfile‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Pipfile.lock‎
Lines changed: 394 additions & 122 deletions b/‎Pipfile.lock‎
Lines changed: 394 additions & 122 deletions
diff --git a/‎README.md‎
Lines changed: 0 additions & 1 deletion b/‎README.md‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎api/management/commands/quicksetup.py‎
Lines changed: 3 additions & 0 deletions b/‎api/management/commands/quicksetup.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/search_benchmark.py‎
Lines changed: 157 additions & 0 deletions b/‎scripts/search_benchmark.py‎
Lines changed: 157 additions & 0 deletions
diff --git a/‎search/apps.py‎
Lines changed: 27 additions & 0 deletions b/‎search/apps.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎search/management/commands/buildindex.py‎
Lines changed: 21 additions & 0 deletions b/‎search/management/commands/buildindex.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎search/serializers.py‎
Lines changed: 7 additions & 1 deletion b/‎search/serializers.py‎
Lines changed: 7 additions & 1 deletion
@@ -1,4 +1,4 @@
-FROM python:3.11-alpine
+FROM python:3.11-slim
 
 RUN mkdir -p /opt/services/open5e-api
 WORKDIR /opt/services/open5e-api
 
@@ -13,6 +13,9 @@ requests = "*"
 whitenoise = "*"
 gunicorn = "*"
 drf-spectacular = {extras = ["sidecar"], version = "*"}
+numpy = "*"
+rapidfuzz = "*"
+scikit-learn = "*"
 
 [dev-packages]
 pytest = "*"
 
@@ -92,7 +92,6 @@ To use the search function, you must build the search index by running the above
 pipenv run python manage.py quicksetup
 ```
 
-
 ## Run
 
 Run the server locally. This server is only for development and shall __not__ be used in production. The server will be available at `http://localhost:8000`.
 
@@ -100,6 +100,9 @@ def clean_dir() ->None:
         shutil.rmtree(Path(settings.STATIC_ROOT))
     if Path(settings.DATABASES['default']['NAME']).exists():
         Path(settings.DATABASES['default']['NAME']).unlink()
+    vector_index = Path('server/vector_index.pkl')
+    if vector_index.exists():
+        vector_index.unlink()
 
 def import_v1() -> None:
     """Import the v1 apps' database models."""
 
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import django
+import time
+import requests
+from statistics import mean
+
+# Django setup
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'server.settings')
+# Make the project root importable no matter where the checkout lives:
+# this script sits in <root>/scripts/, so the root is two dirname() calls up.
+# (A machine-specific absolute path must not be baked into the script.)
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if PROJECT_ROOT not in sys.path:
+    sys.path.append(PROJECT_ROOT)
+django.setup()
+
+def time_search(query, params=None, timeout=30):
+    """Time one GET request against the local search endpoint.
+
+    Args:
+        query: Search string sent as the ``query`` parameter.
+        params: Optional dict of extra query parameters, merged over the
+            default ``{"query": query}``.
+        timeout: Seconds to wait for the server before giving up, so that a
+            hung server cannot stall the whole benchmark run.
+
+    Returns:
+        ``(elapsed_ms, info)`` on success, where ``info`` is a dict with
+        ``total_results`` (the response's ``count``, default 0) and
+        ``exact_matches`` (read from ``search_metadata``, default False).
+        ``(None, "Error: ...")`` when the request fails or the HTTP status
+        is not 200.
+    """
+    url = "http://127.0.0.1:8000/v2/search/"
+    search_params = {"query": query}
+    if params:
+        search_params.update(params)
+
+    # perf_counter() is monotonic and high-resolution; time.time() can jump
+    # when the system clock is adjusted, which would skew the measurement.
+    start_time = time.perf_counter()
+    try:
+        response = requests.get(url, params=search_params, timeout=timeout)
+    except requests.exceptions.RequestException as exc:
+        # Report transport failures the same way as HTTP errors so callers
+        # only ever have to check for ``timing is None``.
+        return None, f"Error: {exc}"
+    end_time = time.perf_counter()
+
+    if response.status_code != 200:
+        return None, f"Error: {response.status_code}"
+
+    data = response.json()
+    timing = (end_time - start_time) * 1000  # Convert to milliseconds
+
+    # Just get the total count
+    total_results = data.get('count', 0)
+    metadata = data.get('search_metadata', {})
+    exact_matches = metadata.get('exact_matches', False)
+
+    return timing, {
+        'total_results': total_results,
+        'exact_matches': exact_matches
+    }
+
+def _benchmark_mode(query, params=None, runs=3):
+    """Time one search mode ``runs`` times; return (mean ms, first successful info)."""
+    timings = []
+    first_info = None
+    for _ in range(runs):
+        timing, info = time_search(query, params)
+        if timing is None:
+            # Failed request: ``info`` is an error string, not a result dict.
+            continue
+        timings.append(timing)
+        if first_info is None:
+            first_info = info
+    # 0 stands in for "no successful run" so the table can still format it.
+    return (mean(timings) if timings else 0), first_info
+
+
+def _format_results(info):
+    """Render a mode's total result count, or "Error" if no run succeeded."""
+    if not info:
+        return "Error"
+    total = info['total_results']
+    return f"{total}"
+
+
+def run_performance_test():
+    """Run simplified performance comparison of search modes.
+
+    For every test query, each of the three modes (default, fuzzy-strict,
+    vector-strict) is requested three times through ``time_search``; the mean
+    latency and the result count of the first successful run are collected
+    and finally printed as a markdown table followed by a legend.
+    """
+
+    # Test queries covering different scenarios
+    test_queries = [
+        "fire",           # Common term, many exact matches
+        "sword",          # Common term, many exact matches
+        "heal",           # Common term, some matches
+        "dragon",         # Common term, many matches
+        "fireball",       # Specific spell, exact matches
+        "teleport",       # Specific spell, exact matches
+        "magic weapon",   # Multi-word, exact matches
+        "chromatic orb",  # Specific spell, few matches
+        "firbal",         # Typo, fuzzy fallback
+        "dragn",          # Typo, fuzzy fallback
+    ]
+
+    # (row-key prefix, extra request parameters) for each mode, in the order
+    # they are measured and in which their columns appear in the table.
+    modes = (
+        ("default", None),  # exact + fuzzy fallback if needed
+        ("fuzzy", {"strict": "true", "fuzzy": "true"}),
+        ("vector", {"strict": "true", "vector": "true"}),
+    )
+
+    results_table = []
+
+    print("Search Mode Performance Comparison")
+    print("=" * 80)
+    print("Testing: Default (exact + fuzzy fallback) vs Fuzzy-strict vs Vector-strict")
+    print()
+
+    for query in test_queries:
+        print(f"Testing '{query}'...")
+
+        row = {'query': query}
+        for label, mode_params in modes:
+            average, info = _benchmark_mode(query, mode_params)
+            row[f'{label}_time'] = average
+            row[f'{label}_results'] = _format_results(info)
+        results_table.append(row)
+
+    # Output markdown table
+    print("\nResults (Markdown Table):")
+    print("=" * 80)
+    print("| Query | Default Mode | | Fuzzy-Strict | | Vector-Strict | |")
+    print("|-------|-------------|---|-------------|---|-------------|---|")
+    print("| | Time (ms) | Results | Time (ms) | Results | Time (ms) | Results |")
+
+    for row in results_table:
+        print(f"| {row['query']} | {row['default_time']:.1f} | {row['default_results']} | "
+              f"{row['fuzzy_time']:.1f} | {row['fuzzy_results']} | "
+              f"{row['vector_time']:.1f} | {row['vector_results']} |")
+
+    print()
+    print("Legend:")
+    print("- Default Mode: Runs exact search, falls back to fuzzy if exact finds 0 results")
+    print("- Fuzzy-Strict: Only fuzzy search")
+    print("- Vector-Strict: Only vector search")
+    print("- Results: Total matches found across all pages (API returns 50 per page)")
+
+if __name__ == "__main__":
+    print("Starting simplified search performance comparison...")
+    print("Make sure the Django dev server is running on http://127.0.0.1:8000")
+    print()
+
+    try:
+        # Quick connectivity test. The timeout makes an unresponsive server
+        # fail fast instead of hanging the script before any output appears.
+        response = requests.get(
+            "http://127.0.0.1:8000/v2/search/",
+            params={"query": "test"},
+            timeout=10,
+        )
+        if response.status_code != 200:
+            print(f"Server connectivity issue: {response.status_code}")
+            sys.exit(1)
+    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+        print("Cannot connect to server. Please start the Django dev server first:")
+        print("python manage.py runserver")
+        sys.exit(1)
+
+    run_performance_test()
@@ -1,6 +1,33 @@
+import pickle
+from pathlib import Path
 from django.apps import AppConfig
 
 
 class SearchConfig(AppConfig):
     default_auto_field = 'django.db.models.BigAutoField'
     name = 'search'
+    # Class-level cache of the unpickled index; stays None when the index
+    # file is missing or could not be loaded.
+    vector_index = None
+
+    def ready(self):
+        """Load vector index once when Django starts.
+
+        Reads ``server/vector_index.pkl`` (resolved against the current
+        working directory) and stores the unpickled object on
+        ``SearchConfig.vector_index``. Any failure leaves the attribute at
+        ``None`` rather than aborting startup.
+
+        Progress is reported through ``logging`` instead of ``print`` so that
+        stdout of management commands stays clean.
+        """
+        import logging
+
+        logger = logging.getLogger(__name__)
+        vector_index_path = Path('server/vector_index.pkl')
+
+        if not vector_index_path.exists():
+            logger.warning("Vector index file not found: %s", vector_index_path)
+            SearchConfig.vector_index = None
+            return
+
+        try:
+            logger.info("Loading vector search index...")
+            # NOTE(security): pickle.load can execute arbitrary code from the
+            # file. This is only acceptable while the file is generated
+            # locally by a trusted build step; never load an index obtained
+            # from an untrusted source.
+            with vector_index_path.open('rb') as f:
+                SearchConfig.vector_index = pickle.load(f)
+
+            # Log basic info about the loaded index
+            if SearchConfig.vector_index and 'matrix' in SearchConfig.vector_index:
+                matrix_shape = SearchConfig.vector_index['matrix'].shape
+                logger.info("Vector index loaded successfully: %s", matrix_shape)
+            else:
+                logger.warning("Vector index loaded but missing expected components")
+
+        except Exception:
+            # Best-effort load at the startup boundary: record the traceback
+            # and continue without an index.
+            logger.exception("Failed to load vector index")
+            SearchConfig.vector_index = None
@@ -1,5 +1,8 @@
 
 import argparse
+import pickle
+
+from pathlib import Path
 
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
@@ -9,6 +12,7 @@
 from api import models as v1
 from api_v2 import models as v2
 from search import models as search
+from sklearn.feature_extraction.text import TfidfVectorizer
 
 class Command(BaseCommand):
     """Implementation for the `manage.py `index_v1` subcommand."""
@@ -102,6 +106,20 @@ def load_index(self):
                 "SELECT document_pk,object_pk,object_name,object_model,text,schema_version " +
                 "FROM search_searchresult")
 
+    def build_vector_index(self):
+        """Fit a TF-IDF model over all search results and pickle it to disk.
+
+        Each document is the result's ``object_name`` followed by its
+        ``text``. The pickled dict holds the row ids, the object names, the
+        fitted TF-IDF matrix and the vectorizer itself, and is written to
+        ``server/vector_index.pkl`` (relative to the current working
+        directory). Nothing is written when there are no search results.
+        """
+        rows = list(search.SearchResult.objects.all().order_by("id"))
+        if not rows:
+            return
+
+        # Collect the three parallel lists in a single pass over the rows.
+        ids, names, docs = [], [], []
+        for row in rows:
+            ids.append(row.id)
+            names.append(row.object_name)
+            docs.append(f"{row.object_name} {row.text}")
+
+        vectorizer = TfidfVectorizer()
+        index_data = {
+            "ids": ids,
+            "names": names,
+            "matrix": vectorizer.fit_transform(docs),
+            "vectorizer": vectorizer,
+        }
+
+        target = Path("server/vector_index.pkl")
+        with target.open("wb") as fh:
+            pickle.dump(index_data, fh)
+
     def check_fts_enabled(self):
         #import sqlite3
         with connection.cursor() as cursor:
@@ -153,5 +171,8 @@ def handle(self, *args, **options):
         # Take the content table's current data and load it into the index.
         self.load_index()
 
+        # Also build the vector search index from the loaded content.
+        self.build_vector_index()
+
         # Unload content table (saves storage space.)
         self.unload_all_content()
@@ -15,6 +15,9 @@ class SearchResultSerializer(serializers.ModelSerializer):
     object = serializers.SerializerMethodField(method_name='get_object')
     document = serializers.SerializerMethodField(method_name='get_document')
     route = serializers.SerializerMethodField(method_name='get_route')
+    match_type = serializers.CharField(read_only=True)
+    matched_term = serializers.CharField(read_only=True, allow_null=True)
+    match_score = serializers.FloatField(read_only=True, allow_null=True)
 
     class Meta:
         model = models.SearchResult
@@ -27,7 +30,10 @@ class Meta:
             'schema_version',
             'route',
             'text',
-            'highlighted']
+            'highlighted',
+            'match_type',
+            'matched_term',
+            'match_score']
 
     # The following override is replaced in a postproccessing hook defined in oas.py. I couldn't figure out how to get a oneof in here
     @extend_schema_field(OpenApiTypes.STR)
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-FROM python:3.11-alpine`
	`1`	`+FROM python:3.11-slim`
`2`	`2`
`3`	`3`	`RUN mkdir -p /opt/services/open5e-api`
`4`	`4`	`WORKDIR /opt/services/open5e-api`