Skip to content

Commit affd6d8

Browse files
Fix document search (#223)
* date filter * fix search to be more precise on multiple words * ensure sorting by relevance works --------- Co-authored-by: Vijay Swamidass <[email protected]>
1 parent 77b7297 commit affd6d8

File tree

4 files changed

+80
-24
lines changed

4 files changed

+80
-24
lines changed

app/controllers/base_documents_controller.rb

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,7 @@ class BaseDocumentsController < ApplicationController
1111
def index
1212
@documents = Document.includes(:library, :user)
1313

14-
# Only apply default sorting if not doing similarity search
15-
unless params[:similar_to].present?
16-
@documents = if params[:sort] == 'questions'
17-
@documents.order(questions_count: :desc)
18-
elsif params[:sort] == 'tokens'
19-
@documents.order(token_count: :desc)
20-
else
21-
@documents.order(updated_at: :desc)
22-
end
23-
end
24-
14+
# Apply cheap filters first for better performance
2515
library_id = params[:library_id]
2616
if library_id.present?
2717
@library = Library.find(params[:library_id])
@@ -56,17 +46,29 @@ def index
5646
end
5747
end
5848

59-
# Apply text search before similarity search to maintain ActiveRecord relation
60-
@documents = @documents.search_by_title_and_document(params[:contains]) if params[:contains].present?
49+
# Text search is applied further below (in the elsif branch), after basic filtering, so it works on a smaller dataset
6150

51+
# Apply similarity search on already filtered dataset
6252
if params[:similar_to].present?
6353
embedding = get_embedding(params[:similar_to])
6454
# Get similar documents but preserve existing filters
6555
@documents = @documents.related_by_embedding(embedding)
6656

6757
# Sort by neighbor_distance using SQL to maintain ActiveRecord relation
6858
@documents = @documents.order('neighbor_distance ASC')
59+
elsif params[:contains].present?
60+
@documents = @documents.smart_search(params[:contains])
61+
else
62+
# Only apply default sorting when neither similarity search nor text search is active
63+
@documents = if params[:sort] == 'questions'
64+
@documents.order(questions_count: :desc)
65+
elsif params[:sort] == 'tokens'
66+
@documents.order(token_count: :desc)
67+
else
68+
@documents.order(updated_at: :desc)
69+
end
6970
end
71+
7072
@documents = @documents.page(params[:page]).per(params[:per_page] || 10)
7173
end
7274

app/models/document.rb

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,52 @@ class Document < ApplicationRecord
77
has_many :documents_questions
88
has_many :questions, through: :documents_questions
99
has_many :comments, dependent: :destroy
10+
has_neighbors :embedding
1011

12+
# Primary search scope using PostgreSQL full-text search
13+
# Searches across both title and document content, matching any of the search words
14+
# - prefix: true allows partial word matching (e.g., "test" matches "testing")
15+
# - any_word: true matches documents containing ANY of the search terms (OR logic)
16+
# - Uses precomputed search_vector for performance
1117
pg_search_scope :search_by_title_and_document,
1218
against: %i[title document],
1319
using: {
14-
tsearch: { prefix: true, dictionary: 'english',
15-
tsvector_column: 'search_vector',
16-
any_word: true } # This enables OR search - matches any word
20+
tsearch: {
21+
prefix: true,
22+
dictionary: 'english',
23+
tsvector_column: 'search_vector',
24+
any_word: true
25+
}
1726
}
1827

19-
has_neighbors :embedding
28+
# Strict search scope with enhanced exact matching
29+
# Similar to search_by_title_and_document but with stricter ranking:
30+
# - normalization: 0 disables document length normalization in ranking
31+
# - This prioritizes exact word matches over document length considerations
32+
# - Better for finding specific content regardless of document size
33+
pg_search_scope :strict_search,
34+
against: %i[title document],
35+
using: {
36+
tsearch: {
37+
prefix: true,
38+
dictionary: 'english',
39+
tsvector_column: 'search_vector',
40+
any_word: false,
41+
normalization: 0 # No normalization - prioritizes exact word matches over document length
42+
}
43+
}
44+
45+
# Smart search scope: delegates to strict_search (all terms required, prefix matching, e.g. a query "test" can match "test12") and returns an empty relation when nothing matches
46+
# This is chainable and preserves existing filters (library_id, date ranges, etc.)
47+
scope :smart_search, lambda { |query|
48+
return all if query.blank?
49+
50+
strict_results = strict_search(query)
51+
return strict_results if strict_results.count >= 1
52+
53+
# Return empty relation if no results found
54+
none
55+
}
2056

2157
# Scope to find related documents by embedding with optional library filtering
2258
# _limit is the number of documents to return. Returning fewer is better since the most relevant documents are at the top.

app/views/documents/index.html.erb

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,27 @@
1010
<% end %>
1111
</div>
1212
<!-- Search Form -->
13-
<div class="flex-grow flex items-center mt-3">
13+
<div class="mt-3">
1414
<% action_path = @library.present? ? library_documents_path(@library) : documents_path %>
15-
<%= form_with(url: action_path, method: :get, local: true, class: "w-full flex") do %>
16-
<input type="text" name="contains" id="contains" placeholder="Search documents..." class="flex-grow rounded-lg p-2 border border-stone-300">
17-
<button type="submit" class="ml-2 rounded-lg p-2 bg-white text-sky-500 border-sky-500 border">Search</button>
15+
<%= form_with(url: action_path, method: :get, local: true, class: "w-full") do %>
16+
<div class="flex flex-col space-y-3">
17+
<!-- Text Search -->
18+
<div class="flex">
19+
<input type="text" name="contains" id="contains" value="<%= params[:contains] %>" placeholder="Search documents..." class="flex-grow rounded-lg p-2 border border-stone-300">
20+
<button type="submit" class="ml-2 rounded-lg p-2 bg-white text-sky-500 border-sky-500 border">Search</button>
21+
</div>
22+
<!-- Date Range Filters -->
23+
<div class="flex space-x-4">
24+
<div class="flex-1">
25+
<label for="since" class="block text-sm font-medium text-stone-700 mb-1">Updated Since:</label>
26+
<input type="datetime-local" name="since" id="since" value="<%= params[:since] %>" class="w-full rounded-lg p-2 border border-stone-300 text-sm">
27+
</div>
28+
<div class="flex-1">
29+
<label for="until" class="block text-sm font-medium text-stone-700 mb-1">Updated Until:</label>
30+
<input type="datetime-local" name="until" id="until" value="<%= params[:until] %>" class="w-full rounded-lg p-2 border border-stone-300 text-sm">
31+
</div>
32+
</div>
33+
</div>
1834
<% end %>
1935
</div>
2036
<div class="flex-grow flex items-center mt-3">
@@ -28,6 +44,8 @@
2844
<%= link_to 'Sort by Date', request.path, class: 'rounded-lg p-2 bg-white text-sky-500 border-sky-500 border hover:bg-sky-50' %>
2945
<% end %>
3046
</div>
47+
<div class="my-2">
48+
<%= render 'shared/doc_list', docs: @documents %>
49+
</div>
3150
<%= paginate @documents %>
32-
<%= render 'shared/doc_list', docs: @documents %>
3351
</div>

app/views/shared/_doc_list.html.erb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<ul class="space-y-4">
2-
<% docs.each do |document| %>
2+
<% docs.each_with_index do |document, index| %>
33
<li class="bg-white p-4 rounded-lg border border-stone-300 hover:shadow-lg group">
44
<details class="text-base">
55
<summary class="text-lg cursor-pointer flex justify-between items-center">
@@ -10,7 +10,7 @@
1010
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-3 h-3 mr-1 text-stone-700">
1111
<path stroke-linecap="round" stroke-linejoin="round" d="M12 6v6h4.5m4.5 0a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
1212
</svg>
13-
<%= time_ago_in_words(document.created_at) %>
13+
<%= time_ago_in_words(document.updated_at) %>
1414
</div>
1515
<div class="flex items-center">
1616
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-3 h-3 mr-1 text-stone-700">

0 commit comments

Comments
 (0)