Skip to content

Commit 0020760

Browse files
make neighbor chainable (#219)
Co-authored-by: Vijay Swamidass <[email protected]>
1 parent c69151f commit 0020760

File tree

3 files changed

+18
-17
lines changed

3 files changed

+18
-17
lines changed

app/controllers/base_documents_controller.rb

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,17 @@ def index
5656
end
5757
end
5858

59+
# Apply text search before similarity search to maintain ActiveRecord relation
60+
@documents = @documents.search_by_title_and_document(params[:contains]) if params[:contains].present?
61+
5962
if params[:similar_to].present?
6063
embedding = get_embedding(params[:similar_to])
6164
# Get similar documents but preserve existing filters
62-
similar_docs = related_documents_from_embedding_by_libraries(embedding, library_id)
63-
64-
# Sort by neighbor_distance (ascending - closest first) and convert to array
65-
# This maintains the similarity order without needing raw SQL
66-
@documents = similar_docs.sort_by(&:neighbor_distance)
65+
@documents = @documents.related_by_embedding(embedding)
6766

68-
# Convert to Kaminari-compatible array for pagination
69-
@documents = Kaminari.paginate_array(@documents)
67+
# Sort by neighbor_distance using SQL to maintain ActiveRecord relation
68+
@documents = @documents.order('neighbor_distance ASC')
7069
end
71-
72-
@documents = @documents.search_by_title_and_document(params[:contains]) if params[:contains].present?
7370
@documents = @documents.page(params[:page]).per(params[:per_page] || 10)
7471
end
7572

app/controllers/concerns/neighbor_concern.rb

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,8 @@ def related_documents_from_embedding(embedding)
1111
Document.nearest_neighbors(:embedding, embedding, distance: 'euclidean')
1212
end
1313

14-
# _limit is the number of documents to return. Returning fewer is better since the most relevant documents are at the top.
15-
# Because of the ordering, having too many documents may cause the most relevant documents to be lost.
16-
# TODO implement a better way to get the most relevant documents.
17-
def related_documents_from_embedding_by_libraries(_embedding, _library_ids, _limit = nil)
18-
_limit ||= ENV.fetch('RELATED_DOCUMENTS_LIMIT', 25).to_i
19-
scope = related_documents_from_embedding(_embedding)
20-
scope = scope.where(library_id: _library_ids) if _library_ids.present?
21-
scope.order(updated_at: :desc).limit(_limit)
14+
# Returns documents related to the given embedding by delegating to the
# Document.related_by_embedding scope.
#
# @param _embedding the embedding handed through to the scope unchanged
# @param _limit [Integer, nil] maximum number of documents to return; nil is
#   passed through so the scope can apply its own default
# @return whatever Document.related_by_embedding returns
#
# NOTE(review): an earlier version of this method accepted library ids as the
# second positional argument. A caller still passing them would now have them
# treated as the limit -- confirm no such callers remain.
def related_documents_from_embedding_by_libraries(_embedding, _limit = nil)
  # The scope takes limit as an optional positional lambda argument, so it is
  # passed positionally; a `limit:` keyword would arrive there as a Hash.
  Document.related_by_embedding(_embedding, _limit)
end
2318
end

app/models/document.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ class Document < ApplicationRecord
1515
}
1616

1717
has_neighbors :embedding
18+
19+
# Scope returning the documents whose embedding is nearest (euclidean distance) to the given embedding.
# No library filtering happens here; being a scope, it can be chained onto a relation that is already filtered.
# limit is the maximum number of documents to return. When it is nil, the RELATED_DOCUMENTS_LIMIT
# environment variable is used, falling back to 25 when that is not set.
# Returning fewer is better since the most relevant documents are at the top.
# Because of the ordering, having too many documents may cause the most relevant documents to be lost.
scope :related_by_embedding, lambda { |embedding, limit = nil|
  max_results = limit || ENV.fetch('RELATED_DOCUMENTS_LIMIT', 25).to_i
  # An updated_at descending order is added to the nearest-neighbor relation before the cap is applied.
  nearest_neighbors(:embedding, embedding, distance: 'euclidean')
    .order(updated_at: :desc)
    .limit(max_results)
}
1827
belongs_to :library, counter_cache: true
1928
belongs_to :user
2029

0 commit comments

Comments
 (0)