File tree Expand file tree Collapse file tree 3 files changed +18
-17
lines changed
Expand file tree Collapse file tree 3 files changed +18
-17
lines changed Original file line number Diff line number Diff line change @@ -56,20 +56,17 @@ def index
5656 end
5757 end
5858
59+ # Apply text search before similarity search to maintain ActiveRecord relation
60+ @documents = @documents . search_by_title_and_document ( params [ :contains ] ) if params [ :contains ] . present?
61+
5962 if params [ :similar_to ] . present?
6063 embedding = get_embedding ( params [ :similar_to ] )
6164 # Get similar documents but preserve existing filters
62- similar_docs = related_documents_from_embedding_by_libraries ( embedding , library_id )
63-
64- # Sort by neighbor_distance (ascending - closest first) and convert to array
65- # This maintains the similarity order without needing raw SQL
66- @documents = similar_docs . sort_by ( &:neighbor_distance )
65+ @documents = @documents . related_by_embedding ( embedding )
6766
68- # Convert to Kaminari-compatible array for pagination
69- @documents = Kaminari . paginate_array ( @documents )
67+ # Sort by neighbor_distance using SQL to maintain ActiveRecord relation
68+ @documents = @documents . order ( 'neighbor_distance ASC' )
7069 end
71-
72- @documents = @documents . search_by_title_and_document ( params [ :contains ] ) if params [ :contains ] . present?
7370 @documents = @documents . page ( params [ :page ] ) . per ( params [ :per_page ] || 10 )
7471 end
7572
Original file line number Diff line number Diff line change @@ -11,13 +11,8 @@ def related_documents_from_embedding(embedding)
1111 Document . nearest_neighbors ( :embedding , embedding , distance : 'euclidean' )
1212 end
1313
14- # _limit is the number of documents to return. Returning fewer is better since the most relevant documents are at the top.
15- # Because of the ordering, having too many documents may cause the most relevant documents to be lost.
16- # TODO implement a better way to get the most relevant documents.
17- def related_documents_from_embedding_by_libraries ( _embedding , _library_ids , _limit = nil )
18- _limit ||= ENV . fetch ( 'RELATED_DOCUMENTS_LIMIT' , 25 ) . to_i
19- scope = related_documents_from_embedding ( _embedding )
20- scope = scope . where ( library_id : _library_ids ) if _library_ids . present?
21- scope . order ( updated_at : :desc ) . limit ( _limit )
14+ # Delegates to the Document.related_by_embedding scope; the library filter of the previous version is no longer applied here. NOTE(review): the parameter is spelled __limit but the body reads _limit, and limit is passed as a keyword while the scope lambda added in this change takes it positionally - confirm against the real source (the double underscore may be a diff-rendering artifact).
15+ def related_documents_from_embedding_by_libraries ( _embedding , __limit = nil )
16+ Document . related_by_embedding ( _embedding , limit : _limit )
2217 end
2318end
Original file line number Diff line number Diff line change @@ -15,6 +15,15 @@ class Document < ApplicationRecord
1515 }
1616
1717 has_neighbors :embedding
18+
19+ # Scope to find related documents by embedding (nearest neighbors on :embedding, euclidean distance). No library filtering is applied inside this scope.
20+ # limit is the number of documents to return; when not given it is read from ENV['RELATED_DOCUMENTS_LIMIT'] (25 if unset). Returning fewer is better since the most relevant documents are at the top.
21+ # Because of the ordering, having too many documents may cause the most relevant documents to be lost.
22+ scope :related_by_embedding , lambda { |embedding , limit = nil |
23+ limit ||= ENV . fetch ( 'RELATED_DOCUMENTS_LIMIT' , 25 ) . to_i
24+ scope = nearest_neighbors ( :embedding , embedding , distance : 'euclidean' )
25+ scope . order ( updated_at : :desc ) . limit ( limit )
26+ }
1827 belongs_to :library , counter_cache : true
1928 belongs_to :user
2029
You can’t perform that action at this time.
0 commit comments