Skip to content

Commit 1bfe9c4

Browse files
committed
Handle filter-only queries: short-circuit and return the filtered results
- For queries that contain only filters, short-circuit and return the filtered results; there is no need to run semantic search or re-ranking. - Add a client test for a filter-only query, and URL-quote the queries in the client tests
1 parent afc84de commit 1bfe9c4

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

src/search_type/text_search.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False):
112112
if entries is None or len(entries) == 0:
113113
return [], []
114114

115+
# If query only had filters it'll be empty now. So short-circuit and return results.
116+
if query.strip() == "":
117+
hits = [{"corpus_id": id, "score": 1.0} for id, _ in enumerate(entries)]
118+
return hits, entries
119+
115120
# Encode the query using the bi-encoder
116121
start = time.time()
117122
question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)

tests/test_client.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
# Standard Modules
22
from io import BytesIO
33
from PIL import Image
4+
from urllib.parse import quote
5+
46

57
# External Packages
68
from fastapi.testclient import TestClient
79

810
# Internal Packages
911
from src.main import app
10-
from src.utils.config import SearchType
1112
from src.utils.state import model, config
1213
from src.search_type import text_search, image_search
1314
from src.utils.rawconfig import ContentConfig, SearchConfig
1415
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
1516
from src.search_filter.word_filter import WordFilter
17+
from src.search_filter.file_filter import FileFilter
1618

1719

1820
# Arrange
@@ -23,7 +25,7 @@
2325
# ----------------------------------------------------------------------------------------------------
2426
def test_search_with_invalid_content_type():
2527
# Arrange
26-
user_query = "How to call Khoj from Emacs?"
28+
user_query = quote("How to call Khoj from Emacs?")
2729

2830
# Act
2931
response = client.get(f"/search?q={user_query}&t=invalid_content_type")
@@ -117,7 +119,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
117119
def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
118120
# Arrange
119121
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
120-
user_query = "How to git install application?"
122+
user_query = quote("How to git install application?")
121123

122124
# Act
123125
response = client.get(f"/search?q={user_query}&n=1&t=org&r=true")
@@ -129,12 +131,29 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
129131
assert "git clone" in search_result
130132

131133

134+
# ----------------------------------------------------------------------------------------------------
135+
def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig):
136+
# Arrange
137+
filters = [WordFilter(), FileFilter()]
138+
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
139+
user_query = quote('+"Emacs" file:"*.org"')
140+
141+
# Act
142+
response = client.get(f"/search?q={user_query}&n=1&t=org")
143+
144+
# Assert
145+
assert response.status_code == 200
146+
# assert actual_data contains word "Emacs"
147+
search_result = response.json()[0]["entry"]
148+
assert "Emacs" in search_result
149+
150+
132151
# ----------------------------------------------------------------------------------------------------
133152
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
134153
# Arrange
135154
filters = [WordFilter()]
136155
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
137-
user_query = 'How to git install application? +"Emacs"'
156+
user_query = quote('How to git install application? +"Emacs"')
138157

139158
# Act
140159
response = client.get(f"/search?q={user_query}&n=1&t=org")
@@ -151,7 +170,7 @@ def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_
151170
# Arrange
152171
filters = [WordFilter()]
153172
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
154-
user_query = 'How to git install application? -"clone"'
173+
user_query = quote('How to git install application? -"clone"')
155174

156175
# Act
157176
response = client.get(f"/search?q={user_query}&n=1&t=org")

0 commit comments

Comments
 (0)