Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configurable query types. Create new simple query string option #114

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 34 additions & 13 deletions src/collective/elasticsearch/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ def _zdt(val):
class BaseIndex:
filter_query = True

def __init__(self, catalog, index, query_type="match"):
    """Bind the index wrapper to its catalog, index and query type.

    :param catalog: catalog object, stored on ``self.catalog``.
    :param index: index object, stored on ``self.index``.
    :param query_type: name of the query flavour to build, stored on
        ``self.query_type``. Defaults to ``"match"`` so callers that
        pass only ``catalog`` and ``index`` keep working.
    """
    self.catalog = catalog
    self.index = index
    self.query_type = query_type

def create_mapping(self, name): # NOQA R0201
if name in keyword_fields:
Expand Down Expand Up @@ -209,20 +210,40 @@ def get_value(self, obj):
return "\n".join(all_texts)
return None

def _make_simple_query_string(self, query):
    """Rewrite a search string into ``simple_query_string`` syntax.

    Only the upper-case ``AND`` keyword surrounded by single spaces is
    rewritten (to the ``+`` operator); all other text passes through
    unchanged.

    :param query: search text; a falsy value (``None``, ``""``) yields
        an empty string instead of raising ``AttributeError``.
    :returns: the rewritten query string.
    """
    if not query:
        # Same empty-input convention as the "match" branch of get_query.
        return ""
    return query.replace(" AND ", " + ")

def get_query(self, name, value):
    """Build the Elasticsearch query clause(s) for a text search.

    :param name: name of the index being queried.
    :param value: raw search value; it is passed through
        ``self._normalize_query`` first.
    :returns: a single ``simple_query_string`` query dict when
        ``self.query_type`` is ``"simple_query_string"``; otherwise a
        list of match-style query dicts.
    """
    value = self._normalize_query(value)

    if self.query_type == "simple_query_string":
        # Empty input becomes "" so the helper never sees None.
        qs_value = self._make_simple_query_string(value if value else "")
        if name in ("Title", "SearchableText"):
            # titles have most importance, so search them too, boosted
            fields = ["Title^2", "SearchableText"]
        else:
            fields = [name]
        return {"simple_query_string": {"query": qs_value, "fields": fields}}

    # "match" is the default; an unrecognised query_type also lands here
    # rather than returning None, which the caller treats as "no query".
    # ES doesn't care about * like zope catalog does
    clean_value = value.strip("*") if value else ""
    queries = [{"match_phrase": {name: {"query": clean_value, "slop": 2}}}]
    if name in ("Title", "SearchableText"):
        # titles have most importance... we override here...
        queries.append(
            {"match_phrase_prefix": {"Title": {"query": clean_value, "boost": 2}}}
        )
    if name != "Title":
        queries.append({"match": {name: {"query": clean_value}}})
    return queries


class EBooleanIndex(BaseIndex):
Expand Down Expand Up @@ -381,13 +402,13 @@ class ERecurringIndex(EDateIndex):
pass


def getIndex(catalog, name, query_type="match"):
    """Return the Elasticsearch wrapper for a catalog index, if there is one.

    :param catalog: catalog object; when it has a ``_catalog`` attribute
        that inner object is used instead.
    :param name: name of the catalog index to wrap.
    :param query_type: forwarded to the wrapper's constructor; defaults
        to ``"match"``.
    :returns: an instance of the class registered in ``INDEX_MAPPING``
        for the index's type, or ``None`` when the catalog raises
        ``KeyError`` for ``name`` or the type has no mapping entry.
    """
    catalog = getattr(catalog, "_catalog", catalog)
    try:
        index = aq_base(catalog.getIndex(name))
    except KeyError:
        return None
    index_type = type(index)
    if index_type in INDEX_MAPPING:
        return INDEX_MAPPING[index_type](catalog, index, query_type=query_type)
    return None
17 changes: 17 additions & 0 deletions src/collective/elasticsearch/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
from typing import Tuple
from zope import schema
from zope.interface import Interface
from zope.schema.vocabulary import SimpleTerm
from zope.schema.vocabulary import SimpleVocabulary


# Vocabulary of the query flavours offered for the "query_type" setting:
# each entry pairs the stored value with its human-readable title.
query_types = SimpleVocabulary(
    [
        SimpleTerm(value=term_value, title=term_title)
        for term_value, term_title in (
            ("match", "Match (Default)"),
            ("simple_query_string", "Simple Query String"),
        )
    ]
)


class IElasticSearchLayer(Interface):
Expand Down Expand Up @@ -68,6 +78,13 @@ class IElasticSettings(Interface):
value_type=schema.TextLine(title="Index"),
)

# Which flavour of full-text query to build; choices come from the
# ``query_types`` vocabulary and the default keeps the "match" behaviour.
query_type = schema.Choice(
    title="Query Type",
    description=(
        "Whether to use match, match_phrase and match_phrase_prefix queries "
        "or simple_query_string queries. "
        "See the Elasticsearch docs for more information."
    ),
    default="match",
    vocabulary=query_types,
)

sniff_on_start = schema.Bool(title="Sniff on start", default=False, required=False)

sniff_on_connection_fail = schema.Bool(
Expand Down
16 changes: 16 additions & 0 deletions src/collective/elasticsearch/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ def highlight_post_tags(self):
value = ""
return value

@property
def query_type(self):
    """Return the configured query type, falling back to ``"match"``.

    The fallback is used when the registry lookup raises ``KeyError``
    and when the lookup returns ``None``.
    """
    fallback = "match"
    try:
        configured = api.portal.get_registry_record(
            "query_type", interfaces.IElasticSettings, fallback
        )
    except KeyError:
        return fallback
    return fallback if configured is None else configured

@property
def catalog(self):
return api.portal.get_tool("portal_catalog")
Expand Down Expand Up @@ -367,6 +380,9 @@ def _search(self, query, sort=None, **query_params):
"pre_tags": self.highlight_pre_tags.split("\n"),
"post_tags": self.highlight_post_tags.split("\n"),
}
# Emit the request body through the logging system, and only when debug
# logging is enabled, instead of printing every search to stdout.
import json
import logging

_debug_log = logging.getLogger(__name__)
if _debug_log.isEnabledFor(logging.DEBUG):
    # default=str is deliberate: this is diagnostic output, and a value
    # json cannot serialise must not make the search itself fail.
    _debug_log.debug(
        "Elasticsearch request body:\n%s",
        json.dumps(body, indent=4, default=str),
    )
return self.connection.search(index=self.index_name, body=body, **query_params)

def search(self, query: dict, factory=None, **query_params) -> LazyMap:
Expand Down
2 changes: 1 addition & 1 deletion src/collective/elasticsearch/profiles.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
source="*"
destination="1"
>
</genericsetup:upgradeSteps>
</genericsetup:upgradeSteps>

<genericsetup:upgradeSteps
profile="collective.elasticsearch:default"
Expand Down
2 changes: 1 addition & 1 deletion src/collective/elasticsearch/profiles/default/metadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
<metadata>
<version>4</version>
<dependencies>
</dependencies>
</dependencies>
</metadata>
5 changes: 3 additions & 2 deletions src/collective/elasticsearch/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,16 @@ def __call__(self, dquery):
matches = []
catalog = self.catalog._catalog
idxs = catalog.indexes.keys()
query_type = self.es.query_type
query = {"match_all": {}}
es_only_indexes = getESOnlyIndexes()
for key, value in dquery.items():
if key not in idxs and key not in es_only_indexes:
continue
index = getIndex(catalog, key)
index = getIndex(catalog, key, query_type=query_type)
if index is None and key in es_only_indexes:
# deleted index for plone performance but still need on ES
index = EZCTextIndex(catalog, key)
index = EZCTextIndex(catalog, key, query_type=query_type)
qq = index.get_query(key, value)
if qq is None:
continue
Expand Down
Loading