104104import java .util .function .BiPredicate ;
105105import java .util .function .Consumer ;
106106import java .util .function .Function ;
107+ import java .util .regex .Matcher ;
108+ import java .util .regex .Pattern ;
107109import java .util .stream .Collectors ;
108110import java .util .stream .Stream ;
109111import java .util .stream .StreamSupport ;
@@ -126,6 +128,11 @@ public class ElasticRequestHandler {
126128 private static final String HIGHLIGHT_PREFIX = "<strong>" ;
127129 private static final String HIGHLIGHT_SUFFIX = "</strong>" ;
128130
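131+ // Matches a term followed by a numeric fuzzy suffix, e.g. Lucene 4.x similarity (roam~0.8). Note that plain integers (e.g., roam~2) match as well; convertFuzzyQuery handles those separately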
132+ private static final Pattern LUCENE_4_FUZZY_PATTERN = Pattern .compile ("\\ b(\\ w+)~([0-9]*\\ .?[0-9]+)\\ b" );
133+ // From Lucene 5 and above (used by elastic), the fuzzy query syntax has changed to use a single integer
134+ private static final Pattern ELASTIC_FUZZY_PATTERN = Pattern .compile ("\\ b(\\ w+)~([0-2])\\ b" );
135+
129136 private final IndexPlan indexPlan ;
130137 private final Filter filter ;
131138 private final PlanResult planResult ;
@@ -889,10 +896,10 @@ private static Query referenceConstraint(String uuid) {
889896 return Query .of (q -> q .multiMatch (m -> m .fields (uuid )));
890897 }
891898
892- private static QueryStringQuery .Builder fullTextQuery (String text , String fieldName , PlanResult pr , boolean includeDynamicBoostedValues ) {
899+ private QueryStringQuery .Builder fullTextQuery (String text , String fieldName , PlanResult pr , boolean includeDynamicBoostedValues ) {
893900 LOG .debug ("fullTextQuery for text: '{}', fieldName: '{}'" , text , fieldName );
894901 QueryStringQuery .Builder qsqBuilder = new QueryStringQuery .Builder ()
895- .query (FulltextIndex . rewriteQueryText (text ))
902+ .query (rewriteQueryText (text ))
896903 .defaultOperator (Operator .And )
897904 .type (TextQueryType .CrossFields )
898905 .tieBreaker (0.5d );
@@ -908,6 +915,75 @@ private static QueryStringQuery.Builder fullTextQuery(String text, String fieldN
908915 return qsqBuilder .fields (fieldName );
909916 }
910917
918+ private String rewriteQueryText (String text ) {
919+ String rewritten = FulltextIndex .rewriteQueryText (text );
920+
921+ // here we handle special cases where the syntax used in the lucene 4.x query parser is not supported by the current version
922+ rewritten = convertFuzzyQuery (rewritten );
923+
924+ return rewritten ;
925+ }
926+
927+ /**
928+ * Converts Lucene fuzzy queries from the old syntax (float similarity) to the new syntax (edit distance).
929+ * <p>
930+ * In Lucene 4, fuzzy queries were specified using a floating-point similarity (e.g., "term~0.8"), where values
931+ * closer to 1 required a higher similarity match. In later Lucene versions, this was replaced with a discrete
932+ * edit distance (0, 1, or 2).
933+ * <p>
934+ * This method:
935+ * <ul>
936+ * <li>Detects and converts old fuzzy queries (e.g., "roam~0.7" → "roam~1").</li>
937+ * <li>Preserves new fuzzy queries (e.g., "test~2" remains unchanged).</li>
938+ * <li>Avoids modifying proximity queries (e.g., "\"quick fox\"~5" remains unchanged).</li>
939+ * </ul>
940+ *
941+ * @param text The input query string containing fuzzy or proximity queries.
942+ * @return A query string where old fuzzy syntax is converted to the new format.
943+ */
944+ private String convertFuzzyQuery (String text ) {
945+ if (!text .contains ("~" )) {
946+ return text ;
947+ }
948+ Matcher lucene4FuzzyMatcher = LUCENE_4_FUZZY_PATTERN .matcher (text );
949+
950+ if (!lucene4FuzzyMatcher .find ()) {
951+ // this can only happen if the pattern is not found, which means we are dealing with a tilde not related to a fuzzy query
952+ return text ;
953+ }
954+
955+ StringBuilder result = new StringBuilder ();
956+ do {
957+ String term = lucene4FuzzyMatcher .group (1 );
958+ String fuzzyValue = lucene4FuzzyMatcher .group (2 );
959+
960+ // Skip if it's already using the new syntax (integer 0-2)
961+ if (ELASTIC_FUZZY_PATTERN .matcher (term + "~" + fuzzyValue ).matches ()) {
962+ continue ;
963+ }
964+
965+ // Convert floating-point similarity to integer edit distance
966+ int editDistance = 2 ; // Default to the most lenient setting
967+ try {
968+ float similarity = Float .parseFloat (fuzzyValue );
969+ if (similarity >= 0.8f ) {
970+ editDistance = 0 ;
971+ } else if (similarity >= 0.5f ) {
972+ editDistance = 1 ;
973+ }
974+ } catch (NumberFormatException e ) {
975+ LOG .warn ("Invalid fuzzy value: {} for query text {}, using default edit distance of 2" , fuzzyValue , text );
976+ }
977+
978+ lucene4FuzzyMatcher .appendReplacement (result , term + "~" + editDistance );
979+ } while (lucene4FuzzyMatcher .find ());
980+
981+ lucene4FuzzyMatcher .appendTail (result );
982+ String resultString = result .toString ();
983+ LOG .info ("Converted fuzzy query from '{}' to '{}'" , text , resultString );
984+ return resultString ;
985+ }
986+
911987 private Query createQuery (String propertyName , Filter .PropertyRestriction pr , PropertyDefinition defn ) {
912988 final String field = elasticIndexDefinition .getElasticKeyword (propertyName );
913989
0 commit comments