Skip to content

Commit 3c7e37a

Browse files
committed
fix(indexing): Fix indexing full text terms to support exact match; fix isbn search term processor
- Change instance folio_word_delimiter_graph to catenate_all
- Preserve trailing asterisk in IsbnSearchTermProcessor

Closes: MSEARCH-1011
1 parent cd97314 commit 3c7e37a

File tree

6 files changed

+81
-3
lines changed

6 files changed

+81
-3
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
* Fix soft-deleted items being indexed into elasticsearch ([MSEARCH-1119](https://folio-org.atlassian.net/browse/MSEARCH-1119))
6868
* Add error handling on upload range processing ([MSEARCH-1151](https://folio-org.atlassian.net/browse/MSEARCH-1151))
6969
* Ignore shadow locations and location units while indexing domain events ([MSEARCH-1154](https://folio-org.atlassian.net/browse/MSEARCH-1154))
70+
* Change instance index folio_word_delimiter_graph to catenate_all, honor '*' in IsbnSearchTermProcessor ([MSEARCH-1011](https://folio-org.atlassian.net/browse/MSEARCH-1011))
7071

7172
### Tech Debt
7273
* Migrate to Opensearch 3.0.0 ([MSEARCH-1033](https://folio-org.atlassian.net/browse/MSEARCH-1033))

src/main/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessor.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.folio.search.cql.searchterm;
22

3+
import static org.folio.search.utils.SearchUtils.ASTERISKS_SIGN;
4+
35
import lombok.RequiredArgsConstructor;
46
import org.folio.search.service.setter.instance.IsbnProcessor;
57
import org.springframework.stereotype.Component;
@@ -12,6 +14,10 @@ public class IsbnSearchTermProcessor implements SearchTermProcessor {
1214

1315
@Override
1416
public String getSearchTerm(String inputTerm) {
15-
return String.join(" ", isbnProcessor.normalizeIsbn(inputTerm));
17+
var hasWildcard = inputTerm.endsWith(ASTERISKS_SIGN);
18+
var termToNormalize = hasWildcard ? inputTerm.substring(0, inputTerm.length() - 1) : inputTerm;
19+
var normalized = String.join(" ", isbnProcessor.normalizeIsbn(termToNormalize));
20+
21+
return hasWildcard ? normalized + ASTERISKS_SIGN : normalized;
1622
}
1723
}

src/main/resources/elasticsearch/index/instance.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"filter": {
1111
"folio_word_delimiter_graph": {
1212
"type": "word_delimiter_graph",
13-
"catenate_words": true
13+
"catenate_all": true
1414
}
1515
},
1616
"normalizer": {

src/test/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessorTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,20 @@ void getSearchTerm_positive_multipleValues() {
3636
var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm);
3737
assertThat(actual).isEqualTo("1861972717 9781861972712 (paper)");
3838
}
39+
40+
@Test
41+
void getSearchTerm_withTrailingWildcard() {
42+
var searchTerm = "9781609383657*";
43+
when(isbnProcessor.normalizeIsbn("9781609383657")).thenReturn(List.of("9781609383657"));
44+
var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm);
45+
assertThat(actual).isEqualTo("9781609383657*");
46+
}
47+
48+
@Test
49+
void getSearchTerm_withTrailingWildcard_isbn10() {
50+
var searchTerm = "047144250X*";
51+
when(isbnProcessor.normalizeIsbn("047144250X")).thenReturn(List.of("047144250x", "9780471442509"));
52+
var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm);
53+
assertThat(actual).isEqualTo("047144250x 9780471442509*");
54+
}
3955
}

src/test/java/org/folio/search/service/setter/instance/IsbnProcessorTest.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,54 @@ void getFieldValue_negative_failedToLoadReferenceData() {
6060
assertThat(actual).isEmpty();
6161
}
6262

63+
@MethodSource("normalizeIsbnDataProvider")
64+
@DisplayName("normalizeIsbn_parameterized")
65+
@ParameterizedTest(name = "[{index}] input=''{0}'', expected={1}")
66+
void normalizeIsbn_parameterized(String input, List<String> expected) {
67+
var actual = isbnProcessor.normalizeIsbn(input);
68+
assertThat(actual).containsExactlyElementsOf(expected);
69+
}
70+
71+
@SuppressWarnings("checkstyle:MethodLength")
72+
private static Stream<Arguments> normalizeIsbnDataProvider() {
73+
return Stream.of(
74+
// Empty/whitespace cases
75+
arguments("", emptyList()),
76+
arguments(" ", emptyList()),
77+
78+
// Valid ISBN-10 (with valid checksum - converts to ISBN-13)
79+
arguments(" 1-86197-271-7 ", List.of("1861972717", "9781861972712")), // Covers trimming + formatting
80+
arguments("1 86197 271-7 (paper)", List.of("1861972717", "9781861972712", "(paper)")),
81+
82+
// Invalid ISBN-10 checksum (normalized only, no conversion)
83+
arguments("047144250X", List.of("047144250x")),
84+
arguments("047144250X (paper)", List.of("047144250x (paper)")),
85+
86+
// Invalid ISBN-10 format (non-standard spacing/hyphens)
87+
arguments("1-86-197 271-7", List.of("1861972717")), // Invalid spacing
88+
arguments("1 86197 2717 (paper)", List.of("1861972717 (paper)")),
89+
90+
// Valid ISBN-13
91+
arguments("9781609383657", List.of("9781609383657")),
92+
arguments("9790471442509", List.of("9790471442509")), // 979 prefix
93+
94+
// ISBN-13 with formatting variations
95+
arguments("978 0 471 44250 9", List.of("9780471442509")), // Multiple spaces
96+
arguments("978 0 471 44250 9 (alk. paper)", List.of("9780471442509", "(alk. paper)")),
97+
98+
// Invalid ISBN-13 (wrong prefix or malformed)
99+
arguments("89780471442509 (alk. paper)", List.of("89780471442509 (alk. paper)")),
100+
arguments("978-0 4712 442509 (alk. paper)", List.of("97804712442509 (alk. paper)")),
101+
102+
// Valid ISBN-10 with qualifier treated as extra text
103+
arguments("1861972717 extra text", List.of("1861972717", "9781861972712", "extra text")),
104+
105+
// Non-ISBN strings (normalized with char removal)
106+
arguments("ISBN 047144250X", List.of("isbn 047144250x")),
107+
arguments("1 2 3 4 5", List.of("12345"))
108+
);
109+
}
110+
63111
@SuppressWarnings("checkstyle:MethodLength")
64112
private static Stream<Arguments> isbnDataProvider() {
65113
return Stream.of(

src/test/resources/test-resources/instance-search-test-queries.csv

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,4 +393,11 @@ Case,Query,Value
393393
392,"issn = ""{value}""",0*-*x
394394
393,"issn = ""{value}""",*X
395395
394,"issn = ""{value}""",*x
396-
395,"issn = ""{value}""",0040-781*
396+
395,"issn = ""{value}""",0040-781*
397+
396,"isbn = ""{value}""",0471442*
398+
397,"isbn = ""{value}""",047144250X*
399+
398,"isbn == ""{value}""",047144250X
400+
399,"isbn == ""{value}""",047144250
401+
400,"isbn = ""{value}""",9781609383*
402+
401,"isbn = ""{value}""",9781609383657*
403+
402,"isbn == ""{value}""",9781609383657

0 commit comments

Comments
 (0)