Skip to content

Commit e148d46

Browse files
authored
Improving and tuning Klass Search (#418)
* add make commands for easier debugging of OpenSearch
* add tests and test data
* add tests and test data - correct tests
* add tests and test data - correct tests
* add tests and test data - correct tests
* add tests and test data - correct tests
* use http locally
* add more memory
* log all
* test improvements
* add more time
* add search constants
* add test cases
* add new matcher for title - remove fuzzy for description and codes
* search results seem slightly improved
* add comment
* do not display copyrighted classifications in search
* correct test and improve
* linting and updating after merge main
* format
* update time and properties
* fix codes causing exceptions
* solve issue resulting in exception
* final search query
* reset make command
* linting
* update comment
* remove unused
1 parent 0896874 commit e148d46

22 files changed

+451
-224
lines changed

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,11 @@ start-klass-api-open-search-docker:
128128
.PHONY: stop-klass-api-open-search-docker
129129
stop-klass-api-open-search-docker:
130130
docker compose $(COMPOSE_FILE) --profile open-search down -v
131+
132+
.PHONY: check-klass-api-open-search-docker
133+
check-klass-api-open-search-docker:
134+
docker compose $(COMPOSE_FILE) --profile open-search ps
135+
136+
.PHONY: logs-klass-api-open-search
137+
logs-klass-api-open-search:
138+
docker compose $(COMPOSE_FILE) --profile open-search logs --tail=100 -f

klass-api/src/main/java/no/ssb/klass/api/config/OpenSearchConfig.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ public RestHighLevelClient opensearchClient() {
4141
.connectedTo(opensearchUri.replace("https://", ""))
4242
.usingSsl()
4343
.withBasicAuth(username, password)
44-
.withConnectTimeout(Duration.ofSeconds(10))
45-
.withSocketTimeout(Duration.ofSeconds(5))
44+
.withConnectTimeout(Duration.ofSeconds(15))
45+
.withSocketTimeout(Duration.ofSeconds(60))
4646
: ClientConfiguration.builder()
4747
.connectedTo(
4848
opensearchUri
4949
.replace("https://", "")
5050
.replace("http://", ""))
51-
.withConnectTimeout(Duration.ofSeconds(10))
52-
.withSocketTimeout(Duration.ofSeconds(5)))
51+
.withConnectTimeout(Duration.ofSeconds(15))
52+
.withSocketTimeout(Duration.ofSeconds(60)))
5353
.build();
5454

5555
return RestClients.create(clientConfiguration).rest();

klass-api/src/main/java/no/ssb/klass/api/services/IndexService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
public interface IndexService {
99

10+
void createIndexWithStemmingAnalyzer();
11+
1012
/**
1113
* Indexes a classification and makes it searchable.
1214
*

klass-api/src/main/java/no/ssb/klass/api/services/IndexServiceImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ private IndexCoordinates getIndexCoordinates() {
7575
return IndexCoordinates.of(elasticsearchIndex);
7676
}
7777

78+
@Override
7879
@PostConstruct
79-
private void createIndexWithStemmingAnalyzer() {
80+
public void createIndexWithStemmingAnalyzer() {
8081

8182
try {
8283
var indexOps = elasticsearchOperations.indexOps(getIndexCoordinates());

klass-api/src/main/java/no/ssb/klass/api/services/PublicSearchQuery.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import no.ssb.klass.core.model.ClassificationType;
44
import no.ssb.klass.core.model.Language;
55

6+
import org.opensearch.common.unit.Fuzziness;
67
import org.opensearch.data.client.orhlc.NativeSearchQueryBuilder;
78
import org.opensearch.index.query.BoolQueryBuilder;
89
import org.opensearch.index.query.Operator;
@@ -37,16 +38,31 @@ public static org.springframework.data.elasticsearch.core.query.Query build(
3738
if (filterOnSection != null) {
3839
filterBuilder.must(QueryBuilders.termQuery("section", filterOnSection));
3940
}
41+
// Do not display copyrighted classifications in search results
42+
filterBuilder.mustNot(QueryBuilders.termQuery("copyrighted", true));
4043

44+
// Building a query of multiple conditions
45+
// Boosting match on title
46+
// Adding fuzziness on title to accept incomplete search words
4147
BoolQueryBuilder finalQuery =
4248
QueryBuilders.boolQuery()
43-
.must(
44-
QueryBuilders.queryStringQuery(query)
45-
.field("title", 3.0f)
49+
// 'matchPhrasePrefixQuery' will match on 'kommune' in 'kommuneinndeling'
50+
.should(QueryBuilders.matchPhrasePrefixQuery("title", query).boost(10.0f))
51+
// fuzzy match on title: "kommun" and "kommune" will match "kommuner"
52+
.should(
53+
QueryBuilders.matchQuery("title", query)
54+
.fuzziness(Fuzziness.fromEdits(1))
55+
.prefixLength(2)
56+
.maxExpansions(30)
57+
.boost(5.0f))
58+
.should(
59+
QueryBuilders.multiMatchQuery(query)
4660
.field("description", 2.0f)
47-
.field("codes", 1.0f)
48-
.defaultOperator(Operator.OR))
49-
.filter(filterBuilder);
61+
.field("codes", 0.5f)
62+
.operator(Operator.OR)
63+
.boost(2.0f))
64+
.filter(filterBuilder)
65+
.minimumShouldMatch(1);
5066

5167
NativeSearchQueryBuilder nativeQueryBuilder =
5268
new NativeSearchQueryBuilder()
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
klass.env.search.elasticsearch.index=klass
2-
opensearch.url=https://opensearch:9200
3-
opensearch.username=admin
4-
opensearch.password=admin
5-
2+
opensearch.url=http://opensearch:9200
3+
opensearch.username=
4+
opensearch.password=
65
opensearch.ssl=false

klass-api/src/test/java/no/ssb/klass/api/applicationtest/AbstractRestApiApplicationTest.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,13 @@ public abstract class AbstractRestApiApplicationTest {
9494
public static final String REQUEST_WITH_ID_AND_CHANGES =
9595
REQUEST + "/{classificationId}/changes";
9696

97+
public static final String QUERY = "query";
98+
public static final String INCLUDE_CODE_LISTS = "includeCodelists";
99+
public static final String TRUE = "true";
97100
public static final String JSON_SEARCH_RESULTS = "_embedded.searchResults";
98101
public static final String JSON_SEARCH_RESULT1 = "_embedded.searchResults[0]";
99102
public static final String JSON_SEARCH_RESULT2 = "_embedded.searchResults[1]";
103+
public static final String JSON_SEARCH_RESULT3 = "_embedded.searchResults[2]";
100104

101105
public static final String XML_SEARCH_RESULTS = "PagedResources.contents.content";
102106
public static final String XML_SEARCH_RESULT1 = "PagedResources.contents.content[0]";
@@ -107,12 +111,14 @@ public abstract class AbstractRestApiApplicationTest {
107111
public static final String JSON_CLASSIFICATION2 = "_embedded.classifications[1]";
108112
public static final String JSON_CLASSIFICATION3 = "_embedded.classifications[2]";
109113
public static final String JSON_CLASSIFICATION4 = "_embedded.classifications[3]";
114+
public static final String JSON_CLASSIFICATION5 = "_embedded.classifications[4]";
110115

111116
public static final String XML_CLASSIFICATIONS = "PagedResources.contents.content";
112117
public static final String XML_CLASSIFICATION1 = "PagedResources.contents.content[0]";
113118
public static final String XML_CLASSIFICATION2 = "PagedResources.contents.content[1]";
114119
public static final String XML_CLASSIFICATION3 = "PagedResources.contents.content[2]";
115120
public static final String XML_CLASSIFICATION4 = "PagedResources.contents.content[3]";
121+
public static final String XML_CLASSIFICATION5 = "PagedResources.contents.content[4]";
116122

117123
public static final String JSON_PAGE = "page";
118124
public static final String XML_PAGE = "PagedResources.page";
@@ -144,6 +150,9 @@ public abstract class AbstractRestApiApplicationTest {
144150
protected ClassificationFamily classificationFamily;
145151
protected CorrespondenceTable correspondenceTable;
146152
protected ClassificationSeries badmintonCodelist;
153+
protected ClassificationSeries badminton;
154+
protected ClassificationSeries sport;
155+
protected ClassificationSeries icd;
147156

148157
@Autowired protected ApplicationTestUtil applicationTestUtil;
149158

@@ -181,12 +190,25 @@ public void prepareTestData() {
181190
classificationFamily.addClassificationSeries(familieGrupperingCodelist);
182191
classificationService.saveAndIndexClassification(familieGrupperingCodelist);
183192

184-
badmintonCodelist = TestDataProvider.createBadmintonCodelist(user, user2, user3);
193+
badmintonCodelist = TestDataProvider.createBadmintonCodeList(user, user2, user3);
185194
classificationFamily.addClassificationSeries(badmintonCodelist);
186195
classificationService.saveAndIndexClassification(badmintonCodelist);
187196

197+
badminton = TestDataProvider.createBadmintonClassification(user);
198+
badminton.setContactPerson(user);
199+
classificationFamily.addClassificationSeries(badminton);
200+
badminton = classificationService.saveAndIndexClassification(badminton);
201+
202+
sport = TestDataProvider.createSportClassification(user);
203+
sport.setContactPerson(user);
204+
classificationFamily.addClassificationSeries(sport);
205+
sport = classificationService.saveAndIndexClassification(sport);
206+
188207
kommuneinndeling = classificationService.saveAndIndexClassification(kommuneinndeling);
189208
bydelsinndeling = classificationService.saveAndIndexClassification(bydelsinndeling);
209+
icd = TestDataProvider.createCopyrightedCodeList(user);
210+
classificationFamily.addClassificationSeries(icd);
211+
icd = classificationService.saveAndIndexClassification(icd);
190212

191213
correspondenceTable =
192214
TestDataProvider.createAndAddCorrespondenceTable(kommuneinndeling, bydelsinndeling);

klass-api/src/test/java/no/ssb/klass/api/applicationtest/RestApiClassificationFamilyIntegrationTest.java

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
import org.junit.jupiter.api.Test;
1212
import org.springframework.http.HttpStatus;
1313

14-
public class RestApiClassificationFamilyIntegrationTest extends AbstractRestApiApplicationTest {
14+
class RestApiClassificationFamilyIntegrationTest extends AbstractRestApiApplicationTest {
1515
@Test
16-
public void restServiceReturnClassificationFamily() {
16+
void restServiceReturnClassificationFamily() {
1717
String urlParts = REQUEST_CLASSIFICATION_FAMILY + "/" + classificationFamily.getId();
1818

1919
given().port(port)
@@ -43,7 +43,7 @@ public void restServiceReturnClassificationFamily() {
4343
}
4444

4545
@Test
46-
public void restServiceClassificationFamilyFiltersClassificationType() {
46+
void restServiceClassificationFamilyFiltersClassificationType() {
4747
String urlParts = REQUEST_CLASSIFICATION_FAMILY + "/" + classificationFamily.getId();
4848

4949
given().port(port)
@@ -60,21 +60,24 @@ public void restServiceClassificationFamilyFiltersClassificationType() {
6060
.assertThat()
6161
.body("name", equalTo(classificationFamily.getName()))
6262
// classifications
63+
// minus 1 because the test data contains one copyrighted classification,
64+
// and copyrighted classifications are only available by id
6365
.assertThat()
6466
.body(
6567
"classifications.size()",
6668
equalTo(
6769
classificationFamily
68-
.getClassificationSeriesBySectionAndClassificationType(
69-
null, null)
70-
.size()))
70+
.getClassificationSeriesBySectionAndClassificationType(
71+
null, null)
72+
.size()
73+
- 1))
7174

7275
// links
7376
.body(JSON_LINKS + ".self.href", containsString(urlParts));
7477
}
7578

7679
@Test
77-
public void restServiceClassificationFamilyFiltersSsbSection() {
80+
void restServiceClassificationFamilyFiltersSsbSection() {
7881
final String ssbSection = "unknown section";
7982
String urlParts = REQUEST_CLASSIFICATION_FAMILY + "/" + classificationFamily.getId();
8083

@@ -100,7 +103,7 @@ public void restServiceClassificationFamilyFiltersSsbSection() {
100103
}
101104

102105
@Test
103-
public void restServiceClassificationFamilyFiltersSsbSectionKnownSection() {
106+
void restServiceClassificationFamilyFiltersSsbSectionKnownSection() {
104107
final String ssbSection = "section";
105108
String urlParts = REQUEST_CLASSIFICATION_FAMILY + "/" + classificationFamily.getId();
106109

@@ -116,12 +119,12 @@ public void restServiceClassificationFamilyFiltersSsbSectionKnownSection() {
116119
.assertThat()
117120
.body("name", equalTo(classificationFamily.getName()))
118121
.assertThat()
119-
.body("classifications.size()", equalTo(2))
122+
.body("classifications.size()", equalTo(4))
120123
.body(JSON_LINKS + ".self.href", containsString(urlParts));
121124
}
122125

123126
@Test
124-
public void restServiceClassificationFamilyFiltersSsbSectionKnownSectionFullName() {
127+
void restServiceClassificationFamilyFiltersSsbSectionKnownSectionFullName() {
125128
final String ssbSection = "section - section name";
126129
String urlParts = REQUEST_CLASSIFICATION_FAMILY + "/" + classificationFamily.getId();
127130

@@ -137,7 +140,7 @@ public void restServiceClassificationFamilyFiltersSsbSectionKnownSectionFullName
137140
.assertThat()
138141
.body("name", equalTo(classificationFamily.getName()))
139142
.assertThat()
140-
.body("classifications.size()", equalTo(2))
143+
.body("classifications.size()", equalTo(4))
141144
.body(JSON_LINKS + ".self.href", containsString(urlParts));
142145
}
143146
}

klass-api/src/test/java/no/ssb/klass/api/applicationtest/RestApiListClassificationFamilyIntegrationTest.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
import org.springframework.http.HttpStatus;
1313

1414
/** Test suite that tests the list (all) classificationFamilies */
15-
public class RestApiListClassificationFamilyIntegrationTest extends AbstractRestApiApplicationTest {
15+
class RestApiListClassificationFamilyIntegrationTest extends AbstractRestApiApplicationTest {
1616
@Test
17-
public void restServiceListClassificationFamilies() {
17+
void restServiceListClassificationFamilies() {
1818
given().port(port)
1919
.accept(ContentType.JSON)
2020
.get(REQUEST_CLASSIFICATION_FAMILY)
@@ -38,7 +38,7 @@ public void restServiceListClassificationFamilies() {
3838
}
3939

4040
@Test
41-
public void restServiceListClassificationFamiliesFiltersClassificationType() {
41+
void restServiceListClassificationFamiliesFiltersClassificationType() {
4242
given().port(port)
4343
.accept(ContentType.JSON)
4444
.param("includeCodelists", "true")
@@ -51,19 +51,25 @@ public void restServiceListClassificationFamiliesFiltersClassificationType() {
5151
.body(
5252
"_embedded.classificationFamilies[0].name",
5353
equalTo(classificationFamily.getName()))
54+
.body(
55+
"_embedded.classificationFamilies[0].name",
56+
equalTo(classificationFamily.getName()))
57+
// minus 1 because the test data contains one copyrighted classification,
58+
// and copyrighted classifications are only available by id
5459
.body(
5560
"_embedded.classificationFamilies[0].numberOfClassifications",
5661
equalTo(
5762
classificationFamily
58-
.getClassificationSeriesBySectionAndClassificationType(
59-
null, null)
60-
.size()))
63+
.getClassificationSeriesBySectionAndClassificationType(
64+
null, null)
65+
.size()
66+
- 1))
6167
// links
6268
.body(JSON_LINKS + ".self.href", containsString(REQUEST_CLASSIFICATION_FAMILY));
6369
}
6470

6571
@Test
66-
public void restServiceListClassificationFamiliesFiltersSsbSection() {
72+
void restServiceListClassificationFamiliesFiltersSsbSection() {
6773
final String ssbSection = "section";
6874

6975
given().port(port)

0 commit comments

Comments
 (0)