Skip to content

Commit e70026e

Browse files
authored
Index in Nais Job (#516)
* Create klass-index-job * Add an application entrypoint * Running job * Synchronous indexing * Remove unnecessary injection * Working indexing in Docker Compose * Index each classification once * Async * Fix tests * Format code * Fix snapshot version * Nais manifest and deploy workflow * Explicitly close app * Block until futures complete * Add schedule * Add Backstage Component * Update README * Fix schedule expression * Set final workflow trigger * Use local timezone for schedule
1 parent 502dca3 commit e70026e

File tree

47 files changed

+625
-226
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+625
-226
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
name: Klass Index Job build and deploy
2+
3+
on:
4+
release:
5+
types: [ published ]
6+
push:
7+
branches:
8+
- main
9+
paths:
10+
- "klass-index-job/**"
11+
- "klass-shared/**"
12+
- ".nais/**/klass-index-job.yaml"
13+
- ".github/workflows/klass-index-job-build-and-deploy.yaml"
14+
workflow_dispatch:
15+
inputs:
16+
cluster:
17+
description: "Which cluster to deploy to?"
18+
required: true
19+
default: "test"
20+
type: choice
21+
options:
22+
- test
23+
- prod
24+
25+
jobs:
26+
docker-build:
27+
name: Docker Build
28+
permissions:
29+
contents: "read"
30+
id-token: "write"
31+
packages: "read"
32+
runs-on: ubuntu-latest
33+
outputs:
34+
image: ${{ steps.docker-build-push.outputs.image }}
35+
telemetry: ${{ steps.docker-build-push.outputs.telemetry }}
36+
prod-config-changed: ${{ steps.prod-config-changed.outputs.changed != 'non-inputs' }}
37+
test-config-changed: ${{ steps.test-config-changed.outputs.changed != 'non-inputs' }}
38+
only-config-changed: ${{ steps.changed-files.outputs.changed == 'only-inputs'}}
39+
steps:
40+
- name: Checkout
41+
uses: actions/checkout@v6
42+
with:
43+
fetch-depth: 0
44+
- uses: actions/setup-java@v4
45+
with:
46+
distribution: "temurin"
47+
java-version: "17"
48+
cache: "maven"
49+
50+
- name: Check what changed
51+
id: changed-files
52+
uses: "nais/what-changed@main"
53+
with:
54+
files: .nais/**/klass-index-job.yaml
55+
- name: Check for test config changes
56+
id: test-config-changed
57+
uses: "nais/what-changed@main"
58+
with:
59+
files: .nais/test/klass-index-job.yaml
60+
- name: Check for prod config changes
61+
id: prod-config-changed
62+
uses: "nais/what-changed@main"
63+
with:
64+
files: .nais/prod/klass-index-job.yaml
65+
66+
- name: Build with Maven
67+
if: steps.changed-files.outputs.changed != 'only-inputs' || github.event.inputs.cluster == 'test' || github.event.inputs.cluster == 'prod'
68+
run: mvn --batch-mode --update-snapshots install
69+
working-directory: klass-index-job
70+
env:
71+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
72+
73+
- name: Build and push
74+
if: steps.changed-files.outputs.changed != 'only-inputs' || github.event.inputs.cluster == 'test' || github.event.inputs.cluster == 'prod'
75+
uses: nais/docker-build-push@v0
76+
id: docker-build-push
77+
with:
78+
team: dapla-metadata
79+
dockerfile: klass-index-job/Dockerfile
80+
docker_context: klass-index-job
81+
image_suffix: index-job
82+
83+
deploy-test:
84+
name: Deploy to test
85+
needs: docker-build
86+
permissions:
87+
contents: "read"
88+
id-token: "write"
89+
packages: "read"
90+
runs-on: ubuntu-latest
91+
if: github.event_name != 'release' || github.event.inputs.cluster == 'test' || needs.docker-build.outputs.test-config-changed == 'true'
92+
steps:
93+
- name: Checkout
94+
uses: actions/checkout@v6
95+
96+
- uses: nais/deploy/actions/deploy@v2
97+
env:
98+
CLUSTER: test
99+
RESOURCE: .nais/test/klass-index-job.yaml
100+
DEPLOY_SERVER: deploy.ssb.cloud.nais.io:443
101+
WORKLOAD_IMAGE: ${{ needs.docker-build.outputs.image }}
102+
TELEMETRY: ${{ needs.docker-build.outputs.telemetry }}
103+
104+
deploy-prod:
105+
name: Deploy to prod
106+
needs: docker-build
107+
permissions:
108+
contents: "read"
109+
id-token: "write"
110+
packages: "read"
111+
runs-on: ubuntu-latest
112+
if: github.event_name == 'release' || github.event.inputs.cluster == 'prod' || (needs.docker-build.outputs.prod-config-changed == 'true' && needs.docker-build.outputs.only-config-changed == 'true')
113+
steps:
114+
- name: Checkout
115+
uses: actions/checkout@v6
116+
117+
- uses: nais/deploy/actions/deploy@v2
118+
env:
119+
CLUSTER: prod
120+
RESOURCE: .nais/prod/klass-index-job.yaml
121+
DEPLOY_SERVER: deploy.ssb.cloud.nais.io:443
122+
WORKLOAD_IMAGE: ${{ needs.docker-build.outputs.image }}
123+
TELEMETRY: ${{ needs.docker-build.outputs.telemetry }}

.nais/test/klass-index-job.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
apiVersion: nais.io/v1
2+
kind: Naisjob
3+
metadata:
4+
name: klass-index-job
5+
namespace: dapla-metadata
6+
labels:
7+
team: dapla-metadata
8+
shared-db: "true"
9+
spec:
10+
schedule: "30 23 * * *" # Run once per day at 23:30 Europe/Oslo time
11+
timeZone: Europe/Oslo
12+
concurrencyPolicy: Forbid
13+
restartPolicy: Never
14+
failedJobsHistoryLimit: 3
15+
successfulJobsHistoryLimit: 3
16+
openSearch:
17+
access: admin
18+
instance: klass-search
19+
resources:
20+
requests:
21+
cpu: 200m
22+
memory: 1920Mi
23+
limits:
24+
memory: 1920Mi
25+
env:
26+
- name: SPRING_PROFILES_ACTIVE
27+
value: postgres, remote-open-search
28+
- name: OPENSEARCH_USERNAME
29+
value: ${OPEN_SEARCH_USERNAME}
30+
- name: OPENSEARCH_URL
31+
value: ${OPEN_SEARCH_URI}
32+
- name: OPENSEARCH_PASSWORD
33+
value: ${OPEN_SEARCH_PASSWORD}
34+
# Structured logging in cluster
35+
- name: LOGGING_STRUCTURED_FORMAT_CONSOLE
36+
value: logstash
37+
- name: JAVA_TOOL_OPTIONS
38+
value: "-XX:InitialRAMPercentage=75.0 -XX:MaxRAMPercentage=75.0"
39+
envFrom:
40+
- secret: google-sql-klass
41+
filesFrom:
42+
- mountPath: /var/run/secrets/nais.io/sqlcertificate
43+
secret: sqeletor-klass-827ec8ec

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,11 @@ check-klass-api-open-search-docker:
136136
.PHONY: logs-klass-api-open-search
137137
logs-klass-api-open-search:
138138
docker compose $(COMPOSE_FILE) --profile open-search logs --tail=100 -f
139+
140+
.PHONY: start-klass-index-job-docker
141+
start-klass-index-job-docker:
142+
docker compose $(COMPOSE_FILE) --profile index up --build
143+
144+
.PHONY: stop-docker
145+
stop-docker:
146+
docker compose $(COMPOSE_FILE) down

README.md

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,39 @@
1-
[![Maintainability](https://api.codeclimate.com/v1/badges/34eed0d4c7e9abd16add/maintainability)](https://codeclimate.com/github/statisticsnorway/klass/maintainability)
2-
[![CodeQL](https://github.com/statisticsnorway/klass/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/statisticsnorway/klass/actions/workflows/codeql-analysis.yml)
1+
# Klass
32

4-
# KLASS
3+
Klass is Statistics Norway's system for classifications and code lists. The data model is based on the structure and principles described by [GSIM](https://statswiki.unece.org/spaces/gsim/pages/97356506/1_Introduction).
54

6-
Spring Boot applications that handles classifications for SSB.
7-
Klass provides a REST api that clients can use to read classifications, and a Vaadin frontend for maintaining classifications.
5+
The information in Klass is exposed through a REST API, available to all, free of charge under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/deed.no) license. The API documentation is available in multiple flavours:
6+
7+
- API Guide: <https://data.ssb.no/api/klass/v1/api-guide.html>
8+
- Swagger UI: <https://data.ssb.no/api/klass/swagger-ui/index.html>
9+
- OpenAPI spec: <https://data.ssb.no/api/klass/v3/api-docs>
810

911
## Overview
1012

1113
Klass consists of 5 Maven modules
1214

1315
- Klass API (Standalone application that provides the Klass API)
14-
- Klass Forvaltning (Frontend for classification maintaining)
16+
- Klass Forvaltning (Internal tool for maintenance of classifications)
1517
- Klass Shared (Classes shared between API and Forvaltning, primarily database and search components)
1618
- Klass Solr (Solr Core configuration and configuration for embedded solr for test/development)
19+
- Klass Index Job (Responsible for periodically updating the OpenSearch index)
1720

18-
## Build and Deploy
21+
## Build
1922

20-
Building the project will output war files for **Klass API** & **Klass Forvaltning** and a zip file (WiP) for **Klass Solr**.
23+
Run `mvn install` to build the project.
2124

22-
You can find these in each maven modules target folder.
25+
## Deploy
2326

24-
```
25-
klass-api/target/klass-api-{Version}.war
26-
klass-forvaltning/target/klass-forvaltning-{Version}.war
27-
klass-solr/klass-solr-{Version}.zip (WiP)
28-
```
27+
Klass is hosted on the Nais application platform. Deploy configuration may be found in the [.nais](.nais) directory. Deploy workflows may be found in the [.github/workflows](.github/workflows) directory.
2928

3029
## Database
3130

32-
Klass is configured to use Flyway for database initialising and migration.
33-
You can find the collection of SQL scripts in the Klass-shared module under `src/main/resources/db/migration`
31+
Klass uses PostgreSQL for its database.
3432

35-
If the classification tables are empty Klass will by default attempt to import data from its predecessor.
36-
This process can take quite some time as there is a lot of data and its also sent to Solr to populate the search index.
33+
Tests are run with the Zonky Postgres embedded database so that they use the PostgreSQL dialect and guarantee consistency with deployed environments.
3734

38-
Tips: If you are only setting up Klass for testing/development purposes you can use the `small-import` spring profile to reduce the amount of data being imported.
35+
Klass is configured to use Flyway for database initialization and migration.
36+
You can find the collection of SQL scripts in the Klass-shared module under `src/main/resources/db/migration`
3937

4038
## Development
4139

@@ -119,7 +117,9 @@ Build the app: `make build-klass-api`
119117

120118
#### Docker compose
121119

122-
TODO
120+
The apps can be run in multiple different configurations with Docker Compose. See [klass-shared/docker-compose.yaml](klass-shared/docker-compose.yaml) for details.
121+
122+
There are tasks available in the Makefile as well.
123123

124124
### Klass Forvaltning
125125

backstage.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,27 @@ spec:
141141
system: metadata
142142
owner: dapla-metadata-developers
143143
lifecycle: production
144+
---
145+
apiVersion: backstage.io/v1alpha1
146+
kind: Component
147+
metadata:
148+
title: Klass Index Job
149+
name: klass-index-job
150+
description:
151+
Independent job which creates and updates the search index for Klass.
152+
tags:
153+
- metadata
154+
- java
155+
- classification
156+
- codelist
157+
- nais
158+
annotations:
159+
github.com/project-slug: statisticsnorway/klass
160+
spec:
161+
type: job
162+
owner: dapla-metadata-developers
163+
lifecycle: production
164+
system: metadata
165+
dependsOn:
166+
- resource:klass-postgresql
167+
- resource:klass-opensearch

klass-api/src/main/java/no/ssb/klass/KlassApiApplication.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import org.springframework.boot.web.servlet.ServletComponentScan;
2222
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
2323
import org.springframework.context.annotation.Import;
24-
import org.springframework.scheduling.annotation.EnableScheduling;
2524

2625
@OpenAPIDefinition(
2726
info =
@@ -94,7 +93,6 @@ Get codes from a classification. A range is specified when requesting the codes,
9493
@Import(TomcatServletWebServerFactoryCustomizer.class)
9594
@ConfigurationPropertiesScan
9695
@ServletComponentScan
97-
@EnableScheduling
9896
public class KlassApiApplication extends SpringBootServletInitializer {
9997

10098
@Override

klass-api/src/main/java/no/ssb/klass/api/config/OpenSearchConfig.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ public class OpenSearchConfig extends AbstractOpenSearchConfiguration {
3030
private boolean ssl;
3131

3232
// Constant for the stemmer
33-
public static final String NORWEGIAN_STEMMER_ANALYZER = "norwegian_stemmer_analyzer";
34-
3533
@Override
3634
@Bean(destroyMethod = "close")
3735
public RestHighLevelClient opensearchClient() {

klass-api/src/main/java/no/ssb/klass/api/services/ReIndexService.java

Lines changed: 0 additions & 48 deletions
This file was deleted.

0 commit comments

Comments
 (0)