Skip to content

Commit 71a0f36

Browse files
authored
Merge pull request #77 from cofacts/feature/upgrade-to-elasticsearch-v9
Upgrade to Elasticsearch 9.2 and Node 24
2 parents 38d3106 + c303b93 commit 71a0f36

11 files changed

Lines changed: 6406 additions & 12891 deletions

File tree

.github/workflows/ci.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
submodules: true
1717
- uses: actions/setup-node@v4
1818
with:
19-
node-version: '18'
19+
node-version: '24'
2020
cache: 'npm'
2121
- run: npm ci
2222
- run: npm run lint
@@ -26,7 +26,10 @@ jobs:
2626
runs-on: ubuntu-latest
2727
services:
2828
rumors-test-db:
29-
image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.3.2
29+
image: elasticsearch:9.2.2
30+
env:
31+
discovery.type: single-node
32+
xpack.security.enabled: 'false'
3033
ports:
3134
- 62223:9200
3235
steps:
@@ -35,7 +38,7 @@ jobs:
3538
submodules: true
3639
- uses: actions/setup-node@v4
3740
with:
38-
node-version: '18'
41+
node-version: '24'
3942
cache: 'npm'
4043
- run: npm ci
4144
- name: Test if schema loads

db/clear.js

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,41 @@
11
import 'dotenv/config';
22
import '../util/catchUnhandledRejection';
3-
import elasticsearch from '@elastic/elasticsearch';
3+
import { Client } from '@elastic/elasticsearch';
44

5-
const client = new elasticsearch.Client({
5+
const client = new Client({
66
node: process.env.ELASTICSEARCH_URL,
77
});
88

9-
client.indices.delete({ index: '_all' }).then(() => {
10-
console.log(`All indices has been deleted.`);
11-
});
9+
/**
 * Deletes every non-system index on the cluster that `client` is connected to.
 *
 * Index names are listed through the cat-indices API and then deleted
 * explicitly by name. Names starting with "." are treated as system indices
 * and are left untouched.
 *
 * Logs the outcome to the console. Any error is logged and rethrown, so the
 * returned promise rejects on failure.
 */
async function clearAllIndices() {
  try {
    // The rows may arrive directly as an array or wrapped in `{ body }`,
    // depending on the client version; accept both shapes.
    const raw = await client.cat.indices({ format: 'json' });
    let rows = [];
    if (Array.isArray(raw)) {
      rows = raw;
    } else if (raw && Array.isArray(raw.body)) {
      rows = raw.body;
    }

    if (rows.length === 0) {
      console.log('No indices to delete.');
      return;
    }

    // Keep user indices only. Rows without a usable name are ignored instead
    // of crashing on `.startsWith`.
    const indexNames = rows
      .map((row) => (row ? row.index : undefined))
      .filter((name) => typeof name === 'string' && !name.startsWith('.'));

    if (indexNames.length === 0) {
      console.log('No user indices to delete.');
      return;
    }

    // Delete all indices explicitly by name.
    await client.indices.delete({ index: indexNames });
    console.log(
      `Deleted ${indexNames.length} indices: ${indexNames.join(', ')}`
    );
  } catch (error) {
    console.error('Error clearing indices:', error);
    throw error;
  }
}
40+
41+
clearAllIndices();

db/loadSchema.js

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
/*eslint import/namespace: ['error', { allowComputed: true }]*/
22
import 'dotenv/config';
33
import '../util/catchUnhandledRejection';
4-
import elasticsearch from '@elastic/elasticsearch';
4+
import { Client } from '@elastic/elasticsearch';
55
import getIndexName from '../util/getIndexName';
66
import indexSetting from '../util/indexSetting';
77

88
import * as schema from '../schema';
99

10-
const client = new elasticsearch.Client({
10+
const client = new Client({
1111
node: process.env.ELASTICSEARCH_URL,
1212
});
1313

@@ -28,12 +28,10 @@ async function loadSchema() {
2828
try {
2929
await client.indices.create({
3030
index: indexName,
31-
body: {
32-
settings: indexSetting,
33-
mappings: { doc: schema[index] },
34-
aliases: {
35-
[index]: {},
36-
},
31+
settings: indexSetting,
32+
mappings: schema[index],
33+
aliases: {
34+
[index]: {},
3735
},
3836
});
3937
} catch (e) {

db/loadSeed.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@ const client = new Client({
99
});
1010

1111
async function loadSeeds(seedMap: Record<string, unknown[]>) {
12-
const body: unknown[] = [];
12+
const operations: unknown[] = [];
1313
Object.keys(seedMap).forEach((key) => {
1414
const fixtures = seedMap[key];
1515
fixtures.forEach((fixture, idx) => {
16-
body.push({
17-
index: { _index: key, _type: 'doc', _id: `${key}-${idx + 1}` },
16+
operations.push({
17+
index: { _index: key, _id: `${key}-${idx + 1}` },
1818
});
19-
body.push(fixture);
19+
operations.push(fixture);
2020
});
2121
});
2222

23-
const { body: result } = await client.bulk({ body, refresh: 'true' });
23+
const result = await client.bulk({ operations, refresh: 'true' });
2424
const resultStr = JSON.stringify(result, null, ' ');
2525
if (result.errors) {
2626
throw new Error(`[loadSeed] Seed load failed ${resultStr}`);
@@ -40,6 +40,6 @@ loadSeeds(
4040
).catch((e) => {
4141
console.error(e);
4242
// Elasticsearch transport.js errors
43-
if (e?.meta?.body) console.error(e.meta.body);
43+
if (e && typeof e === 'object' && 'body' in e) console.error((e as any).body);
4444
process.exit(1);
4545
});
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Creates destination indices with -v9 suffix and correct mappings for Reindex V6 -> V9.
3+
* Run this before running reindex in migration.sh so that string fields (status, userId, etc.)
4+
* are keyword, not text.
5+
*
6+
* How to use:
7+
* 1. Set ELASTICSEARCH_URL in .env to your V9 cluster (e.g. http://localhost:62223)
8+
* 2. Run: npx babel-node db/migrations/202602-000-create-v9-indices.js
9+
* Or just: ELASTICSEARCH_URL="http://localhost:62223" npx babel-node --extensions .ts,.js db/migrations/202602-000-create-v9-indices.js
10+
*/
11+
12+
/* eslint import/namespace: ['error', { allowComputed: true }] */
13+
import 'dotenv/config';
14+
import '../../util/catchUnhandledRejection';
15+
import { Client } from '@elastic/elasticsearch';
16+
import getIndexName from '../../util/getIndexName';
17+
import indexSetting from '../../util/indexSetting';
18+
19+
import * as schema from '../../schema';
20+
21+
const client = new Client({
22+
node: process.env.ELASTICSEARCH_URL,
23+
});
24+
25+
/**
 * Ensures that every schema export with a `properties` mapping has a
 * destination index named `<getIndexName(key)>-v9`, carrying the schema key
 * as its alias.
 *
 * - If the `-v9` index does not exist, it is created with `indexSetting`, the
 *   schema mapping, and the alias.
 * - If it already exists, only the alias is reconciled: the alias is removed
 *   from any other index that holds it and added to the `-v9` index when it
 *   is missing there.
 *
 * Any failure is logged with the index name and rethrown, which stops the
 * loop at the first failing index.
 */
async function createV9Indices() {
  // `typeof null === 'object'`, so the truthiness check comes first.
  const schemaNames = Object.keys(schema).filter(
    (k) => schema[k] && typeof schema[k] === 'object' && schema[k].properties
  );

  for (const index of schemaNames) {
    const indexName = `${getIndexName(index)}-v9`;
    try {
      const exists = await client.indices.exists({ index: indexName });

      if (!exists) {
        await client.indices.create({
          index: indexName,
          settings: indexSetting,
          mappings: schema[index],
          aliases: { [index]: {} },
        });
        console.log(
          `Index "${indexName}" created with mappings and alias "${index}"`
        );
        continue;
      }

      const indicesWithAlias = await findIndicesWithAlias(index);
      const actions = buildAliasActions(indicesWithAlias, indexName, index);

      if (actions.length > 0) {
        await client.indices.updateAliases({ actions });
        console.log(`Index "${indexName}" alias "${index}" updated`);
      } else {
        console.log(
          `Index "${indexName}" already exists with alias "${index}", skip`
        );
      }
    } catch (e) {
      console.error(`Error creating index "${indexName}"`, e);
      throw e;
    }
  }
}

/**
 * Returns the names of the indices that currently hold `alias`.
 *
 * A 404 response (alias not found) yields an empty list. Every other error is
 * rethrown, so that a transport or server failure is not mistaken for
 * "no alias".
 */
async function findIndicesWithAlias(alias) {
  try {
    const res = await client.indices.getAlias({ name: alias });
    return res && typeof res === 'object' ? Object.keys(res) : [];
  } catch (e) {
    const status = e && (e.statusCode || (e.meta && e.meta.statusCode));
    if (status === 404) return [];
    throw e;
  }
}

/**
 * Builds the update-aliases actions that leave `alias` on `indexName` only:
 * removals for every other holder first, then an add if `indexName` lacks it.
 */
function buildAliasActions(indicesWithAlias, indexName, alias) {
  const removals = indicesWithAlias
    .filter((idx) => idx !== indexName)
    .map((idx) => ({ remove: { index: idx, alias } }));
  const additions = indicesWithAlias.includes(indexName)
    ? []
    : [{ add: { index: indexName, alias } }];
  return [...removals, ...additions];
}
76+
77+
createV9Indices().catch((e) => {
78+
console.error('[createV9Indices]', e);
79+
process.exit(1);
80+
});
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/bin/bash
#
# Migrates data from Elasticsearch V6 to V9 via remote reindex.
#
# Usage:
#   SOURCE_HOST="http://db:9200" DEST_HOST="localhost:62223" ./migration.sh
#
# Environment variables:
#   SOURCE_HOST - V6 ES URL (default: http://db:9200)
#   DEST_HOST   - V9 ES host:port without protocol (default: localhost:62223)
#
# Exit status:
#   0 when Step 1 succeeded and every reindex task was accepted,
#   1 when Step 1 failed or at least one reindex request failed.
#
# Re-running (after a migration has already been done once):
#   - To only fix aliases: run ./migration.sh directly; Step 1 skips indices
#     that already exist and updates their alias.
#   - To redo a full reindex from V6: the reindex uses op_type: create, so
#     documents that already exist are skipped. Delete the V9 destination
#     indices first, then run this script.
#     Deletion example (replace DEST with your DEST_HOST, e.g. http://localhost:62223):
#
#       DEST="http://localhost:62223"
#       for idx in articlereplyfeedbacks_v1_2_1 urls_v1_1_1 tags_v1_0_2 categories_v1_1_1 airesponses_v1_0_1 replies_v1_1_1 articlecategoryfeedbacks_v1_1_2 analytics_v1_2_1 badges_v1_0_0 users_v1_2_3 replyrequests_v1_1_2 ydocs_v1_0_2 articles_v1_4_1 cooccurrences_v1_0_1; do curl -sf -X DELETE "${DEST}/${idx}-v9" && echo "  deleted ${idx}-v9"; done
#     (-f: "deleted" is not printed on a 404; tags_v1_0_2-v9 returns 404 if it
#      was never reindexed, which is normal)
#
#     Then run: ./migration.sh

# V6 (source) connection info
SOURCE_HOST="${SOURCE_HOST:-http://db:9200}"

# V9 (destination) connection info (host:port, without protocol)
DEST_HOST="${DEST_HOST:-localhost:62223}"

# Indices to migrate
INDICES=(
  articlereplyfeedbacks_v1_2_1
  urls_v1_1_1
  tags_v1_0_2
  categories_v1_1_1
  airesponses_v1_0_1
  replies_v1_1_1
  articlecategoryfeedbacks_v1_1_2
  analytics_v1_2_1
  badges_v1_0_0
  users_v1_2_3
  replyrequests_v1_1_2
  ydocs_v1_0_2
  articles_v1_4_1
  cooccurrences_v1_0_1
)

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "--- Step 1: Create V9 indices with correct mapping (keyword, nested, etc.) ---"
echo " SOURCE_HOST=${SOURCE_HOST}"
echo " DEST_HOST=${DEST_HOST}"
echo ""
# Runs from the repository root (two levels above this script); aborts the
# whole migration if index creation fails.
(cd "${SCRIPT_DIR}/../.." && ELASTICSEARCH_URL="http://${DEST_HOST}" npx babel-node --extensions .ts,.js db/migrations/202602-000-create-v9-indices.js) || exit 1
echo ""

echo "--- Step 2: Remote Reindex (V6 -> V9) [async] ---"
echo "請記錄每個任務的 task ID 以便查詢進度。"
echo "=========================================================================================="

# Source indices whose reindex request could not be submitted successfully.
FAILED_INDICES=()

for SOURCE_INDEX in "${INDICES[@]}"
do
  DEST_INDEX="${SOURCE_INDEX}-v9" # name of the new physical V9 index

  echo " > 任務啟動: ${SOURCE_INDEX} -> ${DEST_INDEX}"

  PAYLOAD="$(cat <<EOF
{
  "source": {
    "remote": {
      "host": "${SOURCE_HOST}"
    },
    "index": "${SOURCE_INDEX}",
    "size": 50
  },
  "dest": {
    "index": "${DEST_INDEX}",
    "op_type": "create"
  },
  "conflicts": "proceed"
}
EOF
)"

  # -sS: no progress meter, but still print transport errors.
  # -w appends the HTTP status on its own last line so that it can be checked
  # while the response body (which carries the task ID) is still shown.
  if ! RESPONSE="$(curl -sS -X POST "http://${DEST_HOST}/_reindex?wait_for_completion=false&pretty" \
    -H 'Content-Type: application/json' \
    -w $'\n%{http_code}' \
    -d "${PAYLOAD}")"; then
    echo " ! curl failed for ${SOURCE_INDEX}" >&2
    FAILED_INDICES+=("${SOURCE_INDEX}")
    echo ""
    continue
  fi

  HTTP_CODE="${RESPONSE##*$'\n'}"
  echo "${RESPONSE%$'\n'*}"
  if [[ "${HTTP_CODE}" != 2?? ]]; then
    echo " ! HTTP ${HTTP_CODE} for ${SOURCE_INDEX}" >&2
    FAILED_INDICES+=("${SOURCE_INDEX}")
  fi
  echo ""
done

echo "=========================================================================================="
if [ "${#FAILED_INDICES[@]}" -gt 0 ]; then
  echo "--- 下列索引的 Reindex 任務啟動失敗: ${FAILED_INDICES[*]} ---" >&2
else
  echo "--- 所有 Reindex 任務已啟動。 ---"
fi
# NOTE(review): the message below says the tags alias is set by Step 1, but it
# also says tags has no schema, and the Step 1 script appears to iterate only
# over schema exports. Confirm whether the tags alias needs to be set manually.
echo "--- 注意: tags 無對應 schema,tags_v1_0_2-v9 會由 Reindex 動態建立 (欄位為 text+.keyword);alias 由 Step 1 的 202602-create-v9-indices 一併設定。 ---"
echo ""
echo "--- 查詢 Reindex 進度(上方 curl 回傳的 task 欄位即為 task_id)---"
echo " 單一任務: curl -s \"http://${DEST_HOST}/_tasks/<task_id>?pretty\""
echo " 全部 reindex 任務: curl -s \"http://${DEST_HOST}/_tasks?detailed=true&actions=*reindex&pretty\""
echo " 進度看 response 內 task.status.created / task.status.total 等。"

# Surface submission failures to the caller / CI.
[ "${#FAILED_INDICES[@]}" -eq 0 ] || exit 1

0 commit comments

Comments
 (0)