ci(ruvector-npm): functional, learning, perf, and recall CI workflow #4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: ruvector npm — functional, learning, optimized, effective | |
| # Triggers: pushes to main and pull requests that touch the ruvector package | |
| # or this workflow file, plus manual runs via workflow_dispatch. | |
| on: | |
| push: | |
| branches: [main] | |
| paths: | |
| - 'npm/packages/ruvector/**' | |
| - '.github/workflows/ruvector-npm-ci.yml' | |
| pull_request: | |
| paths: | |
| - 'npm/packages/ruvector/**' | |
| - '.github/workflows/ruvector-npm-ci.yml' | |
| workflow_dispatch: | |
| # The workflow token only gets read access to repository contents. | |
| permissions: | |
| contents: read | |
| # Every `run` step executes from the package directory unless the step sets | |
| # its own working-directory. | |
| defaults: | |
| run: | |
| working-directory: npm/packages/ruvector | |
| # Applied to all jobs. Presumably disables ANSI colour in CLI output so the | |
| # steps that grep that output see plain text — confirm against the CLI. | |
| env: | |
| NO_COLOR: '1' | |
| FORCE_COLOR: '0' | |
| jobs: | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 1. BUILD — package compiles and dist entry-points are present | |
| # ────────────────────────────────────────────────────────────────────────── | |
| build: | |
| name: Build | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install (isolated from workspace) | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Build | |
| # tsc exits non-zero on pre-existing errors but still emits dist/ | |
| # (noEmitOnError is not set in tsconfig, defaults to false). | |
| # Run the full build script steps manually so we can tolerate the | |
| # tsc exit code while still failing if dist/index.js is absent. | |
| run: | | |
| set +e | |
| # After a pipeline, $? is the exit status of `tail`, not `tsc`. | |
| # PIPESTATUS[0] holds the status of the first command, so the | |
| # value reported below is the real tsc exit code. | |
| ./node_modules/.bin/tsc 2>&1 | tail -5 | |
| TSC_EXIT=${PIPESTATUS[0]} | |
| # Copy the prebuilt ONNX package next to the compiled output; a | |
| # missing source directory is tolerated. | |
| mkdir -p dist/core/onnx/pkg | |
| cp -r src/core/onnx/pkg/. dist/core/onnx/pkg/ 2>/dev/null || true | |
| if [ ! -f dist/index.js ]; then | |
| echo "::error::dist/index.js missing — build truly failed (not just pre-existing TS errors)" | |
| exit 1 | |
| fi | |
| echo "Build complete (tsc exit=$TSC_EXIT — pre-existing errors tolerated)" | |
| - name: Verify dist entry-points | |
| run: npm run verify-dist | |
| # Hand the compiled dist/ to the downstream jobs, which download it by name. | |
| - name: Upload dist | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| retention-days: 1 | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 2. UNIT + CLI TESTS — existing test suite must pass green | |
| # ────────────────────────────────────────────────────────────────────────── | |
| unit-tests: | |
| name: Unit & CLI tests | |
| runs-on: ubuntu-22.04 | |
| # Runs after the build job; its dist/ output is fetched below as an artifact. | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| # Restore the compiled dist/ uploaded by the build job instead of recompiling. | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| # Each test script is its own step, so a failure points at one script. | |
| - name: Integration test | |
| run: node test/integration.js | |
| - name: CLI commands test | |
| run: node test/cli-commands.js | |
| - name: SIGTERM cleanup test | |
| run: node test/sigterm-cleanup.js | |
| - name: VectorDB standalone test (insert / search / delete / stats) | |
| run: node test/standalone-test.js | |
| - name: Optimizer test (task-type routing / profiles) | |
| run: node test/optimizer.test.js | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 3. FUNCTIONAL SMOKE — npx-style CLI works end-to-end | |
| # ────────────────────────────────────────────────────────────────────────── | |
| functional-smoke: | |
| name: Functional smoke (npx ruvector) | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| - name: CLI help exits 0 | |
| run: node bin/cli.js --help | |
| - name: CLI version exits 0 | |
| run: node bin/cli.js --version | |
| - name: CLI info shows implementation | |
| run: | | |
| # The exit code is ignored on purpose: the assertion is on the text. | |
| out=$(node bin/cli.js info 2>&1) || true | |
| echo "$out" | |
| # Must report a known implementation type | |
| if ! echo "$out" | grep -qiE 'native|wasm|implementation'; then | |
| echo "::error::ruvector info did not report an implementation type" | |
| exit 1 | |
| fi | |
| # Best-effort only: a failing `doctor` does not fail the job, despite the step name. | |
| - name: CLI doctor exits 0 | |
| run: node bin/cli.js doctor || true | |
| - name: Create DB, insert vectors, search — CLI round-trip | |
| run: | | |
| set -e | |
| TMP=$(mktemp -d) | |
| # Remove the scratch directory (DB + vector payload) however the step ends. | |
| trap 'rm -rf "$TMP"' EXIT | |
| DB="$TMP/smoke.db" | |
| node bin/cli.js create "$DB" --dimensions 64 | |
| echo "Created DB at $DB" | |
| # Generate 200 random vectors as JSON and insert them | |
| node - "$DB" <<'JSEOF' | |
| const { execFileSync } = require('child_process'); | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| const db = process.argv[2]; | |
| const dim = 64; | |
| const n = 200; | |
| const rows = Array.from({ length: n }, (_, i) => ({ | |
| id: `vec-${i}`, | |
| vector: Array.from({ length: dim }, () => Math.random() * 2 - 1), | |
| })); | |
| // Write the payload next to the DB, inside the private mktemp directory, | |
| // rather than under a fixed name in the shared os.tmpdir(). | |
| const vectorFile = path.join(path.dirname(db), 'smoke-vectors.json'); | |
| fs.writeFileSync(vectorFile, JSON.stringify(rows)); | |
| try { | |
| // Argument array, no shell: both paths reach the CLI verbatim. | |
| execFileSync(process.execPath, ['bin/cli.js', 'insert', db, vectorFile], { | |
| stdio: 'inherit', | |
| cwd: process.cwd(), | |
| env: { ...process.env, NO_COLOR: '1' }, | |
| }); | |
| } finally { | |
| // Drop the payload even when the insert command fails. | |
| fs.unlinkSync(vectorFile); | |
| } | |
| JSEOF | |
| echo "Insert complete" | |
| node bin/cli.js stats "$DB" | |
| # Search with a random query — must return results | |
| # CLI accepts: ruvector search <db> --vector <json> --top-k <n> | |
| node - "$DB" <<'JSEOF' | |
| const { execFileSync } = require('child_process'); | |
| const db = process.argv[2]; | |
| const dim = 64; | |
| const vec = JSON.stringify(Array.from({ length: dim }, () => Math.random() * 2 - 1)); | |
| // No shell involved, so the JSON vector needs no quoting or escaping. | |
| const out = execFileSync( | |
| process.execPath, | |
| ['bin/cli.js', 'search', db, '--vector', vec, '--top-k', '10'], | |
| { encoding: 'utf8', cwd: process.cwd(), env: { ...process.env, NO_COLOR: '1' } } | |
| ); | |
| console.log(out); | |
| // NOTE(review): this is a loose substring check ("id" also matches words such | |
| // as "invalid"); tighten it once the CLI output format is pinned down. | |
| if (!/result|score|id/i.test(out)) { | |
| console.error('::error::search output missing expected fields (id/score/result)'); | |
| process.exit(1); | |
| } | |
| console.log('Smoke search OK'); | |
| JSEOF | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 4. LEARNING CHECK — HNSW index activates, not brute-force fallback | |
| # ────────────────────────────────────────────────────────────────────────── | |
| learning-check: | |
| name: Learning check (HNSW activates) | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| - name: HNSW activates with unique storagePath (not FallbackVectorDB) | |
| run: | | |
| node - <<'JSEOF' | |
| const crypto = require('crypto'); | |
| const os = require('os'); | |
| const path = require('path'); | |
| // Same approach as the fix in @claude-flow/cli vector-db.ts: every VectorDB | |
| // gets its own storagePath so no daemon lock conflict occurs. | |
| const uniqueStoragePath = () => { | |
| const suffix = crypto.randomBytes(8).toString('hex'); | |
| return path.join(os.tmpdir(), `ruvector-test-${process.pid}-${suffix}.rvf`); | |
| }; | |
| // Emit a workflow error annotation and stop the step with a failure. | |
| const fail = (message) => { | |
| console.error('::error::' + message); | |
| process.exit(1); | |
| }; | |
| // One random vector with components drawn from [-1, 1). | |
| const randomVector = (dim) => Float32Array.from({ length: dim }, () => Math.random() * 2 - 1); | |
| async function main() { | |
| const DIM = 128; | |
| const N = 500; | |
| const { VectorDB, getImplementationType } = require('./dist/index.js'); | |
| const implType = getImplementationType(); | |
| console.log(`Implementation type: ${implType}`); | |
| if (implType !== 'native' && implType !== 'wasm') { | |
| fail(`Expected native or wasm implementation, got: ${implType}`); | |
| } | |
| // Unique storagePath per DB — must not fall back to brute-force | |
| const db = new VectorDB({ dimensions: DIM, storagePath: uniqueStoragePath() }); | |
| console.log('VectorDB constructed OK'); | |
| // Populate the index one vector at a time | |
| let inserted = 0; | |
| while (inserted < N) { | |
| await db.insert({ id: `id-${inserted}`, vector: randomVector(DIM) }); | |
| inserted += 1; | |
| } | |
| console.log(`Inserted ${N} vectors`); | |
| // Random query — should use HNSW path | |
| const results = await db.search({ vector: randomVector(DIM), k: 10 }); | |
| console.log(`Search returned ${results.length} results`); | |
| if (results.length === 0) { | |
| fail('search returned 0 results — HNSW may not be indexing correctly'); | |
| } | |
| // Every hit needs a truthy id and a numeric score | |
| const malformedAt = results.findIndex(r => !r.id || typeof r.score !== 'number'); | |
| if (malformedAt !== -1) { | |
| fail(`Malformed result: ${JSON.stringify(results[malformedAt])}`); | |
| } | |
| // Scores are expected in the cosine-similarity range [-1, 1], with a small tolerance | |
| const outOfRange = (score) => score < -1.01 || score > 1.01; | |
| const badScore = results.find(r => outOfRange(r.score)); | |
| if (badScore) { | |
| fail(`Score out of cosine range: ${JSON.stringify(badScore)}`); | |
| } | |
| // The DB must account for every inserted vector | |
| const len = await db.len(); | |
| if (len !== N) { | |
| fail(`DB reports ${len} vectors but inserted ${N}`); | |
| } | |
| console.log(`HNSW learning check passed (impl=${implType}, N=${N}, k=10, scores OK)`); | |
| } | |
| main().catch(err => fail(err.message)); | |
| JSEOF | |
| - name: Verify no storagePath regression in adapter code | |
| working-directory: ${{ github.workspace }} | |
| run: | | |
| # Guard for the lock-conflict regression fixed in fix/hnsw-storage-path-lock: | |
| # the @claude-flow/cli adapter must pass storagePath to VectorDB. | |
| ADAPTER="$(find . -path '*/ruvector/vector-db.ts' -not -path '*/node_modules/*' | head -1)" | |
| # No adapter in this checkout: nothing to verify, leave a notice. | |
| if [ -z "$ADAPTER" ]; then | |
| echo "::notice::@claude-flow/cli adapter not found in this repo (expected in ruflo monorepo)" | |
| exit 0 | |
| fi | |
| if grep -q 'storagePath:' "$ADAPTER"; then | |
| echo "adapter storagePath guard OK ($ADAPTER)" | |
| else | |
| echo "::error file=$ADAPTER::VectorDB constructor called without storagePath — HNSW lock-conflict regression (fix/hnsw-storage-path-lock)." | |
| exit 1 | |
| fi | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 5. PERFORMANCE BENCHMARK — HNSW must be ≥2× faster than brute-force at N=5000 | |
| # ────────────────────────────────────────────────────────────────────────── | |
| perf-benchmark: | |
| name: Performance benchmark (≥2× speedup at N=5000) | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| - name: HNSW speedup ≥2× vs brute-force at N=5000 | |
| run: | | |
| node - <<'JSEOF' | |
| const os = require('os'); | |
| const path = require('path'); | |
| const crypto = require('crypto'); | |
| const { performance } = require('perf_hooks'); | |
| // Fresh storage file name for each run (pid + random suffix). | |
| function uniqueStoragePath() { | |
| const rand = crypto.randomBytes(8).toString('hex'); | |
| return path.join(os.tmpdir(), `ruvector-bench-${process.pid}-${rand}.rvf`); | |
| } | |
| // Exact top-k by cosine similarity: scores every vector, sorts all of the | |
| // scores, and keeps the best k. This is the baseline HNSW is timed against. | |
| function cosineBrute(vectors, query, k) { | |
| const scores = vectors.map((v, i) => { | |
| let dot = 0, na = 0, nb = 0; | |
| for (let j = 0; j < v.length; j++) { | |
| dot += v[j] * query[j]; | |
| na += v[j] * v[j]; | |
| nb += query[j] * query[j]; | |
| } | |
| const denom = Math.sqrt(na) * Math.sqrt(nb); | |
| return { id: `id-${i}`, score: denom === 0 ? 0 : dot / denom }; | |
| }); | |
| return scores.sort((a, b) => b.score - a.score).slice(0, k); | |
| } | |
| async function main() { | |
| const { VectorDB } = require('./dist/index.js'); | |
| const DIM = 128; | |
| const N = 5000; | |
| const Q = 50; // queries for timing | |
| const K = 10; | |
| const MIN_SPEEDUP = 2.0; | |
| console.log(`Building dataset: N=${N}, dim=${DIM}`); | |
| const vectors = Array.from({ length: N }, () => | |
| Float32Array.from({ length: DIM }, () => Math.random() * 2 - 1) | |
| ); | |
| const queries = Array.from({ length: Q }, () => | |
| Float32Array.from({ length: DIM }, () => Math.random() * 2 - 1) | |
| ); | |
| // Timings use performance.now() (sub-millisecond). With Date.now() a fast | |
| // batch of queries can measure as 0ms or 1ms, which makes the QPS figures | |
| // Infinity/NaN or extremely noisy and the gate below meaningless. | |
| // ---- brute-force baseline ---- | |
| const vPlain = vectors.map(v => Array.from(v)); // plain arrays for brute | |
| const bruteStart = performance.now(); | |
| for (const q of queries) cosineBrute(vPlain, q, K); | |
| const bruteMs = performance.now() - bruteStart; | |
| const bruteQps = (Q / bruteMs) * 1000; | |
| console.log(`Brute-force: ${bruteMs.toFixed(2)}ms for ${Q} queries → ${bruteQps.toFixed(0)} QPS`); | |
| // ---- HNSW ---- | |
| const db = new VectorDB({ dimensions: DIM, storagePath: uniqueStoragePath() }); | |
| for (let i = 0; i < N; i++) { | |
| await db.insert({ id: `id-${i}`, vector: vectors[i] }); | |
| } | |
| console.log(`HNSW index built (${N} vectors)`); | |
| // Warm-up (1 query) | |
| await db.search({ vector: queries[0], k: K }); | |
| const hnswStart = performance.now(); | |
| for (const q of queries) await db.search({ vector: q, k: K }); | |
| const hnswMs = performance.now() - hnswStart; | |
| const hnswQps = (Q / hnswMs) * 1000; | |
| console.log(`HNSW: ${hnswMs.toFixed(2)}ms for ${Q} queries → ${hnswQps.toFixed(0)} QPS`); | |
| // An unmeasurable interval would turn the ratio into Infinity or NaN and | |
| // let the threshold check pass vacuously, so treat it as a failure. | |
| if (!(bruteMs > 0 && hnswMs > 0)) { | |
| console.error('::error::Benchmark timing was not measurable (elapsed time of 0ms) — cannot compute a speedup.'); | |
| process.exit(1); | |
| } | |
| const speedup = hnswQps / bruteQps; | |
| console.log(`Speedup: ${speedup.toFixed(2)}× (threshold: ${MIN_SPEEDUP}×)`); | |
| if (speedup < MIN_SPEEDUP) { | |
| console.error( | |
| `::error::HNSW speedup ${speedup.toFixed(2)}× is below the ${MIN_SPEEDUP}× threshold at N=${N}. ` + | |
| `HNSW may have degraded to brute-force — check for storagePath lock conflicts or index corruption.` | |
| ); | |
| process.exit(1); | |
| } | |
| console.log(`Performance benchmark passed: ${speedup.toFixed(2)}× speedup at N=${N}`); | |
| } | |
| main().catch(e => { console.error('::error::' + e.message); process.exit(1); }); | |
| JSEOF | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 6. RECALL QUALITY — recall@10 ≥ 0.88 at N=10 000 | |
| # ────────────────────────────────────────────────────────────────────────── | |
| recall-quality: | |
| name: Recall quality (recall@10 ≥ 0.88 at N=10k) | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| - name: recall@10 ≥ 0.88 at N=10 000 | |
| timeout-minutes: 10 | |
| run: | | |
| node - <<'JSEOF' | |
| const crypto = require('crypto'); | |
| const os = require('os'); | |
| const path = require('path'); | |
| // Fresh storage file name for each run (pid + random suffix). | |
| const uniqueStoragePath = () => { | |
| const suffix = crypto.randomBytes(8).toString('hex'); | |
| return path.join(os.tmpdir(), `ruvector-recall-${process.pid}-${suffix}.rvf`); | |
| }; | |
| // Cosine similarity of two equal-length vectors; 0 when either norm is 0. | |
| function cosineSimilarity(a, b) { | |
| let dot = 0; | |
| let normA = 0; | |
| let normB = 0; | |
| for (let i = 0; i < a.length; i++) { | |
| dot += a[i] * b[i]; | |
| normA += a[i] * a[i]; | |
| normB += b[i] * b[i]; | |
| } | |
| const denom = Math.sqrt(normA) * Math.sqrt(normB); | |
| if (denom === 0) return 0; | |
| return dot / denom; | |
| } | |
| // Ground truth: ids of the k vectors most similar to the query, by full scan. | |
| function exactTopK(vectors, query, k) { | |
| const scored = vectors.map((v, i) => ({ id: `id-${i}`, score: cosineSimilarity(v, query) })); | |
| scored.sort((x, y) => y.score - x.score); | |
| return scored.slice(0, k).map(hit => hit.id); | |
| } | |
| const randomVector = (dim) => Float32Array.from({ length: dim }, () => Math.random() * 2 - 1); | |
| async function main() { | |
| const { VectorDB } = require('./dist/index.js'); | |
| const DIM = 128; | |
| const N = 10000; | |
| const Q = 50; // evaluation queries | |
| const K = 10; | |
| const MIN_RECALL = 0.88; | |
| const PROGRESS_EVERY = 2000; | |
| console.log(`Building dataset: N=${N}, dim=${DIM}`); | |
| const vectors = Array.from({ length: N }, () => randomVector(DIM)); | |
| const queries = Array.from({ length: Q }, () => randomVector(DIM)); | |
| const db = new VectorDB({ dimensions: DIM, storagePath: uniqueStoragePath() }); | |
| // Vectors go in one at a time; progress is reported every 2000 inserts | |
| // and once more at the end. | |
| for (let idx = 0; idx < N; idx++) { | |
| await db.insert({ id: `id-${idx}`, vector: vectors[idx] }); | |
| const done = idx + 1; | |
| if (done % PROGRESS_EVERY === 0 || done === N) { | |
| console.log(` inserted ${done}/${N}`); | |
| } | |
| } | |
| console.log('Index build complete'); | |
| // Plain arrays for brute-force ground truth | |
| const plain = vectors.map(v => Array.from(v)); | |
| let totalRecall = 0; | |
| for (const q of queries) { | |
| const truth = new Set(exactTopK(plain, q, K)); | |
| const results = await db.search({ vector: q, k: K }); | |
| let found = 0; | |
| for (const r of results) { | |
| if (truth.has(r.id)) found += 1; | |
| } | |
| totalRecall += found / K; | |
| } | |
| const avgRecall = totalRecall / Q; | |
| console.log(`recall@${K} = ${avgRecall.toFixed(4)} over ${Q} queries (threshold: ${MIN_RECALL})`); | |
| if (avgRecall < MIN_RECALL) { | |
| console.error( | |
| `::error::recall@${K} = ${avgRecall.toFixed(4)} is below the ${MIN_RECALL} threshold at N=${N}. ` + | |
| `HNSW index quality has degraded — check ef_construction, m parameters, and distance metric.` | |
| ); | |
| process.exit(1); | |
| } | |
| console.log(`Recall quality check passed: recall@${K} = ${avgRecall.toFixed(4)} at N=${N}`); | |
| } | |
| main().catch(err => { | |
| console.error('::error::' + err.message); | |
| process.exit(1); | |
| }); | |
| JSEOF | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 7. TARBALL INTEGRITY — published package contains all entry points | |
| # (reuses the regression-guard pattern for the ruvector package only) | |
| # ────────────────────────────────────────────────────────────────────────── | |
| tarball-integrity: | |
| name: Tarball integrity | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20' | |
| - name: Install | |
| run: npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional | |
| - name: Download dist | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ruvector-dist | |
| path: npm/packages/ruvector/dist/ | |
| - name: npm pack and assert entry-points | |
| run: | | |
| set -e | |
| mkdir -p /tmp/pack | |
| npm pack --pack-destination /tmp/pack | |
| tarball=$(ls /tmp/pack/*.tgz | head -1) | |
| echo "Packed: $tarball" | |
| listing=$(tar -tzf "$tarball") | |
| echo "--- tarball contents (first 30) ---" | |
| echo "$listing" | head -30 | |
| # Collect every file path package.json points at: main, module, types, | |
| # bin (string or object form) and all string targets under exports. | |
| # Subpath patterns containing '*' are not literal files and are skipped. | |
| required=$(node -e " | |
| const p = require('./package.json'); | |
| const files = new Set(); | |
| const walk = (n) => { | |
| if (typeof n === 'string') files.add(n); | |
| else if (n && typeof n === 'object') Object.values(n).forEach(walk); | |
| }; | |
| [p.main, p.module, p.types, p.bin, p.exports].forEach(walk); | |
| const literal = [...files].filter(f => !f.includes('*')); | |
| console.log(literal.map(f => f.replace(/^\\.\\//,'')).join('\n')); | |
| ") | |
| fail=0 | |
| while IFS= read -r ep; do | |
| [ -z "$ep" ] && continue | |
| # -x -F: whole-line, fixed-string match, so dots and other regex | |
| # metacharacters in the path are compared literally. | |
| if ! echo "$listing" | grep -qxF -- "package/${ep}"; then | |
| echo "::error::Required entry-point missing from tarball: $ep" | |
| fail=1 | |
| fi | |
| done <<< "$required" | |
| exit $fail | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # Summary gate — all jobs must pass before merging | |
| # ────────────────────────────────────────────────────────────────────────── | |
| ci-pass: | |
| name: CI pass | |
| runs-on: ubuntu-22.04 | |
| needs: | |
| - build | |
| - unit-tests | |
| - functional-smoke | |
| - learning-check | |
| - perf-benchmark | |
| - recall-quality | |
| - tarball-integrity | |
| # Run even when a dependency failed, so the gate itself reports the failure. | |
| if: always() | |
| steps: | |
| - name: Check all jobs passed | |
| # The needs context reaches the script through the environment instead of | |
| # being pasted into the shell source, so quotes inside the JSON cannot | |
| # break or alter the command. | |
| env: | |
| NEEDS_JSON: ${{ toJSON(needs) }} | |
| run: | | |
| node -e " | |
| const d = JSON.parse(process.env.NEEDS_JSON); | |
| // Any result other than success (failure, cancelled, skipped) fails the gate. | |
| const failed = Object.entries(d) | |
| .filter(([,v]) => v.result !== 'success') | |
| .map(([k,v]) => k + '=' + v.result); | |
| if (failed.length) { | |
| console.error('::error::Failed jobs: ' + failed.join(', ')); | |
| process.exit(1); | |
| } | |
| console.log('All jobs passed.'); | |
| " | |