Skip to content

Commit fcd5d8e

Browse files
authored
Merge pull request #682 from netzbegruenung/feat/notebook-startpage-tabs-and-stats
feat(notebook): startpage tabs, statistics & monthly keyword snapshots
2 parents 7923b0f + 994d569 commit fcd5d8e

34 files changed

Lines changed: 2974 additions & 350 deletions
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
name: Monthly Notebook Keyword Snapshots

# Calls the API's internal refresh endpoint once a month (or on demand) and
# writes a per-collection summary table to the workflow run summary.

on:
  schedule:
    - cron: '0 3 1 * *' # 1st of each month at 03:00 UTC
  workflow_dispatch:
    inputs:
      collection_id:
        description: 'Single collection ID to refresh (empty = all)'
        required: false
        type: string
      month:
        description: 'Override month (YYYY-MM, empty = current month)'
        required: false
        type: string

permissions:
  contents: read

# Never run two refreshes at once; queue instead of cancelling a running one.
concurrency:
  group: notebook-keyword-snapshots
  cancel-in-progress: false

jobs:
  refresh:
    name: Refresh keyword snapshots
    runs-on: ubuntu-latest
    timeout-minutes: 60
    env:
      API_URL: ${{ vars.NOTEBOOK_API_URL || 'https://gruenerator.eu' }}
    steps:
      - name: Build request payload
        id: payload
        # Inputs are passed through the environment instead of being expanded
        # into the script text, so their content can never be parsed as shell.
        env:
          COLLECTION_ID: ${{ inputs.collection_id }}
          MONTH: ${{ inputs.month }}
        run: |
          PAYLOAD='{}'
          if [ -n "$COLLECTION_ID" ]; then
            # -c keeps the JSON on a single line; a pretty-printed (multi-line)
            # value would be rejected by the "name=value" GITHUB_OUTPUT format.
            PAYLOAD=$(jq -cn --arg id "$COLLECTION_ID" '{collectionId: $id}')
          fi
          if [ -n "$MONTH" ]; then
            PAYLOAD=$(echo "$PAYLOAD" | jq -c --arg m "$MONTH" '. + {month: $m}')
          fi
          echo "payload=$PAYLOAD" >> "$GITHUB_OUTPUT"
          echo "Payload: $PAYLOAD"

      - name: Trigger snapshot refresh
        id: refresh
        # Keep going on failure so the summary step can still report; the job
        # is failed explicitly by the last step.
        continue-on-error: true
        env:
          ADMIN_TOKEN: ${{ secrets.ADMIN_TOKEN }}
          PAYLOAD: ${{ steps.payload.outputs.payload }}
        run: |
          echo "POST $API_URL/api/internal/notebook/refresh-keywords"
          # --max-time 3000 (50 min) stays below the 60 min job timeout.
          RESULT=$(curl -sfS -X POST "$API_URL/api/internal/notebook/refresh-keywords" \
            -H "x-admin-token: $ADMIN_TOKEN" \
            -H "Content-Type: application/json" \
            -d "$PAYLOAD" \
            --max-time 3000)

          echo "$RESULT" | jq .
          echo "result<<EOF" >> "$GITHUB_OUTPUT"
          echo "$RESULT" >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"

      - name: Generate summary
        if: always()
        # The response JSON is handed over via the environment: embedding it in
        # a single-quoted shell literal breaks as soon as it contains a "'".
        env:
          RESULT: ${{ steps.refresh.outputs.result }}
        run: |
          RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

          if [ -z "$RESULT" ]; then
            {
              echo "## :x: Snapshot Refresh — No Result"
              echo ""
              echo "Service did not return a result. Check [workflow logs]($RUN_URL)."
            } >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          # Single-collection responses may carry no "count"; fall back to the
          # number of result rows.
          COUNT=$(echo "$RESULT" | jq -r '.count // (.results | length)')
          SUCCESS=$(echo "$RESULT" | jq -r '.success')

          if [ "$SUCCESS" = "true" ]; then
            ICON=":white_check_mark:"
          else
            ICON=":x:"
          fi

          {
            echo "## $ICON Notebook Keyword Snapshots"
            echo ""
            echo "**Date:** $(date -u '+%d.%m.%Y %H:%M UTC')"
            echo "**Run:** [${{ github.run_id }}]($RUN_URL)"
            echo "**Collections refreshed:** $COUNT"
            echo ""
            echo "| Collection | Keywords | Sample | Total docs | Duration (ms) |"
            echo "|------------|---------:|-------:|-----------:|--------------:|"
            echo "$RESULT" | jq -r '(.results // [])[] | "| \(.collectionId) | \(.keywordCount) | \(.sampleSize) | \(.totalDocuments) | \(.durationMs) |"'
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Fail if refresh errored
        if: steps.refresh.outcome == 'failure'
        run: exit 1
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Adds a `topic_counts` JSON object to each monthly keyword snapshot row.
-- It holds the NLP-classified topic counts, keyed by TopicCategory,
-- e.g. { migration: 12, klima: 4, ... }.
-- The column is filled by the same monthly cron that fills `keywords`, and is
-- read through the notebook stats endpoint so that user-facing requests never
-- trigger NLP inline.

ALTER TABLE notebook_keyword_snapshots
  ADD COLUMN IF NOT EXISTS topic_counts JSONB NOT NULL DEFAULT '{}'::jsonb;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- One row per notebook (system collection) and month, holding the keywords
-- extracted by NLP for that month.
-- A scheduled job (cron) refreshes the rows on the 1st of each month.
-- The stats endpoint reads the latest row to render the "Häufigste Begriffe"
-- word cloud, so no NLP call happens inline.

CREATE TABLE IF NOT EXISTS notebook_keyword_snapshots (
  collection_id   TEXT        NOT NULL,
  month           TEXT        NOT NULL,                      -- 'YYYY-MM'
  keywords        JSONB       NOT NULL DEFAULT '[]'::jsonb,  -- [{keyword, count, topic|null}]
  total_documents INTEGER     NOT NULL DEFAULT 0,
  sample_size     INTEGER     NOT NULL DEFAULT 0,
  computed_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
  PRIMARY KEY (collection_id, month)
);

-- Newest-month-first ordering across all collections.
CREATE INDEX IF NOT EXISTS idx_notebook_keyword_snapshots_month
  ON notebook_keyword_snapshots (month DESC);

-- Newest-month-first ordering within a single collection.
CREATE INDEX IF NOT EXISTS idx_notebook_keyword_snapshots_collection_month
  ON notebook_keyword_snapshots (collection_id, month DESC);

apps/api/routes.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ export async function setupRoutes(app: Application): Promise<void> {
223223
const {
224224
collectionsRouter: notebookCollectionsRouter,
225225
interactionRouter: notebookInteractionRouter,
226+
recentDocumentsRouter: notebookRecentDocumentsRouter,
227+
statisticsRouter: notebookStatisticsRouter,
228+
internalNotebookRouter,
226229
} = await import('./routes/notebook/index.js');
227230
const { default: nextcloudApiRouter } = await import('./routes/nextcloud/nextcloudApi.js');
228231
const { default: connectionsRouter } =
@@ -293,6 +296,8 @@ export async function setupRoutes(app: Application): Promise<void> {
293296
// the prefix, which would break the public/:token routes).
294297
mountNotebookContractRouter(app);
295298
app.use('/api/auth/notebook', authenticatedReadLimiter, notebookInteractionRouter);
299+
app.use('/api/auth/notebook', authenticatedReadLimiter, notebookRecentDocumentsRouter);
300+
app.use('/api/auth/notebook', authenticatedReadLimiter, notebookStatisticsRouter);
296301
// ts-rest contract router for /api/documents — mounts BEFORE the legacy documentsRouter
297302
// so ts-rest matches its own routes first; unmatched paths fall through.
298303
// requireAuth is applied at the prefix because all 3 contract routes require auth.
@@ -550,6 +555,7 @@ export async function setupRoutes(app: Application): Promise<void> {
550555
app.use('/api/internal/offboarding', offboardingRouter);
551556
app.use('/api/internal/gruene-api', grueneApiTestRouter);
552557
app.use('/api/internal/monitor', monitorInternalRouter);
558+
app.use('/api/internal/notebook', internalNotebookRouter);
553559
app.use('/api/internal/content-sync', contentSyncRouter);
554560
app.use('/api/monitor', requireAuth, publicReadLimiter, monitorRouter);
555561

apps/api/routes/notebook/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@
77
*/
88

99
export { default as collectionsRouter } from './collectionsController.js';
10+
export { internalNotebookRouter } from './internalController.js';
1011
export { default as interactionRouter } from './interactionController.js';
12+
export { default as recentDocumentsRouter } from './recentDocumentsController.js';
13+
export { default as statisticsRouter } from './statisticsController.js';
1114
export * from './types.js';
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { Router, type Request, type Response } from 'express';
2+
3+
import { requireAdminToken } from '../../middleware/adminTokenMiddleware.js';
4+
import {
5+
refreshAllKeywordSnapshots,
6+
refreshKeywordSnapshot,
7+
} from '../../services/notebook/notebookKeywordSnapshotService.js';
8+
import { toError } from '../../utils/errors/index.js';
9+
import { createLogger } from '../../utils/logger.js';
10+
11+
const log = createLogger('notebook-internal');
const router: Router = Router();

/** Accepts only month labels of the documented 'YYYY-MM' form (months 01-12). */
const MONTH_LABEL_PATTERN = /^\d{4}-(0[1-9]|1[0-2])$/;

/**
 * Refresh notebook keyword snapshots. Called monthly by GitHub Actions cron
 * (apps/api isn't running an in-process scheduler).
 *
 * POST /api/internal/notebook/refresh-keywords
 * body: { collectionId?: string, month?: 'YYYY-MM' }
 *   - no body          → refresh all system collections for current month
 *   - { collectionId } → refresh just that one
 *   - { month }        → store under that month label (defaults to current)
 *
 * Responses:
 *   - 200 `{ success: true, count, results }`
 *   - 400 `{ success: false, error }` when `month` is a non-empty string that
 *     is not of the form YYYY-MM
 *   - 500 `{ success: false, error }` when the refresh throws
 */
router.post(
  '/refresh-keywords',
  requireAdminToken,
  async (req: Request, res: Response): Promise<void> => {
    const body = (req.body ?? {}) as { collectionId?: unknown; month?: unknown };
    // Non-string or empty values are treated as "not provided".
    const month = typeof body.month === 'string' && body.month ? body.month : undefined;
    const collectionId =
      typeof body.collectionId === 'string' && body.collectionId ? body.collectionId : undefined;

    // Reject malformed labels up front instead of passing them on to be stored
    // under a month key that does not follow the documented format.
    if (month !== undefined && !MONTH_LABEL_PATTERN.test(month)) {
      log.warn(`Rejected snapshot refresh with invalid month label: ${month}`);
      res.status(400).json({ success: false, error: 'month must be formatted as YYYY-MM' });
      return;
    }

    try {
      if (collectionId !== undefined) {
        log.info(`Single-collection refresh: ${collectionId} (month=${month ?? 'current'})`);
        const result = await refreshKeywordSnapshot(collectionId, month);
        const results = result ? [result] : [];
        // `count` is included so both branches answer with the same shape.
        res.json({ success: true, count: results.length, results });
        return;
      }

      log.info(`Bulk refresh for all system collections (month=${month ?? 'current'})`);
      const results = await refreshAllKeywordSnapshots(month);
      res.json({ success: true, count: results.length, results });
    } catch (error) {
      const { message } = toError(error);
      log.error(`Snapshot refresh failed: ${message}`);
      // `success: false` lets consumers that read `.success` detect the failure.
      res.status(500).json({ success: false, error: message });
    }
  }
);

export const internalNotebookRouter = router;

0 commit comments

Comments
 (0)