Skip to content

Commit 7cede8e

Browse files
author
Kalyan Kanuri
committed
fix: Address review feedback from jongyoul, tbonelee, and voidmatcha
Critical fixes:
- Revert all unrelated tsconfig changes (noImplicitAny, skipLibCheck, as-any casts)
- Fix deleteNoteIndex prefix collision (2A123 vs 2A1234)
- Debounce saveIndex with a 5s flush interval instead of writing on every mutation
- Bootstrap index when embedding_index.bin is missing
- Preserve Lucene <B> keyword highlighting (convert to <mark> tags)

Security fixes:
- Restrict index directory (0700) and file (0600) permissions
- Warn when index path is under /tmp
- Add SHA256 verification to install-search-model.sh
- Add runtime SHA256 verification of model.onnx at startup
- Add sanity bound (10M) to loadIndex deserialization
- Serialize index to buffer under lock, write to disk outside lock

Other fixes:
- Fix OnnxTensor leak on partial allocation failure
- Fix detectInterpreter: check %prefix first, fall back to heuristics
- Replace emoji delimiter with structured [TABLES] prefix
- Update LICENSE file for ONNX Runtime (MIT) and DJL (Apache 2.0)
- Improve test assertions for semantic search behavior

JIRA: https://issues.apache.org/jira/browse/ZEPPELIN-6411
1 parent c11aa0f commit 7cede8e

17 files changed

Lines changed: 276 additions & 90 deletions

File tree

LICENSE

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -277,3 +277,21 @@ Eclipse Public License - v 1.0
277277
The following components are provided under the Eclipse Public License, version 1.0. See file headers and project links for details.
278278

279279
(Eclipse Public License) pty4j - http://www.eclipse.org/legal/epl-v10.html
280+
281+
========================================================================
282+
MIT License
283+
========================================================================
284+
The following components are provided under the MIT License. See file headers and project links for details.
285+
286+
(MIT License) ONNX Runtime (https://github.com/microsoft/onnxruntime)
287+
Licensed under the MIT License.
288+
https://github.com/microsoft/onnxruntime/blob/main/LICENSE
289+
290+
========================================================================
291+
Apache License 2.0 (bundled dependencies)
292+
========================================================================
293+
The following components are provided under the Apache License 2.0. See file headers and project links for details.
294+
295+
(Apache License 2.0) DJL - Deep Java Library Tokenizers (https://github.com/deepjavalibrary/djl)
296+
Licensed under the Apache License, Version 2.0.
297+
https://github.com/deepjavalibrary/djl/blob/master/LICENSE

bin/install-search-model.sh

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,24 +26,53 @@ MODEL_NAME="all-MiniLM-L6-v2"
2626
MODEL_REVISION="c9745ed1d9f207416be6d2e6f8de32d1f16199bf"
2727
BASE_URL="https://huggingface.co/sentence-transformers/${MODEL_NAME}/resolve/${MODEL_REVISION}"
2828

29+
# Expected SHA256 checksums for integrity verification
30+
MODEL_SHA256="6fd5d72fe4589f189f8ebc006442dbb529bb7ce38f8082112682524616046452"
31+
TOKENIZER_SHA256="be50c3628f2bf5bb5e3a7f17b1f74611b2561a3a27eeab05e5aa30f411572037"
32+
2933
INDEX_PATH="${1:-/tmp/zeppelin-index}"
3034
MODEL_DIR="${INDEX_PATH}/models/${MODEL_NAME}"
3135

3236
mkdir -p "${MODEL_DIR}"
3337

38+
verify_sha256() {
39+
local file="$1" expected="$2"
40+
local actual
41+
if command -v sha256sum >/dev/null 2>&1; then
42+
actual=$(sha256sum "${file}" | cut -d' ' -f1)
43+
elif command -v shasum >/dev/null 2>&1; then
44+
actual=$(shasum -a 256 "${file}" | cut -d' ' -f1)
45+
else
46+
echo "WARNING: Neither sha256sum nor shasum found, skipping integrity check for ${file}"
47+
return 0
48+
fi
49+
if [ "${actual}" != "${expected}" ]; then
50+
echo "ERROR: SHA256 mismatch for ${file}"
51+
echo " Expected: ${expected}"
52+
echo " Actual: ${actual}"
53+
rm -f "${file}"
54+
return 1
55+
fi
56+
echo "SHA256 verified: ${file}"
57+
}
58+
3459
download() {
35-
local url="$1" dest="$2"
60+
local url="$1" dest="$2" expected_sha="$3"
3661
if [ -f "${dest}" ]; then
37-
echo "Already exists: ${dest}"
38-
return
62+
if verify_sha256 "${dest}" "${expected_sha}"; then
63+
echo "Already exists and verified: ${dest}"
64+
return
65+
fi
66+
echo "Existing file failed verification, re-downloading..."
3967
fi
4068
echo "Downloading ${url} ..."
4169
curl -fSL --connect-timeout 30 --max-time 300 -o "${dest}.tmp" "${url}"
4270
mv "${dest}.tmp" "${dest}"
71+
verify_sha256 "${dest}" "${expected_sha}"
4372
echo "Saved: ${dest}"
4473
}
4574

46-
download "${BASE_URL}/onnx/model.onnx" "${MODEL_DIR}/model.onnx"
47-
download "${BASE_URL}/tokenizer.json" "${MODEL_DIR}/tokenizer.json"
75+
download "${BASE_URL}/onnx/model.onnx" "${MODEL_DIR}/model.onnx" "${MODEL_SHA256}"
76+
download "${BASE_URL}/tokenizer.json" "${MODEL_DIR}/tokenizer.json" "${TOKENIZER_SHA256}"
4877

4978
echo "Model installed to ${MODEL_DIR}"

zeppelin-web-angular/projects/zeppelin-sdk/tsconfig.json

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,6 @@
55
"target": "es2015",
66
"declaration": true,
77
"inlineSources": true,
8-
"skipLibCheck": true,
9-
"noImplicitAny": false,
108
"types": [],
119
"lib": ["dom", "es2018"]
1210
},

zeppelin-web-angular/src/app/pages/workspace/credential/credential.component.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ export class CredentialComponent {
146146
this.credentialService.getCredentials().subscribe(data => {
147147
const controls = [...Object.entries(data.userCredentials)].map(e => {
148148
const entity = e[0];
149-
const { username, password } = e[1] as any;
149+
const { username, password } = e[1];
150150
return this.fb.group({
151151
entity: [entity, [Validators.required]],
152152
username: [username, [Validators.required]],

zeppelin-web-angular/src/app/pages/workspace/notebook-search/result-item/result-item.component.html

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,11 @@
1717
<span *ngIf="interpreter" class="badge" [ngClass]="interpreter">{{ interpreter }}</span>
1818
</div>
1919
</ng-template>
20-
<div *ngIf="codeText" class="code-block">
21-
<pre>{{ codeText }}</pre>
20+
<div *ngIf="codeHtml" class="code-block">
21+
<pre [innerHTML]="codeHtml"></pre>
2222
</div>
2323
<div *ngIf="outputText" class="output-block">
2424
<pre>{{ outputText }}</pre>
2525
</div>
26-
<div *ngIf="tablesText" class="tables-block">
27-
📊 {{ tablesText }}
28-
</div>
26+
<div *ngIf="tablesText" class="tables-block">Tables: {{ tablesText }}</div>
2927
</nz-card>

zeppelin-web-angular/src/app/pages/workspace/notebook-search/result-item/result-item.component.less

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,3 +91,9 @@
9191
color: #22863a;
9292
padding: 4px 0;
9393
}
94+
95+
mark {
96+
background-color: #fff3bf;
97+
padding: 0 1px;
98+
border-radius: 2px;
99+
}

zeppelin-web-angular/src/app/pages/workspace/notebook-search/result-item/result-item.component.ts

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ export class NotebookSearchResultItemComponent implements OnChanges {
2626
displayName = '';
2727
routerLink: string[] = [];
2828
codeText = '';
29+
codeHtml = '';
2930
outputText = '';
3031
tablesText = '';
3132
interpreter = '';
@@ -52,17 +53,20 @@ export class NotebookSearchResultItemComponent implements OnChanges {
5253
this.displayName = this.result.name ? this.result.name : `Note ${noteId}`;
5354

5455
// snippet = SQL/code, header = tables + output
55-
this.codeText = (this.result.snippet || '').replace(/<\/?B>/gi, '');
56+
const snippet = this.result.snippet || '';
57+
// Preserve Lucene <B> highlighting by converting to <mark>
58+
this.codeHtml = snippet.replace(/<B>/gi, '<mark>').replace(/<\/B>/gi, '</mark>');
59+
this.codeText = snippet.replace(/<\/?B>/gi, '');
5660
this.interpreter = this.detectInterpreter(this.codeText);
5761

58-
// Parse header: lines with 📊 are tables, rest is output
62+
// Parse header: lines with [TABLES] prefix are tables, rest is output
5963
const header = (this.result.header || '').replace(/<\/?B>/gi, '');
6064
const lines = header.split('\n');
6165
const tableParts: string[] = [];
6266
const outputParts: string[] = [];
6367
for (const line of lines) {
64-
if (line.startsWith('📊')) {
65-
tableParts.push(line.substring(2).trim());
68+
if (line.startsWith('[TABLES]')) {
69+
tableParts.push(line.substring(8).trim());
6670
} else if (line.trim()) {
6771
outputParts.push(line);
6872
}
@@ -75,7 +79,8 @@ export class NotebookSearchResultItemComponent implements OnChanges {
7579
if (!text) {
7680
return '';
7781
}
78-
if (/select|insert|create|from|where/i.test(text)) {
82+
// Check interpreter prefix first — this is reliable
83+
if (/^%(\w*\.)?sql/i.test(text)) {
7984
return 'sql';
8085
}
8186
if (/^%(\w*\.)?py/i.test(text)) {
@@ -87,8 +92,14 @@ export class NotebookSearchResultItemComponent implements OnChanges {
8792
if (/^%sh/i.test(text)) {
8893
return 'sh';
8994
}
90-
if (/import |def |class /i.test(text)) {
91-
return 'python';
95+
// Fall back to keyword heuristic only if no prefix
96+
if (!text.startsWith('%')) {
97+
if (/\b(?:SELECT|INSERT|CREATE|FROM|WHERE)\b/i.test(text) && /\b(?:SELECT|FROM)\b/i.test(text)) {
98+
return 'sql';
99+
}
100+
if (/import |def |class /i.test(text)) {
101+
return 'python';
102+
}
92103
}
93104
return '';
94105
}

zeppelin-web-angular/src/app/pages/workspace/notebook/notebook.component.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ export class NotebookComponent extends MessageListenersManager implements OnInit
321321
this.securityService.getPermissions(note.id).subscribe(data => {
322322
this.permissions = data;
323323
this.isOwner = !(
324-
this.permissions?.owners?.length && this.permissions.owners.indexOf(this.ticketService.ticket.principal) < 0
324+
this.permissions.owners.length && this.permissions.owners.indexOf(this.ticketService.ticket.principal) < 0
325325
);
326326
this.cdr.markForCheck();
327327
});

zeppelin-web-angular/src/app/pages/workspace/notebook/paragraph/code-editor/code-editor.component.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,7 @@ export class NotebookParagraphCodeEditorComponent
360360
return;
361361
}
362362
const text = model.getValue();
363-
const newDecorations: any[] = [];
363+
const newDecorations = [];
364364
let startIndex = 0;
365365
while (term && text) {
366366
const idx = text.indexOf(term, startIndex);

zeppelin-web-angular/src/app/services/save-as.service.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ export class SaveAsService {
1919
saveAs(content: string, filename: string, extension: string) {
2020
const BOM = '\uFEFF';
2121
const fileName = `${filename}.${extension}`;
22-
const binaryData: string[] = [];
22+
const binaryData = [];
2323
binaryData.push(BOM);
2424
binaryData.push(content);
2525
const blob = new Blob(binaryData, { type: 'octet/stream' });

0 commit comments

Comments
 (0)