@@ -88,7 +88,6 @@ public class EmbeddingSearch extends SearchService {
8888 private static final int MAX_RESULTS = 20 ;
8989 private static final float MIN_SIMILARITY = 0.25f ;
9090 private static final int MAX_TEXT_LENGTH = 1500 ;
91- private static final int SNIPPET_LENGTH = 150 ;
9291
9392 static final String ID_FIELD = "id" ;
9493 private static final String PARAGRAPH = "paragraph" ;
@@ -198,7 +197,10 @@ private void initModel() throws OrtException, IOException {
198197
199198 private static void downloadFile (String urlStr , Path dest ) throws IOException {
200199 URL url = new URL (urlStr );
201- try (InputStream in = new BufferedInputStream (url .openStream ());
200+ java .net .URLConnection conn = url .openConnection ();
201+ conn .setConnectTimeout (30_000 );
202+ conn .setReadTimeout (60_000 );
203+ try (InputStream in = new BufferedInputStream (conn .getInputStream ());
202204 FileOutputStream out = new FileOutputStream (dest .toFile ())) {
203205 byte [] buf = new byte [8192 ];
204206 int n ;
@@ -447,9 +449,9 @@ public List<Map<String, String>> query(String queryStr) {
447449 });
448450 }
449451
450- // Phase 2: re-score with table boost
451- List <Map < String , String >> results = new ArrayList <>();
452- for (int i = 0 ; i < scored .size () && results .size () < MAX_RESULTS ; i ++) {
452+ // Phase 2: re-score with table boost, collect candidates with boosted scores
453+ List <Map . Entry < Map < String , String >, Float >> candidates = new ArrayList <>();
454+ for (int i = 0 ; i < scored .size () && candidates .size () < MAX_RESULTS ; i ++) {
453455 float sim = scored .get (i ).getValue ();
454456 if (sim < MIN_SIMILARITY ) {
455457 break ;
@@ -459,17 +461,13 @@ public List<Map<String, String>> query(String queryStr) {
459461 if (entry == null || StringUtils .isBlank (entry .text )) {
460462 continue ;
461463 }
462- // Boost paragraphs that reference discovered tables
463464 if (!relevantTables .isEmpty () && StringUtils .isNotBlank (entry .tables )) {
464465 for (String t : entry .tables .split (" " )) {
465466 if (relevantTables .contains (t )) {
466467 sim += TABLE_BOOST ;
467468 }
468469 }
469470 }
470- // Frontend renders: header + "\n\n" + snippet in Monaco editor
471- // snippet = SQL/code (used for language detection too)
472- // header = title + tables + output preview
473471 StringBuilder header = new StringBuilder ();
474472 if (StringUtils .isNotBlank (entry .title )) {
475473 header .append (entry .title ).append ("\n " );
@@ -484,14 +482,19 @@ public List<Map<String, String>> query(String queryStr) {
484482 }
485483 header .append ("\n " ).append (out );
486484 }
487- results .add (ImmutableMap .of (
485+ candidates .add ( Map . entry (ImmutableMap .of (
488486 "id" , docId ,
489487 "name" , entry .noteName != null ? entry .noteName : "" ,
490488 "snippet" , entry .text ,
491489 "text" , entry .text ,
492- "header" , header .toString ()));
490+ "header" , header .toString ()), sim ) );
493491 }
494492 // Re-sort by boosted score
493+ candidates .sort ((a , b ) -> Float .compare (b .getValue (), a .getValue ()));
494+ List <Map <String , String >> results = new ArrayList <>();
495+ for (Map .Entry <Map <String , String >, Float > c : candidates ) {
496+ results .add (c .getKey ());
497+ }
495498 return results ;
496499 }
497500
@@ -657,29 +660,34 @@ static String formatId(String noteId, Paragraph p) {
657660 */
658661 private void saveIndex () throws IOException {
659662 Path file = indexPath .resolve ("embedding_index.bin" );
663+ Path tmpFile = indexPath .resolve ("embedding_index.bin.tmp" );
660664 indexLock .readLock ().lock ();
661- try (DataOutputStream out = new DataOutputStream (new FileOutputStream (file .toFile ()))) {
662- out .writeInt (3 ); // version 3: includes output field
663- out .writeInt (index .size ());
664- for (Map .Entry <String , IndexEntry > e : index .entrySet ()) {
665- out .writeUTF (e .getKey ());
666- out .writeUTF (e .getValue ().noteName != null ? e .getValue ().noteName : "" );
667- String text = e .getValue ().text != null ? e .getValue ().text : "" ;
668- if (text .length () > 2000 ) {
669- text = text .substring (0 , 2000 );
670- }
671- out .writeUTF (text );
672- out .writeUTF (e .getValue ().title != null ? e .getValue ().title : "" );
673- out .writeUTF (e .getValue ().tables != null ? e .getValue ().tables : "" );
674- String output = e .getValue ().output != null ? e .getValue ().output : "" ;
675- if (output .length () > 1000 ) {
676- output = output .substring (0 , 1000 );
677- }
678- out .writeUTF (output );
679- for (float v : e .getValue ().embedding ) {
680- out .writeFloat (v );
665+ try {
666+ try (DataOutputStream out = new DataOutputStream (new FileOutputStream (tmpFile .toFile ()))) {
667+ out .writeInt (3 ); // version 3: includes output field
668+ out .writeInt (index .size ());
669+ for (Map .Entry <String , IndexEntry > e : index .entrySet ()) {
670+ out .writeUTF (e .getKey ());
671+ out .writeUTF (e .getValue ().noteName != null ? e .getValue ().noteName : "" );
672+ String text = e .getValue ().text != null ? e .getValue ().text : "" ;
673+ if (text .length () > 2000 ) {
674+ text = text .substring (0 , 2000 );
675+ }
676+ out .writeUTF (text );
677+ out .writeUTF (e .getValue ().title != null ? e .getValue ().title : "" );
678+ out .writeUTF (e .getValue ().tables != null ? e .getValue ().tables : "" );
679+ String output = e .getValue ().output != null ? e .getValue ().output : "" ;
680+ if (output .length () > 1000 ) {
681+ output = output .substring (0 , 1000 );
682+ }
683+ out .writeUTF (output );
684+ for (float v : e .getValue ().embedding ) {
685+ out .writeFloat (v );
686+ }
681687 }
682688 }
689+ Files .move (tmpFile , file , java .nio .file .StandardCopyOption .REPLACE_EXISTING ,
690+ java .nio .file .StandardCopyOption .ATOMIC_MOVE );
683691 } finally {
684692 indexLock .readLock ().unlock ();
685693 }
0 commit comments