Skip to content

Commit f8dcec3

Browse files
duncdrumline-o
authored and committed
[test] expand tests to cover vectors
1 parent 117c590 commit f8dcec3

2 files changed

Lines changed: 74 additions & 3 deletions

File tree

extensions/indexes/lucene/src/test/xquery/lucene/field-expression-context.xqm

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@ declare function idx:get-metadata($root as element(), $field as xs:string) as xs
4545
};
4646
]``;
4747

48-
declare variable $t:XML := document { <entry><form><orth>hello</orth></form></entry> };
48+
(:~
49+
: vec attribute carries a dim=4 text-encoded float vector on the entry element.
50+
: Using an attribute rather than a child element means XPath axes * and ./* still
51+
: return only <form>, so module-call fields (mchildren, mdotchildren) are unaffected,
52+
: and string(<entry>) remains "hello" so all text-field query assertions still hold.
53+
:)
54+
declare variable $t:XML := document { <entry vec="1.0 0.0 0.0 0.0"><form><orth>hello</orth></form></entry> };
4955

5056
declare variable $t:xconf :=
5157
<collection xmlns="http://exist-db.org/collection-config/1.0">
@@ -71,6 +77,13 @@ declare variable $t:xconf :=
7177
<field name="mdotselfchild" expression="idx:get-metadata(./self::*/form, 'x')"/>
7278
<field name="mchildren" expression="idx:get-metadata(*, 'x')"/>
7379
<field name="mdotchildren" expression="idx:get-metadata(./*, 'x')"/>
80+
<!-- vector-field expression context regressions (#6446) -->
81+
<!-- @vec is a dim=4 float attribute; self::*/@vec exercises the element-type
82+
check that was broken: if the context NodeProxy had UNKNOWN_NODE_TYPE,
83+
self::* returned empty, the attribute step had no context, and no vector
84+
was indexed. -->
85+
<vector-field name="v_baseline" expression="@vec" dimension="4" similarity="cosine" encoding="text"/>
86+
<vector-field name="v_self" expression="self::*/@vec" dimension="4" similarity="cosine" encoding="text"/>
7487
</text>
7588
</lucene>
7689
</index>
@@ -117,3 +130,11 @@ declare %test:assertTrue function t:mdotself() { t:indexed("mdotself") };
117130
declare %test:assertTrue function t:mdotselfchild() { t:indexed("mdotselfchild") };
118131
declare %test:assertTrue function t:mchildren() { t:indexed("mchildren") };
119132
declare %test:assertTrue function t:mdotchildren() { t:indexed("mdotchildren") };
133+
134+
(:~ --- vector-field expression context regressions (#6446) --- :)
135+
(:~
 : Reports whether at least one entry element in the test collection is
 : returned by a vector query against the named field. The query vector
 : [1.0, 0.0, 0.0, 0.0] carries the same values as the vec attribute of the
 : fixture entry, so an indexed field is expected to match it.
 :
 : NOTE(review): the trailing argument 1 is presumably the number of nearest
 : neighbours requested; confirm against the ft:query-field-vector signature.
 :
 : @param $field name of the vector-field to query
 : @return true if any entry matches, false otherwise
 :)
declare function t:vector-indexed($field as xs:string) as xs:boolean {
136+
exists(collection($t:COLL)//entry[ft:query-field-vector($field, [1.0, 0.0, 0.0, 0.0], 1)])
137+
};
138+
139+
(:~ Baseline check: the field "v_baseline" (expression "@vec") must be found by the vector query. :)
declare
    %test:assertTrue
function t:vector-baseline() {
    t:vector-indexed("v_baseline")
};
140+
(:~ Regression check: the field "v_self" (expression "self::*/@vec") must be found by the vector query. :)
declare
    %test:assertTrue
function t:vector-xself() {
    t:vector-indexed("v_self")
};

extensions/indexes/lucene/src/test/xquery/lucene/vector-search.xqm

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,36 @@ declare variable $vs:COLLECTION_ORDER_SINGLE := "/db/" || $vs:COLLECTION_ORDER_S
360360
declare variable $vs:COLLECTION_ORDER_MULTI_NAME := "lucene-test-vector-order-multi";
361361
declare variable $vs:COLLECTION_ORDER_MULTI := "/db/" || $vs:COLLECTION_ORDER_MULTI_NAME;
362362

363+
(:~
364+
: Data for runtime-expression-error test.
365+
: Valid1 (before) and Valid2 (after) should both get vectors; BadExpr triggers error() in the
366+
: expression, which before the isValid fix would permanently disable the field for all
367+
: subsequent nodes and documents in the same indexing run.
368+
:)
369+
declare variable $vs:DATA_EXPR_ERROR :=
370+
<articles>
371+
<article><title>Valid1</title><embedding>{$vs:EMB_TEXT_A}</embedding></article>
372+
<article><title>BadExpr</title><embedding>{$vs:EMB_TEXT_B}</embedding></article>
373+
<article><title>Valid2</title><embedding>{$vs:EMB_TEXT_C}</embedding></article>
374+
</articles>;
375+
376+
(:~
 : Collection configuration for the runtime-expression-error test.
 : Indexes each article element with a "title" text field and an "embedding"
 : vector-field (dimension 4, cosine similarity, text encoding). The
 : vector-field expression calls error() when the article's title equals
 : 'BadExpr' and otherwise selects the embedding child element.
 :)
declare variable $vs:XCONF_EXPR_ERROR :=
377+
<collection xmlns="http://exist-db.org/collection-config/1.0">
378+
<index xmlns:xs="http://www.w3.org/2001/XMLSchema">
379+
<lucene>
380+
<text qname="article">
381+
<field name="title" expression="title"/>
382+
<vector-field name="embedding"
383+
expression="if (title = 'BadExpr') then error() else embedding"
384+
dimension="4" similarity="cosine" encoding="text"/>
385+
</text>
386+
</lucene>
387+
</index>
388+
</collection>;
389+
390+
(:~ Name of the collection used by the runtime-expression-error test. :)
declare variable $vs:COLLECTION_EXPR_ERROR_NAME := "lucene-test-vector-expr-error";
391+
(:~ Database path of that collection, built by prefixing its name with "/db/". :)
declare variable $vs:COLLECTION_EXPR_ERROR := "/db/" || $vs:COLLECTION_EXPR_ERROR_NAME;
392+
363393
(:~ Lucene fulltext only — no vector-field (profiler NONE tests). :)
364394
declare variable $vs:DATA_NO_VECTOR :=
365395
<articles>
@@ -484,7 +514,13 @@ function vs:setup() {
484514
xmldb:store($vs:COLLECTION_ORDER_MULTI, "b.xml", $vs:DATA_ORDER_B),
485515
xmldb:store($vs:COLLECTION_ORDER_MULTI, "c.xml", $vs:DATA_ORDER_C),
486516
xmldb:store("/db/system/config/db/" || $vs:COLLECTION_ORDER_MULTI_NAME, "collection.xconf", $vs:XCONF_ORDER),
487-
xmldb:reindex($vs:COLLECTION_ORDER_MULTI) )
517+
xmldb:reindex($vs:COLLECTION_ORDER_MULTI),
518+
(: Runtime expression error: the vector field must survive for the articles indexed after the failing one. :)
519+
xmldb:create-collection("/db/system/config/db", $vs:COLLECTION_EXPR_ERROR_NAME),
520+
xmldb:create-collection("/db", $vs:COLLECTION_EXPR_ERROR_NAME),
521+
xmldb:store($vs:COLLECTION_EXPR_ERROR, "test.xml", $vs:DATA_EXPR_ERROR),
522+
xmldb:store("/db/system/config/db/" || $vs:COLLECTION_EXPR_ERROR_NAME, "collection.xconf", $vs:XCONF_EXPR_ERROR),
523+
xmldb:reindex($vs:COLLECTION_EXPR_ERROR) )
488524
};
489525

490526
(:~
@@ -528,7 +564,9 @@ function vs:tearDown() {
528564
xmldb:remove($vs:COLLECTION_ORDER_SINGLE),
529565
xmldb:remove("/db/system/config/db/" || $vs:COLLECTION_ORDER_SINGLE_NAME),
530566
xmldb:remove($vs:COLLECTION_ORDER_MULTI),
531-
xmldb:remove("/db/system/config/db/" || $vs:COLLECTION_ORDER_MULTI_NAME)
567+
xmldb:remove("/db/system/config/db/" || $vs:COLLECTION_ORDER_MULTI_NAME),
568+
xmldb:remove($vs:COLLECTION_EXPR_ERROR),
569+
xmldb:remove("/db/system/config/db/" || $vs:COLLECTION_EXPR_ERROR_NAME)
532570
};
533571

534572
(:~
@@ -674,6 +712,18 @@ function vs:non-finite-doc-still-text-searchable() {
674712
count(collection($vs:COLLECTION_NON_FINITE)//article[ft:query(., "NonFinite")])
675713
};
676714

715+
(:~
716+
: (8f) runtime expression error does not permanently disable the vector field.
717+
: DATA_EXPR_ERROR: Valid1, BadExpr (throws error()), Valid2 — all in one document.
718+
: Before the isValid fix, BadExpr's XPathException set isValid=false, silently dropping
719+
: Valid2's vector. After the fix both Valid1 and Valid2 are indexed; count must be 2.
720+
:)
721+
declare
722+
%test:assertEquals(2)
723+
function vs:runtime-expr-error-does-not-disable-subsequent-docs() {
724+
(: Counts the article elements returned by the vector query. The annotation
   expects 2: the fixture holds three articles and the BadExpr one raises
   error() in its vector-field expression.
   NOTE(review): the trailing 5 is presumably the neighbour limit, chosen
   larger than the three stored articles; confirm against ft:query-vector. :)
count(collection($vs:COLLECTION_EXPR_ERROR)//article[ft:query-vector(., [1.0, 0.0, 0.0, 0.0], 5)])
725+
};
726+
677727
(:~
678728
: (9) empty node set returns empty sequence.
679729
:)

0 commit comments

Comments
 (0)