Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,8 @@ protected void indexNodeName(Document doc, String value) {
}

@Override
protected boolean indexSimilarityTag(Document doc, PropertyState property) {
doc.add(new TextField(FieldNames.SIMILARITY_TAGS, property.getValue(Type.STRING), Field.Store.YES));
protected boolean indexSimilarityTag(Document doc, String value) {
doc.add(new TextField(FieldNames.SIMILARITY_TAGS, value, Field.Store.YES));
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,27 @@

import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.document.Document;
import org.junit.Test;

import java.util.List;

import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

public class LuceneDocumentMakerTest {
private final NodeState root = INITIAL_CONTENT;
private final LuceneIndexDefinitionBuilder builder = new LuceneIndexDefinitionBuilder();

@Test
public void excludeSingleProperty() throws Exception{
LuceneIndexDefinitionBuilder builder = new LuceneIndexDefinitionBuilder();
builder.indexRule("nt:base")
.property("foo")
.propertyIndex()
Expand All @@ -63,4 +67,39 @@ public void excludeSingleProperty() throws Exception{
assertNull(docMaker.makeDocument(test.getNodeState()));
}

}
@Test
public void similarityTagMaxLengthFiltering() throws Exception {
    // Index definition with a similarity-tag property and a maximum tag length of 10.
    LuceneIndexDefinitionBuilder builder = new LuceneIndexDefinitionBuilder();
    builder.indexRule("nt:base").property("jcr:primaryType").propertyIndex();
    builder.indexRule("nt:base").property("tag").similarityTags(true);
    builder.getBuilderTree().setProperty(FulltextIndexConstants.MAX_TAG_LENGTH, 10);

    LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, builder.build(), "/foo").build();
    LuceneDocumentMaker docMaker = new LuceneDocumentMaker(defn,
            defn.getApplicableIndexingRule("nt:base"), "/x");

    // Tags shorter than the limit, and exactly at the limit, are stored unchanged.
    for (String tag : List.of("short", "exactly10!")) {
        NodeBuilder node = EMPTY_NODE.builder();
        node.setProperty("tag", tag);
        Document indexed = docMaker.makeDocument(node.getNodeState());
        assertNotNull(indexed);
        assertEquals(tag, indexed.get(FieldNames.SIMILARITY_TAGS));
    }

    // A tag above the limit: a document is still produced, but without the similarity tag field.
    NodeBuilder tooLongNode = EMPTY_NODE.builder();
    tooLongNode.setProperty("tag", "this is too long");
    Document withoutTag = docMaker.makeDocument(tooLongNode.getNodeState());
    assertNotNull(withoutTag);
    assertNull(withoutTag.get(FieldNames.SIMILARITY_TAGS));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,37 @@ public void dynamicBoostLiteShouldGiveLessRelevanceToTags() throws Exception {
List.of("/test/asset3", "/test/asset2"));
}

@Test
public void dynamicBoostMaxLengthFiltering() throws Exception {
    // Index with dynamic boost used in full-text queries and a maximum tag length of 10.
    createAssetsIndexAndProperties(false, false, true, 10);

    Tree parent = createNodeWithType(root.getTree("/"), "test", JcrConstants.NT_UNSTRUCTURED, "");

    // asset1 carries a tag below the limit, one exactly at the limit and one above it.
    Tree firstPredicted = createAssetNodeWithPredicted(parent, "asset1", "test");
    createPredictedTag(firstPredicted, "short", 0.9);
    createPredictedTag(firstPredicted, "exactly10!", 0.8);
    createPredictedTag(firstPredicted, "this is too long", 0.7);

    // asset2 carries only tags that are within the limit.
    Tree secondPredicted = createAssetNodeWithPredicted(parent, "asset2", "test");
    createPredictedTag(secondPredicted, "short", 0.9);
    createPredictedTag(secondPredicted, "exactly10!", 0.8);

    root.commit();

    final List<String> bothAssets = List.of("/test/asset1", "/test/asset2");
    final List<String> noAssets = List.of();

    assertEventually(() -> {
        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'short')", SQL2,
                bothAssets);
        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'exactly10!')", SQL2,
                bothAssets);

        // The over-long tag must not match any asset.
        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'this is too long')", SQL2, noAssets);
    });
}

@Override
protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags) throws Exception {
protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags, boolean useInFullTextQuery, Integer maxTagLength) throws Exception {
factory.queryTermsProvider = new FulltextQueryTermsProviderImpl();
super.createAssetsIndexAndProperties(lite, similarityTags);
super.createAssetsIndexAndProperties(lite, similarityTags, useInFullTextQuery, maxTagLength);
}

private String runIndexingTest(Class<?> loggerClass, boolean nameProperty) throws CommitFailedException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,9 @@ protected void indexNodeName(ElasticDocument doc, String value) {
}

@Override
protected boolean indexSimilarityTag(ElasticDocument doc, PropertyState property) {
String val = property.getValue(Type.STRING);
if (!val.isEmpty()) {
doc.addSimilarityTag(val);
protected boolean indexSimilarityTag(ElasticDocument doc, String value) {
if (!value.isEmpty()) {
doc.addSimilarityTag(value);
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,12 @@ public static IndexingMode from(String indexingMode) {
*/
String MAX_FIELD_LENGTH = "maxFieldLength";

/**
* Maximum length of similarity and dynamic boost tag values to be indexed. Tags longer than this value will be skipped;
* a tag whose length is exactly this value is still indexed.
* Set to a negative value (for example -1) to disable the length check entirely.
* When the property is not set, {@code IndexDefinition.DEFAULT_MAX_TAG_LENGTH} applies.
*/
String MAX_TAG_LENGTH = "maxTagLength";

/**
* whether use this property values for suggestions
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ public class IndexDefinition implements Aggregate.AggregateMapper {
*/
public static final int DEFAULT_MAX_FIELD_LENGTH = 10000;

/**
* Default value for property {@link #maxTagLength}, used when the index definition
* does not set {@link FulltextIndexConstants#MAX_TAG_LENGTH}.
*/
public static final int DEFAULT_MAX_TAG_LENGTH = 100;

public static final int DEFAULT_MAX_EXTRACT_LENGTH = -10;

/**
Expand Down Expand Up @@ -274,6 +279,8 @@ public class IndexDefinition implements Aggregate.AggregateMapper {

private final int maxFieldLength;

private final int maxTagLength;

private final int maxExtractLength;

private final int suggesterUpdateFrequencyMinutes;
Expand Down Expand Up @@ -470,6 +477,7 @@ protected IndexDefinition(NodeState root, NodeState defn, IndexFormatVersion ver
}

this.maxFieldLength = getOptionalValue(defn, FulltextIndexConstants.MAX_FIELD_LENGTH, DEFAULT_MAX_FIELD_LENGTH);
this.maxTagLength = getOptionalValue(defn, FulltextIndexConstants.MAX_TAG_LENGTH, DEFAULT_MAX_TAG_LENGTH);
this.costPerEntry = getOptionalValue(defn, FulltextIndexConstants.COST_PER_ENTRY, getDefaultCostPerEntry(version));
this.costPerExecution = getOptionalValue(defn, FulltextIndexConstants.COST_PER_EXECUTION, 1.0);
this.hasCustomTikaConfig = getTikaConfigNode().exists();
Expand Down Expand Up @@ -690,6 +698,10 @@ public String[] getIndexTags() {
return indexSelectionPolicy;
}

/**
* Returns the maximum tag length of this index definition: the value of
* {@link FulltextIndexConstants#MAX_TAG_LENGTH} if set on the definition node,
* otherwise {@link #DEFAULT_MAX_TAG_LENGTH}.
*
* @return the configured maximum tag length
*/
public int getMaxTagLength() {
return maxTagLength;
}

public int getMaxExtractLength() {
return maxExtractLength;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.collections.IterableUtils;
import org.apache.jackrabbit.oak.commons.log.LogSilencer;
import org.apache.jackrabbit.oak.plugins.index.search.Aggregate;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
Expand Down Expand Up @@ -58,6 +59,9 @@
public abstract class FulltextDocumentMaker<D> implements DocumentMaker<D> {

private final Logger log = LoggerFactory.getLogger(getClass());

private static final LogSilencer LOG_SILENCER = new LogSilencer();

public static final String WARN_LOG_STRING_SIZE_THRESHOLD_KEY = "oak.repository.property.index.logWarnStringSizeThreshold";
private static final int DEFAULT_WARN_LOG_STRING_SIZE_THRESHOLD_VALUE = 102400;

Expand Down Expand Up @@ -343,7 +347,13 @@ private boolean indexProperty(String path,
dirty |= indexFacets(doc, property, pname, pd);
}
if (pd.similarityTags) {
dirty |= indexSimilarityTag(doc, property);
String value = property.getValue(Type.STRING);
if (isTagWithinLengthLimit(value)) {
dirty |= indexSimilarityTag(doc, value);
} else if (!LOG_SILENCER.silence(pname)) {
log.warn("[{}] Skipping similarity tag for property {}. Value length {} exceeds maximum allowed length",
getIndexName(), pname, value.length());
}
}

}
Expand Down Expand Up @@ -377,7 +387,7 @@ protected boolean isFulltextValuePersistedAtNode(PropertyDefinition pd) {
return true;
}

protected abstract boolean indexSimilarityTag(D doc, PropertyState property);
protected abstract boolean indexSimilarityTag(D doc, String value);

protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition pd, Blob blob) throws IOException;

Expand Down Expand Up @@ -704,6 +714,13 @@ protected boolean indexDynamicBoost(D doc, String propertyName, String nodeName,
continue;
}
String dynaTagValue = p.getValue(Type.STRING);
if (!isTagWithinLengthLimit(dynaTagValue)) {
if (!LOG_SILENCER.silence(p.getName())) {
log.warn("[{}] Skipping dynamic boost tag for property {}. Value length {} exceeds maximum allowed length",
getIndexName(), p.getName(), dynaTagValue.length());
}
continue;
}
p = dynaTag.getProperty(DYNAMIC_BOOST_TAG_CONFIDENCE);
if (p == null) {
// here we don't log a warning, because possibly it will be added later
Expand Down Expand Up @@ -736,6 +753,11 @@ protected String getIndexName() {
return definition.getIndexName();
}

/**
 * Checks a tag value against the maximum tag length of the index definition.
 * A negative maximum means that no limit is enforced.
 *
 * @param value the tag value to check
 * @return {@code true} if no limit is enforced or the value's length does not exceed the limit
 */
private boolean isTagWithinLengthLimit(String value) {
    final int limit = definition.getMaxTagLength();
    if (limit < 0) {
        // length check disabled
        return true;
    }
    return value.length() <= limit;
}

/*
* Extracts the local name of the current node ignoring any namespace prefix
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTa
}

/**
* Convenience overload that delegates to the four-argument variant with a {@code null}
* maxTagLength, so no explicit maximum tag length is set on the index definition.
*/
protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags, boolean useInFullTextQuery) throws Exception {
createAssetsIndexAndProperties(lite, similarityTags, useInFullTextQuery, null);
}

protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags, boolean useInFullTextQuery, Integer maxTagLength) throws Exception {
NodeTypeRegistry.register(root, new ByteArrayInputStream(ASSET_NODE_TYPE.getBytes()), "test nodeType");
Tree indexRuleProps = createIndex("dam:Asset", lite);

Expand All @@ -250,6 +254,11 @@ protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTa
predictedTags.setProperty("similarityTags", true);
}

if (maxTagLength != null) {
Tree indexDef = root.getTree("/oak:index/" + TEST_INDEX_NAME);
indexDef.setProperty(FulltextIndexConstants.MAX_TAG_LENGTH, maxTagLength);
}

root.commit();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,19 @@ public void customTikaMimeTypes() {
assertEquals("application/test-unmapped", defn.getTikaMappedMimeType("application/test-unmapped"));
}

@Test
public void similarityTagMaxLength() {
    NodeBuilder indexNode = newFTIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
            "lucene", Set.of(TYPENAME_STRING));

    // Without an explicit setting the default maximum tag length applies.
    IndexDefinition withDefault = new IndexDefinition(root, indexNode.getNodeState(), "/foo");
    assertEquals(IndexDefinition.DEFAULT_MAX_TAG_LENGTH, withDefault.getMaxTagLength());

    // An explicit setting on the definition node overrides the default.
    indexNode.setProperty(FulltextIndexConstants.MAX_TAG_LENGTH, 50);
    IndexDefinition withOverride = new IndexDefinition(root, indexNode.getNodeState(), "/foo");
    assertEquals(50, withOverride.getMaxTagLength());
}

@Test
public void maxExtractLength() {
NodeBuilder defnb = newFTIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
Expand Down
Loading