Skip to content

Commit

Permalink
Merge #761
Browse files Browse the repository at this point in the history
761: Add similar documents query to index  r=curquiza a=jdvalenzuelah

# Pull Request

## Related issue
Fixes #755

## What does this PR do?
- Add a `searchSimilarDocuments` method to `Index` for querying documents similar to a given document

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Josue Valenzuela <[email protected]>
Co-authored-by: Clémentine <[email protected]>
  • Loading branch information
3 people authored Aug 20, 2024
2 parents 67e3467 + 1880805 commit cf7732e
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 27 deletions.
5 changes: 5 additions & 0 deletions .code-samples.meilisearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -822,3 +822,8 @@ multi_search_1: |-
multiIndexSearch.addQuery(new IndexSearchRequest("movie_ratings").setQuery("us"));
client.multiSearch(multiSearchRequest);
# Java sample for the similar-documents search; kept as a literal block so line breaks survive.
get_similar_post_1: |-
  SimilarDocumentRequest query = new SimilarDocumentRequest()
    .setId("143")
    .setEmbedder("manual");
  client.index("movies").searchSimilarDocuments(query);
4 changes: 4 additions & 0 deletions src/main/java/com/meilisearch/sdk/Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,10 @@ public Results<MultiSearchResult> multiSearch(MultiSearchRequest search)
"/multi-search", search, Results.class, MultiSearchResult.class);
}

/**
 * Sends the given experimental feature flags to the {@code /experimental-features} route using
 * a PATCH request.
 *
 * @param features map of feature name to the boolean value sent for that feature
 */
public void experimentalFeatures(Map<String, Boolean> features) {
    final String route = "/experimental-features";
    // Void.class is the target class handed to the HTTP client; nothing is returned to the caller.
    this.config.httpClient.patch(route, features, Void.class);
}

public String generateTenantToken(String apiKeyUid, Map<String, Object> searchRules)
throws MeilisearchException {
return this.generateTenantToken(apiKeyUid, searchRules, new TenantTokenOptions());
Expand Down
25 changes: 10 additions & 15 deletions src/main/java/com/meilisearch/sdk/Index.java
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
package com.meilisearch.sdk;

import com.meilisearch.sdk.exceptions.MeilisearchException;
import com.meilisearch.sdk.model.DocumentQuery;
import com.meilisearch.sdk.model.DocumentsQuery;
import com.meilisearch.sdk.model.FacetSearchable;
import com.meilisearch.sdk.model.Faceting;
import com.meilisearch.sdk.model.IndexStats;
import com.meilisearch.sdk.model.Pagination;
import com.meilisearch.sdk.model.Results;
import com.meilisearch.sdk.model.SearchResult;
import com.meilisearch.sdk.model.Searchable;
import com.meilisearch.sdk.model.Settings;
import com.meilisearch.sdk.model.Task;
import com.meilisearch.sdk.model.TaskInfo;
import com.meilisearch.sdk.model.TasksQuery;
import com.meilisearch.sdk.model.TasksResults;
import com.meilisearch.sdk.model.TypoTolerance;
import com.meilisearch.sdk.http.URLBuilder;
import com.meilisearch.sdk.model.*;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -1214,4 +1201,12 @@ public TaskInfo updateSearchCutoffMsSettings(Integer milliseconds) throws Meilis
public TaskInfo resetSearchCutoffMsSettings() throws MeilisearchException {
return this.settingsHandler.resetSearchCutoffMsSettings(this.uid);
}

/**
 * Queries this index for documents similar to the one described by the request.
 *
 * <p>The request is POSTed to the URL built from the {@code /indexes} root, this index's uid and
 * the {@code /similar} subroute.
 *
 * @param query similar-documents request sent as the POST body
 * @return the response mapped onto {@link SimilarDocumentsResults}
 * @throws MeilisearchException declared by this method; presumably raised by the HTTP client
 *     when the call fails
 */
public SimilarDocumentsResults searchSimilarDocuments(SimilarDocumentRequest query)
        throws MeilisearchException {
    // NOTE(review): the subroute literal carries its own leading slash; confirm how URLBuilder
    // joins subroutes before changing it.
    final String similarUrl =
            new URLBuilder("/indexes").addSubroute(this.uid).addSubroute("/similar").getURL();
    return this.config.httpClient.post(similarUrl, query, SimilarDocumentsResults.class);
}
}
49 changes: 49 additions & 0 deletions src/main/java/com/meilisearch/sdk/SimilarDocumentRequest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package com.meilisearch.sdk;

import lombok.*;
import lombok.experimental.Accessors;
import org.json.JSONObject;

/**
 * Request object for a similar-documents query, passed to {@code Index#searchSimilarDocuments}.
 *
 * <p>Lombok generates the builder, a package-private all-arguments constructor, the getters and
 * the chained setters (each setter returns this instance).
 */
@Builder
@AllArgsConstructor(access = AccessLevel.PACKAGE)
@Getter
@Setter
@Accessors(chain = true)
public class SimilarDocumentRequest {
    private String id;
    private String embedder;
    private String[] attributesToRetrieve;
    private Integer offset;
    private Integer limit;
    private String filter;
    private Boolean showRankingScore;
    private Boolean showRankingScoreDetails;
    private Double rankingScoreThreshold;
    private Boolean retrieveVectors;

    /**
     * Creates an empty SimilarDocumentRequest: no field is assigned here, so every field is
     * {@code null} until a setter is called.
     *
     * <p>NOTE(review): the values previously documented as defaults (embedder "default",
     * attributesToRetrieve ["*"], offset 0, limit 20, showRankingScore false,
     * showRankingScoreDetails false, retrieveVectors false) are not set by this class;
     * presumably they are the defaults applied by the server when a field is absent. Confirm
     * against the API reference.
     */
    public SimilarDocumentRequest() {}

    /**
     * Renders this request as a JSON object string, one key per field.
     *
     * @return the JSON text produced by {@link JSONObject#toString()}
     */
    @Override
    public String toString() {
        final JSONObject json = new JSONObject();
        // Keys are inserted in field-declaration order.
        json.put("id", this.id);
        json.put("embedder", this.embedder);
        json.put("attributesToRetrieve", this.attributesToRetrieve);
        json.put("offset", this.offset);
        json.put("limit", this.limit);
        json.put("filter", this.filter);
        json.put("showRankingScore", this.showRankingScore);
        json.put("showRankingScoreDetails", this.showRankingScoreDetails);
        json.put("rankingScoreThreshold", this.rankingScoreThreshold);
        json.put("retrieveVectors", this.retrieveVectors);
        return json.toString();
    }
}
24 changes: 24 additions & 0 deletions src/main/java/com/meilisearch/sdk/model/EmbedderInputType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.meilisearch.sdk.model;

import com.fasterxml.jackson.annotation.JsonValue;
import com.google.gson.annotations.SerializedName;

/**
 * Input types an embedder can declare. Each constant carries the string it is serialized as,
 * which is also what {@link #toString()} returns.
 */
public enum EmbedderInputType {
    @SerializedName("text")
    TEXT("text"),

    @SerializedName("textArray")
    TEXT_ARRAY("textArray");

    /** Serialized form of this constant. */
    public final String inputType;

    // Enum constructors are implicitly private, so no modifier is needed.
    EmbedderInputType(String serializedName) {
        inputType = serializedName;
    }

    /**
     * Returns the serialized form of this constant instead of its Java name.
     *
     * @return the value of {@link #inputType}
     */
    @Override
    @JsonValue
    public String toString() {
        return inputType;
    }
}
33 changes: 33 additions & 0 deletions src/main/java/com/meilisearch/sdk/model/EmbedderSource.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.meilisearch.sdk.model;

import com.fasterxml.jackson.annotation.JsonValue;
import com.google.gson.annotations.SerializedName;

/**
 * Sources an embedder can be configured with. Each constant carries the string it is serialized
 * as, which is also what {@link #toString()} returns.
 */
public enum EmbedderSource {
    @SerializedName("openAi")
    OPEN_AI("openAi"),

    @SerializedName("huggingFace")
    HUGGING_FACE("huggingFace"),

    @SerializedName("ollama")
    OLLAMA("ollama"),

    @SerializedName("rest")
    REST("rest"),

    @SerializedName("userProvided")
    USER_PROVIDED("userProvided");

    /** Serialized form of this constant. */
    public final String source;

    // Enum constructors are implicitly private, so no modifier is needed.
    EmbedderSource(String serializedName) {
        source = serializedName;
    }

    /**
     * Returns the serialized form of this constant instead of its Java name.
     *
     * @return the value of {@link #source}
     */
    @Override
    @JsonValue
    public String toString() {
        return source;
    }
}
24 changes: 24 additions & 0 deletions src/main/java/com/meilisearch/sdk/model/Embedders.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.meilisearch.sdk.model;

import lombok.*;
import lombok.experimental.Accessors;

/**
 * Configuration of a single embedder. {@code Settings} stores these in a map keyed by embedder
 * name.
 *
 * <p>Lombok generates the builder, a package-private all-arguments constructor, the getters and
 * the chained setters (each setter returns this instance), so an instance can be configured as
 * {@code new Embedders().setSource(...).setDimensions(...)}.
 */
@Builder
@AllArgsConstructor(access = AccessLevel.PACKAGE)
@Getter
@Setter
@Accessors(chain = true)
public class Embedders {
// No field has an initializer, so each one is null until it is set.
// NOTE(review): the fields presumably map one-to-one onto the keys of a Meilisearch embedder
// settings object, and which of them apply likely depends on the source; confirm against the
// API reference.
protected EmbedderSource source;
protected String url;
protected String apiKey;
protected String model;
protected String documentTemplate;
protected Integer dimensions;
protected String revision;
protected String[] inputField;
protected EmbedderInputType inputType;
protected String query;

/** Creates an embedder configuration with every field unset. */
public Embedders() {}
}
1 change: 1 addition & 0 deletions src/main/java/com/meilisearch/sdk/model/Settings.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class Settings {
protected Integer searchCutoffMs;
protected String[] separatorTokens;
protected String[] nonSeparatorTokens;
protected HashMap<String, Embedders> embedders;

public Settings() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.meilisearch.sdk.model;

import java.util.ArrayList;
import java.util.HashMap;
import lombok.Getter;
import lombok.ToString;

/**
 * Meilisearch similar documents results data structure.
 *
 * <p>This is the type {@code Index#searchSimilarDocuments} maps its response onto. Lombok
 * generates the getters and {@code toString()}; the class declares no setters or constructors.
 *
 * @see <a href="https://www.meilisearch.com/docs/reference/api/similar#response-200-ok">API
 * specification</a>
 */
@Getter
@ToString
public class SimilarDocumentsResults {
// Returned documents, each one a map from field name to value.
ArrayList<HashMap<String, Object>> hits;
// NOTE(review): presumably the id of the document the similarity query was run against; confirm.
String id;
// NOTE(review): the remaining fields presumably mirror the same-named response keys described
// in the linked API specification; confirm.
int processingTimeMs;
int offset;
int limit;
int estimatedTotalHits;
}
49 changes: 37 additions & 12 deletions src/test/java/com/meilisearch/integration/SearchTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,11 @@

import com.meilisearch.integration.classes.AbstractIT;
import com.meilisearch.integration.classes.TestData;
import com.meilisearch.sdk.Index;
import com.meilisearch.sdk.IndexSearchRequest;
import com.meilisearch.sdk.MultiSearchRequest;
import com.meilisearch.sdk.SearchRequest;
import com.meilisearch.sdk.*;
import com.meilisearch.sdk.json.GsonJsonHandler;
import com.meilisearch.sdk.model.FacetRating;
import com.meilisearch.sdk.model.MatchingStrategy;
import com.meilisearch.sdk.model.MultiSearchResult;
import com.meilisearch.sdk.model.SearchResult;
import com.meilisearch.sdk.model.SearchResultPaginated;
import com.meilisearch.sdk.model.Searchable;
import com.meilisearch.sdk.model.Settings;
import com.meilisearch.sdk.model.TaskInfo;
import com.meilisearch.sdk.model.*;
import com.meilisearch.sdk.utils.Movie;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import org.junit.jupiter.api.AfterAll;
Expand Down Expand Up @@ -843,4 +834,38 @@ public void testMultiSearchWithRankingScoreThreshold() throws Exception {
assertThat(rankingScore, is(greaterThanOrEqualTo(0.98)));
}
}

/** Test similar documents search with a user-provided embedder */
@Test
public void testSimilarDocuments() throws Exception {
    // Turn on the experimental "vectorStore" feature before configuring embedders.
    HashMap<String, Boolean> features = new HashMap<>();
    features.put("vectorStore", true);
    client.experimentalFeatures(features);

    String indexUid = "SimilarDocuments";
    Index index = client.index(indexUid);

    // The "manual" embedder uses 3 dimensions, matching the 3-element "_vectors.manual"
    // arrays in the vector_movies.json fixture.
    HashMap<String, Embedders> embedders = new HashMap<>();
    embedders.put(
            "manual", new Embedders().setSource(EmbedderSource.USER_PROVIDED).setDimensions(3));

    Settings settings = new Settings();
    settings.setEmbedders(embedders);

    index.updateSettings(settings);

    TestData<Movie> testData = this.getTestData(VECTOR_MOVIES, Movie.class);
    TaskInfo task = index.addDocuments(testData.getRaw());

    index.waitForTask(task.getTaskUid());

    SimilarDocumentsResults results =
            index.searchSimilarDocuments(
                    new SimilarDocumentRequest().setId("143").setEmbedder("manual"));

    // The fixture holds 5 movies; 4 hits are expected for the query on document "143".
    ArrayList<HashMap<String, Object>> hits = results.getHits();
    assertThat(hits.size(), is(4));
    assertThat(hits.get(0).get("title"), is("Escape Room"));
    assertThat(hits.get(1).get("title"), is("Captain Marvel"));
    assertThat(hits.get(2).get("title"), is("How to Train Your Dragon: The Hidden World"));
    assertThat(hits.get(3).get("title"), is("Shazam!"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ public abstract class AbstractIT {

public static final String MOVIES_INDEX = "movies.json";
public static final String NESTED_MOVIES = "nested_movies.json";
public static final String VECTOR_MOVIES = "vector_movies.json";

public AbstractIT() {
try {
loadResource(MOVIES_INDEX);
loadResource(NESTED_MOVIES);
loadResource(VECTOR_MOVIES);
} catch (IOException e) {
e.printStackTrace();
}
Expand Down
1 change: 1 addition & 0 deletions src/test/java/com/meilisearch/sdk/utils/Movie.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class Movie {
private HashMap<String, List<Match>> _matchesPosition;
private Double _rankingScore;
private HashMap<String, Object> _rankingScoreDetails;
private HashMap<String, Object> _vectors;

public class Match {
public int start;
Expand Down
32 changes: 32 additions & 0 deletions src/test/resources/vector_movies.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": { "manual": [0.8, 0.4, -0.5]}
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": { "manual": [0.6, 0.8, -0.2] }
},
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": { "manual": [0.1, 0.6, 0.8] }
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": { "manual": [0.7, 0.7, -0.4] }
},
{
"title": "All Quiet on the Western Front",
"release_year": 1930,
"id": "143",
"_vectors": { "manual": [-0.5, 0.3, 0.85] }
}
]

0 comments on commit cf7732e

Please sign in to comment.