Skip to content

Add functionality for sorting facets/observations into a tree #1542

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions whelk-core/src/main/groovy/whelk/JsonLd.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -670,12 +670,6 @@ class JsonLd {

//==== Class-hierarchies ====

/**
 * Convenience overload: creates a fresh list, hands it to
 * {@code getSuperClasses(type, result)} as the accumulator and returns it.
 *
 * @param type the type whose superclasses are requested
 * @return the list after the accumulator overload has run on it
 */
List<String> getSuperClasses(String type) {
List<String> superClasses = new ArrayList<>()
getSuperClasses(type, superClasses)
superClasses
}

void getSuperClasses(String type, List<String> result) {
def termMap = vocabIndex[type]
if (termMap == null)
Expand All @@ -695,6 +689,13 @@ class JsonLd {
}
}

/**
 * Returns all superclasses of {@code type} as an ordered list of type keys.
 * The collecting itself is done by the two-argument overload, which receives
 * a new, empty list as its accumulator.
 *
 * @param type the type key whose superclasses are requested
 * @return the accumulator list after the overload has run on it
 */
List<String> getSuperClasses(String type) {
List<String> collected = []
getSuperClasses(type, collected)
return collected
}

private Map<String, List<String>> generateSubTermLists(String relationToSuper) {
Map<String, List<String>> superTermOf = [:]
for (String type : vocabIndex.keySet()) {
Expand Down Expand Up @@ -730,6 +731,10 @@ class JsonLd {
return type in bases
}

/**
 * Looks up {@code type} in {@code superClassOf} and returns the stored list.
 * When nothing is stored, or the stored value is empty, a new empty list is
 * returned instead, so callers never receive {@code null}.
 *
 * @param type the type key to look up
 * @return the stored list, or a new empty list
 */
List<String> getDirectSubclasses(String type) {
def direct = superClassOf.get(type)
if (direct) {
return direct
}
return []
}

boolean isInstanceOf(Map entity, String baseType) {
if (entity.is(null)) {
return false
Expand Down
138 changes: 138 additions & 0 deletions whelk-core/src/main/groovy/whelk/search2/FacetTree.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package whelk.search2;

import whelk.JsonLd;

import java.util.*;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.function.Function;
import java.util.stream.Collectors;

import static whelk.util.DocumentUtil.getAtPath;

/**
 * Arranges a flat list of facet observations into a tree that follows the
 * class hierarchy reported by {@link JsonLd}.
 *
 * <p>Each observation is a map whose {@code object.@id} identifies a class.
 * Children are attached to their parent under the {@code "_children"} key.
 * Classes that are needed to connect two observations but are not themselves
 * observed are represented by "fake" observations with {@code totalItems} 0.
 *
 * <p>The lookup table built from the observations is stored in an instance
 * field that is replaced on every call to {@link #sortObservationsAsTree},
 * so one instance should not be used for overlapping calls.
 */
public class FacetTree {

    private static final List<Object> OBJECT_ID_PATH = List.of("object", "@id");
    private static final String CHILDREN_KEY = "_children";

    private final JsonLd jsonLd;
    private Map<String, Map<String, Object>> keyToObservation = new HashMap<>();

    public FacetTree(JsonLd jsonLd) {
        this.jsonLd = jsonLd;
    }

    /**
     * Builds the tree for the given observations.
     *
     * <ul>
     *   <li>If exactly one observation has no ancestor among the observations,
     *       it becomes the single root.</li>
     *   <li>Otherwise a common ancestor of all observations is looked up and a
     *       fake observation for it becomes the root.</li>
     *   <li>If no such ancestor exists (or no fake observation can be created
     *       for it), every observation without an observed ancestor becomes a
     *       top-level node, so that no observation is silently dropped.</li>
     * </ul>
     *
     * <p>The supplied list is not modified. The observation maps themselves
     * are: a {@code "_children"} entry is put into every map that has children.
     *
     * @param observations the flat observations; may be unmodifiable
     * @return an unmodifiable list of the top-level nodes (empty for empty input)
     */
    public List<Map<String, Object>> sortObservationsAsTree(List<Map<String, Object>> observations) {
        // Private working copy: fake observations are appended below, and the
        // caller's list may be unmodifiable or reused elsewhere.
        List<Map<String, Object>> candidates = new ArrayList<>(observations);

        // Keep the first observation per key instead of failing on duplicates,
        // and keep encounter order so the result is deterministic.
        keyToObservation = observations.stream()
                .collect(Collectors.toMap(
                        this::termKeyOf,
                        Function.identity(),
                        (first, duplicate) -> first,
                        LinkedHashMap::new));

        List<String> rootCandidates = keyToObservation.keySet().stream().filter(this::isRootNode).toList();

        // Only look for an unobserved common ancestor when there is no single observed root.
        Optional<String> absentRootKey = rootCandidates.size() == 1
                ? Optional.empty()
                : keyToObservation.keySet().stream().findFirst().flatMap(this::getAbsentRoot);
        Optional<Map<String, Object>> fakeRoot = absentRootKey.flatMap(this::createFakeObservation);

        List<Map<String, Object>> roots = new ArrayList<>();
        Set<String> observedRootKeys = new HashSet<>();
        if (fakeRoot.isPresent()) {
            candidates.add(fakeRoot.get());
            roots.add(fakeRoot.get());
        } else {
            // Single observed root, or fallback to a forest of all observed roots.
            for (String key : rootCandidates) {
                observedRootKeys.add(key);
                roots.add(keyToObservation.get(key));
            }
        }

        // Unobserved classes lying between an observation and its nearest observed ancestor.
        // Ancestors of an observed root can never be reached from it, so roots are skipped.
        Set<String> intermediateClasses = new LinkedHashSet<>();
        for (String typeKey : keyToObservation.keySet()) {
            if (!observedRootKeys.contains(typeKey)) {
                intermediateClasses.addAll(getAbsentSuperClasses(typeKey));
            }
        }
        // The fake root is already among the candidates; do not add it twice.
        absentRootKey.ifPresent(intermediateClasses::remove);

        for (String typeKey : intermediateClasses) {
            createFakeObservation(typeKey).ifPresent(candidates::add);
        }

        // Breadth-first: attach direct subclasses to each node, then descend into them.
        Queue<Map<String, Object>> queue = new ArrayDeque<>(roots);
        while (!queue.isEmpty()) {
            var observation = queue.remove();
            var children = findChildren(observation, candidates);

            if (!children.isEmpty()) {
                queue.addAll(children);
                observation.put(CHILDREN_KEY, children);
            }
        }
        return List.copyOf(roots);
    }

    /**
     * Creates a placeholder observation (zero hits, view id {@code "fake"})
     * for a class that has no observation of its own.
     *
     * @return the placeholder, or empty when {@code jsonLd} has no term id for the key
     */
    private Optional<Map<String, Object>> createFakeObservation(String termKey) {
        String termId = jsonLd.toTermId(termKey);
        if (termId == null) {
            // TODO: investigate!!
            // Happens when observations are "" and "Dataset".
            return Optional.empty();
        }
        Map<String, Object> fakeObservation = new LinkedHashMap<>();
        fakeObservation.put("totalItems", 0);
        fakeObservation.put("view", Map.of(JsonLd.ID_KEY, "fake"));
        fakeObservation.put("object", Map.of(JsonLd.ID_KEY, termId));
        return Optional.of(fakeObservation);
    }

    /** Returns the term key of the class an observation refers to ({@code object.@id}). */
    private String termKeyOf(Map<String, Object> observation) {
        return jsonLd.toTermKey(get(observation, OBJECT_ID_PATH, ""));
    }

    /** Returns the leading superclasses of {@code typeKey} up to, but excluding, the first observed one. */
    private List<String> getAbsentSuperClasses(String typeKey) {
        List<String> allSuperClasses = jsonLd.getSuperClasses(typeKey);

        return allSuperClasses.stream()
                .takeWhile(s -> !keyToObservation.containsKey(s))
                .toList();
    }

    /** Returns the first superclass of {@code typeKey} whose subclasses include every observed key. */
    private Optional<String> getAbsentRoot(String typeKey) {
        List<String> allSuperClasses = jsonLd.getSuperClasses(typeKey);
        return allSuperClasses.stream()
                .filter(this::subClassesContainsAllObservations)
                .findFirst();
    }

    private boolean subClassesContainsAllObservations(String c) {
        Set<String> subClasses = jsonLd.getSubClasses(c);
        return subClasses.containsAll(keyToObservation.keySet());
    }

    /** Returns true when any superclass of {@code typeKey} is itself observed. */
    private boolean hasParentInObservations(String typeKey) {
        List<String> allSuperClasses = jsonLd.getSuperClasses(typeKey);

        return allSuperClasses.stream()
                .anyMatch(keyToObservation::containsKey);
    }

    private boolean isRootNode(String typeKey) {
        return !hasParentInObservations(typeKey);
    }

    /** Returns the observations whose class is a direct subclass of {@code observation}'s class. */
    private List<Map<String, Object>> findChildren(Map<String, Object> observation, List<Map<String, Object>> observations) {
        // The parent's direct subclasses do not depend on the candidate: look them up once.
        List<String> directSubclasses = jsonLd.getDirectSubclasses(termKeyOf(observation));
        return observations.stream()
                .filter(o -> directSubclasses.contains(termKeyOf(o)))
                .collect(Collectors.toList());
    }

    @SuppressWarnings("unchecked")
    private static <T> T get(Object m, List<Object> path, T defaultTo) {
        return (T) getAtPath(m, path, defaultTo);
    }
}
4 changes: 4 additions & 0 deletions whelk-core/src/main/groovy/whelk/search2/Query.java
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ private Map<String, Object> buildSliceByDimension(Map<Property, Map<PathValue, I
}
var sliceNode = new LinkedHashMap<>();
var observations = getObservations(propertyKey, buckets);

if (property.name().equals(JsonLd.Rdfs.RDF_TYPE)) {
observations = new FacetTree(whelk.getJsonld()).sortObservationsAsTree(observations);
}
if (!observations.isEmpty()) {
if (selectedFilters.isRangeFilter(propertyKey)) {
sliceNode.put("search", getRangeTemplate(propertyKey));
Expand Down
203 changes: 203 additions & 0 deletions whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package whelk.search2

import spock.lang.Specification
import whelk.JsonLd

/**
 * Spock specification for FacetTree.sortObservationsAsTree.
 *
 * Every feature stubs the class hierarchy on a mocked JsonLd and compares the
 * returned tree with an expected nested structure given in the where: table.
 */
class FacetTreeSpec extends Specification {

// Mocked vocabulary; the hierarchy is stubbed per feature method.
JsonLd jsonLd

// Runs before each feature: term keys and term ids are stubbed as identity,
// so the "@id" strings in the fixtures double as type keys.
void setup() {
jsonLd = GroovyMock(JsonLd.class)
jsonLd.toTermKey(_ as String) >> { String s -> s }
jsonLd.toTermId(_ as String) >> { String s -> s }
}

// A lone observation has no ancestors or subclasses and is returned unchanged.
def "Single observation should return list with one observation"() {
given:
jsonLd.getDirectSubclasses("parent") >> []
jsonLd.getSuperClasses("parent") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]]
}

// The child observation is nested under its parent via "_children".
def "Sort one parent and one child"() {
given:
jsonLd.getDirectSubclasses("parent") >> ["child"]
jsonLd.getDirectSubclasses("child") >> []

jsonLd.getSuperClasses("child") >> ["parent"]
jsonLd.getSuperClasses("parent") >> []


expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "parent"]],
["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]]
}

// "Resource" is a superclass of the observed root but is not observed itself;
// it must not appear in the result.
def "Sort one parent with two children, superclasses of root should be ignored"() {
given:
jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
jsonLd.getDirectSubclasses("Resource") >> ["root"]
jsonLd.getDirectSubclasses("child1") >> []
jsonLd.getDirectSubclasses("child2") >> []

jsonLd.getSuperClasses("child1") >> ["root", "Resource"]
// NOTE(review): "child1" is listed as a superclass of "child2" here, yet
// getDirectSubclasses("child1") is stubbed as [] and the expected tree puts
// both children directly under "root" — confirm this fixture is intentional.
jsonLd.getSuperClasses("child2") >> ["child1", "root", "Resource"]
jsonLd.getSuperClasses("root") >> ["Resource"]
jsonLd.getSuperClasses("Resource") >> []

jsonLd.getSubClasses("Resource") >> ["root", "child1", "child2"]
jsonLd.getSubClasses("root") >> ["child1", "child2"]
jsonLd.getSubClasses("child1") >> []
jsonLd.getSubClasses("child2") >> []


expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "root"]],
["object": ["@id": "child1"]],
["object": ["@id": "child2"]]] | [["object": ["@id": "root"],
"_children": [["object": ["@id": "child1"]],
["object": ["@id": "child2"]]]]]
}

// A three-level chain root -> child1 -> child2 is nested two levels deep.
def "Sort one parent with one child that has one child"() {
given:
jsonLd.getDirectSubclasses("root") >> ["child1"]
jsonLd.getDirectSubclasses("child1") >> ["child2"]
jsonLd.getDirectSubclasses("child2") >> []

jsonLd.getSuperClasses("child1") >> ["root"]
jsonLd.getSuperClasses("child2") >> ["child1", "root"]
jsonLd.getSuperClasses("root") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "root"]],
["object": ["@id": "child1"]],
["object": ["@id": "child2"]]] | [["object": ["@id": "root"],
"_children": [["object": ["@id": "child1"],
"_children": [["object": ["@id": "child2"]]]]]]]
}

// Two siblings under one observed root.
def "One parent, two children"() {
given:
jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
jsonLd.getDirectSubclasses("child1") >> []
jsonLd.getDirectSubclasses("child2") >> []

jsonLd.getSuperClasses("child1") >> ["root"]
jsonLd.getSuperClasses("child2") >> ["root"]
jsonLd.getSuperClasses("root") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "root"]],
["object": ["@id": "child1"]],
["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]],
["object": ["@id": "child2"]]]]]
}

// None of the three observations has an observed ancestor; their common,
// unobserved superclass "absentRoot" is expected as a fake root observation.
def "Three root nodes"() {
given:
jsonLd.getDirectSubclasses("absentRoot") >> ["root1", "root2", "root3"]
jsonLd.getDirectSubclasses("root1") >> []
jsonLd.getDirectSubclasses("root2") >> []
jsonLd.getDirectSubclasses("root3") >> []

jsonLd.getSuperClasses("root1") >> ["absentRoot"]
jsonLd.getSuperClasses("root2") >> ["absentRoot"]
jsonLd.getSuperClasses("root3") >> ["absentRoot"]
jsonLd.getSuperClasses("absentRoot") >> []

jsonLd.getSubClasses("absentRoot") >> ["root1", "root2", "root3"]
jsonLd.getSubClasses("root1") >> []
jsonLd.getSubClasses("root2") >> []
jsonLd.getSubClasses("root3") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "root1"]],
["object": ["@id": "root2"]],
["object": ["@id": "root3"]]] | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "absentRoot"], "_children": [["object": ["@id": "root1"]],
["object": ["@id": "root2"]],
["object": ["@id": "root3"]]]]]
}


// "intermediate" sits between the observed root and the observed child but is
// not observed; a fake observation for it is expected in the tree.
def "Root with one intermediate observation before one child"() {
given:
jsonLd.getDirectSubclasses("root") >> ["intermediate"]
jsonLd.getDirectSubclasses("intermediate") >> ["child"]
jsonLd.getDirectSubclasses("child") >> []

jsonLd.getSuperClasses("child") >> ["intermediate", "root"]
jsonLd.getSuperClasses("root") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

// TODO: don't depend on exact form of fake observation
where:
observations | sorted
[["object": ["@id": "root"]],
["object": ["@id": "child"]]] | [["object": ["@id": "root"],
"_children": [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "intermediate"],
"_children": [["object": ["@id": "child"]]]]]]]
}

// Only the two children are observed; their shared superclass "root" is
// expected as a fake root observation holding both.
def "Absent root, two children"() {
given:
jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
jsonLd.getDirectSubclasses("child1") >> []
jsonLd.getDirectSubclasses("child2") >> []

jsonLd.getSuperClasses("child1") >> ["root"]
jsonLd.getSuperClasses("child2") >> ["root"]
jsonLd.getSuperClasses("root") >> []

jsonLd.getSubClasses("root") >> ["child1", "child2"]
jsonLd.getSubClasses("child1") >> []
jsonLd.getSubClasses("child2") >> []

expect:
def tree = new FacetTree(jsonLd)
tree.sortObservationsAsTree(observations) == sorted

where:
observations | sorted
[["object": ["@id": "child1"]],
["object": ["@id": "child2"]]] | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "root"],
"_children" : [["object": ["@id": "child1"]],
["object": ["@id": "child2"]]]]]
}
}
Loading