From f34e0168096e4fb8e48d615f07225e7bdd5e6370 Mon Sep 17 00:00:00 2001 From: phix Date: Thu, 30 May 2024 23:46:50 -0700 Subject: [PATCH 01/87] Elasticsearch code. --- api/src/main/java/marquez/MarquezApp.java | 29 ++++- api/src/main/java/marquez/MarquezContext.java | 15 ++- .../java/marquez/api/OpenLineageResource.java | 101 +++++++++++++++++- .../marquez/api/OpenLineageResourceTest.java | 5 +- build.gradle | 4 + docker-compose.yml | 23 ++++ 6 files changed, 172 insertions(+), 5 deletions(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 0bc95bd4ae..ab2340f500 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -5,6 +5,10 @@ package marquez; +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.json.jackson.JacksonJsonpMapper; +import co.elastic.clients.transport.ElasticsearchTransport; +import co.elastic.clients.transport.rest_client.RestClientTransport; import com.codahale.metrics.jdbi3.InstrumentedSqlLogger; import com.fasterxml.jackson.databind.SerializationFeature; import io.dropwizard.Application; @@ -40,6 +44,9 @@ import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; import marquez.tracing.TracingServletFilter; +import org.apache.http.Header; +import org.apache.http.HttpHost; +import org.elasticsearch.client.RestClient; import org.flywaydb.core.api.FlywayException; import org.jdbi.v3.core.Jdbi; import org.jdbi.v3.core.statement.SqlLogger; @@ -129,8 +136,13 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } final Jdbi jdbi = newJdbi(config, env, source); + final ElasticsearchClient elasticsearchClient = newElasticsearchClient(); final MarquezContext marquezContext = - MarquezContext.builder().jdbi(jdbi).tags(config.getTags()).build(); + MarquezContext.builder() + .jdbi(jdbi) + .elasticsearchClient(elasticsearchClient) + .tags(config.getTags()) + 
.build(); registerResources(config, env, marquezContext); registerServlets(env); @@ -147,6 +159,21 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { Exclusions.use(exclusions); } + private ElasticsearchClient newElasticsearchClient() { + String host = "search"; + int port = 9200; + RestClient restClient = + RestClient.builder(new HttpHost(host, port, "http")) + .setDefaultHeaders( + new Header[] { + // new BasicHeader("Authorization", "ApiKey " + apiKey) + }) + .build(); + ElasticsearchTransport transport = + new RestClientTransport(restClient, new JacksonJsonpMapper()); + return new ElasticsearchClient(transport); + } + private boolean isSentryEnabled(MarquezConfig config) { return config.getSentry() != null && !config.getSentry().getDsn().equals(SentryConfig.DEFAULT_DSN); diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index a92479368e..91aff1cadb 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -5,6 +5,7 @@ package marquez; +import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @@ -101,14 +102,17 @@ public final class MarquezContext { @Getter private final JdbiExceptionExceptionMapper jdbiException; @Getter private final JsonProcessingExceptionMapper jsonException; @Getter private final GraphQLHttpServlet graphqlServlet; + @Getter private final ElasticsearchClient elasticsearchClient; private MarquezContext( @NonNull final Jdbi jdbi, + @NonNull final ElasticsearchClient elasticsearchClient, @NonNull final ImmutableSet tags, List runTransitionListeners) { if (runTransitionListeners == null) { runTransitionListeners = new ArrayList<>(); } + this.elasticsearchClient = elasticsearchClient; final BaseDao baseDao = jdbi.onDemand(NamespaceDao.class); this.namespaceDao = 
jdbi.onDemand(NamespaceDao.class); @@ -163,7 +167,8 @@ private MarquezContext( this.columnLineageResource = new ColumnLineageResource(serviceFactory); this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); - this.openLineageResource = new OpenLineageResource(serviceFactory, openLineageDao); + this.openLineageResource = + new OpenLineageResource(serviceFactory, elasticsearchClient, openLineageDao); this.searchResource = new SearchResource(searchDao); this.resources = @@ -190,6 +195,7 @@ public static Builder builder() { public static class Builder { private Jdbi jdbi; + private ElasticsearchClient elasticsearchClient; private ImmutableSet tags; private List runTransitionListeners; @@ -203,6 +209,11 @@ public Builder jdbi(@NonNull Jdbi jdbi) { return this; } + public Builder elasticsearchClient(@NonNull ElasticsearchClient elasticsearchClient) { + this.elasticsearchClient = elasticsearchClient; + return this; + } + public Builder tags(@NonNull ImmutableSet tags) { this.tags = tags; return this; @@ -219,7 +230,7 @@ public Builder runTransitionListeners( } public MarquezContext build() { - return new MarquezContext(jdbi, tags, runTransitionListeners); + return new MarquezContext(jdbi, elasticsearchClient, tags, runTransitionListeners); } } } diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index a4cae079c0..f92db9f72b 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -9,15 +9,22 @@ import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import static javax.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch.core.IndexRequest; import com.codahale.metrics.annotation.ExceptionMetered; import 
com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import io.dropwizard.jersey.jsr310.ZonedDateTimeParam; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.UUID; import java.util.concurrent.CompletionException; import javax.validation.Valid; import javax.validation.constraints.Min; @@ -50,11 +57,15 @@ public class OpenLineageResource extends BaseResource { private static final String DEFAULT_DEPTH = "20"; + private final ElasticsearchClient elasticsearchClient; private final OpenLineageDao openLineageDao; public OpenLineageResource( - @NonNull final ServiceFactory serviceFactory, @NonNull final OpenLineageDao openLineageDao) { + @NonNull final ServiceFactory serviceFactory, + @NonNull final ElasticsearchClient elasticsearchClient, + @NonNull final OpenLineageDao openLineageDao) { super(serviceFactory); + this.elasticsearchClient = elasticsearchClient; this.openLineageDao = openLineageDao; } @@ -67,6 +78,7 @@ public OpenLineageResource( @Path("/lineage") public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncResponse asyncResponse) throws JsonProcessingException, SQLException { + indexEvent((LineageEvent) event); if (event instanceof LineageEvent) { openLineageService .createAsync((LineageEvent) event) @@ -87,6 +99,93 @@ public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncRespon } } + private UUID runUuidFromEvent(LineageEvent.Run run) { + UUID runUuid; + try { + runUuid = UUID.fromString(run.getRunId()); + } catch (Exception e) { + runUuid = UUID.nameUUIDFromBytes(run.getRunId().getBytes(StandardCharsets.UTF_8)); + } + return runUuid; + } + + private void indexEvent(@Valid @NotNull LineageEvent event) 
{ + if (this.elasticsearchClient != null) { + UUID runUuid = runUuidFromEvent(event.getRun()); + log.info("Indexing event {}", event); + + if (event.getInputs() != null) { + indexDatasets(event.getInputs(), runUuid, event); + } + if (event.getOutputs() != null) { + indexDatasets(event.getOutputs(), runUuid, event); + } + indexJob(runUuid, event); + } + } + + private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { + Map jsonMap = new HashMap<>(); + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", event.getJob().getName()); + jsonMap.put("type", "JOB"); + jsonMap.put("namespace", event.getJob().getNamespace()); + jsonMap.put("facets", event.getJob().getFacets()); + return jsonMap; + } + + private Map buildDatasetIndexRequest( + UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { + Map jsonMap = new HashMap<>(); + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", dataset.getName()); + jsonMap.put("type", "DATASET"); + jsonMap.put("namespace", dataset.getNamespace()); + jsonMap.put("facets", dataset.getFacets()); + return jsonMap; + } + + private void indexJob(UUID runUuid, LineageEvent event) { + index( + IndexRequest.of( + i -> + i.index("jobs") + .id( + String.format( + "JOB:%s:%s", event.getJob().getNamespace(), event.getJob().getName())) + .document(buildJobIndexRequest(runUuid, event)))); + } + + private void indexDatasets( + List datasets, UUID runUuid, LineageEvent event) { + datasets.stream() + .map(dataset -> buildDatasetIndexRequest(runUuid, dataset, event)) + .forEach( + jsonMap -> { + index( + IndexRequest.of( + i -> + i.index("datasets") + .id( + String.format( + "DATASET:%s:%s", + jsonMap.get("namespace"), jsonMap.get("name"))) + .document(jsonMap))); + }); + } + + private void index(IndexRequest> request) { + try { + if (this.elasticsearchClient != null) { + this.elasticsearchClient.index(request); 
+ } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + private void onComplete(Void result, Throwable err, AsyncResponse asyncResponse) { if (err != null) { log.error("Unexpected error while processing request", err); diff --git a/api/src/test/java/marquez/api/OpenLineageResourceTest.java b/api/src/test/java/marquez/api/OpenLineageResourceTest.java index 9174e520cb..6d81b0f10b 100644 --- a/api/src/test/java/marquez/api/OpenLineageResourceTest.java +++ b/api/src/test/java/marquez/api/OpenLineageResourceTest.java @@ -13,6 +13,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableSortedSet; import io.dropwizard.testing.junit5.DropwizardExtensionsSupport; @@ -54,7 +55,9 @@ class OpenLineageResourceTest { UNDER_TEST = ResourceExtension.builder() - .addResource(new OpenLineageResource(serviceFactory, openLineageDao)) + .addResource( + new OpenLineageResource( + serviceFactory, new ElasticsearchClient(null), openLineageDao)) .build(); } diff --git a/build.gradle b/build.gradle index c2aef61fa1..3866a4e66d 100644 --- a/build.gradle +++ b/build.gradle @@ -52,7 +52,9 @@ subprojects { ext { assertjVersion = '3.25.3' + elasticsearchVersion = '8.13.4' dropwizardVersion = '2.1.12' + jacksonDatabindVersion = '2.12.3' jacocoVersion = '0.8.11' junit5Version = '5.10.2' lombokVersion = '1.18.32' @@ -64,6 +66,8 @@ subprojects { dependencies { implementation "org.projectlombok:lombok:${lombokVersion}" + implementation "co.elastic.clients:elasticsearch-java:${elasticsearchVersion}" + implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonDatabindVersion}" annotationProcessor "org.projectlombok:lombok:${lombokVersion}" testImplementation "org.assertj:assertj-core:${assertjVersion}" diff --git a/docker-compose.yml b/docker-compose.yml index b8b4403d4a..207aa55b0e 100644 
--- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,7 @@ services: links: - "db:postgres" depends_on: + - search - db entrypoint: - /opt/marquez/wait-for-it.sh @@ -40,8 +41,30 @@ services: # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] + search: + image: elasticsearch:8.13.4 + container_name: marquez-search + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + environment: + - xpack.security.enabled=false + - discovery.type=single-node + volumes: + - elasticsearch-data:/usr/share/elasticsearch/data + cap_add: + - IPC_LOCK + ports: + - "9200:9200" + - "9300:9300" + volumes: data: + elasticsearch-data: db-conf: db-init: db-backup: From 70c4ce06822398837b34ffbe1752f72bf40b2995 Mon Sep 17 00:00:00 2001 From: phix Date: Fri, 7 Jun 2024 14:13:50 -0700 Subject: [PATCH 02/87] Adding basic responses for elasticsearch. --- api/src/main/java/marquez/MarquezContext.java | 2 +- .../java/marquez/api/OpenLineageResource.java | 17 +--- .../main/java/marquez/api/SearchResource.java | 95 ++++++++++++++++++- 3 files changed, 99 insertions(+), 15 deletions(-) diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index 91aff1cadb..3bfc2d0eac 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -169,7 +169,7 @@ private MarquezContext( this.tagResource = new TagResource(serviceFactory); this.openLineageResource = new OpenLineageResource(serviceFactory, elasticsearchClient, openLineageDao); - this.searchResource = new SearchResource(searchDao); + this.searchResource = new SearchResource(searchDao, elasticsearchClient); this.resources = ImmutableList.of( diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index f92db9f72b..fdf92e2594 100644 --- 
a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -47,8 +47,6 @@ import marquez.db.OpenLineageDao; import marquez.service.ServiceFactory; import marquez.service.models.BaseEvent; -import marquez.service.models.DatasetEvent; -import marquez.service.models.JobEvent; import marquez.service.models.LineageEvent; import marquez.service.models.NodeId; @@ -83,14 +81,6 @@ public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncRespon openLineageService .createAsync((LineageEvent) event) .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); - } else if (event instanceof DatasetEvent) { - openLineageService - .createAsync((DatasetEvent) event) - .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); - } else if (event instanceof JobEvent) { - openLineageService - .createAsync((JobEvent) event) - .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); } else { log.warn("Unsupported event type {}. Skipping without error", event.getClass().getName()); @@ -129,7 +119,7 @@ private Map buildJobIndexRequest(UUID runUuid, LineageEvent even jsonMap.put("run_id", runUuid.toString()); jsonMap.put("eventType", event.getEventType()); jsonMap.put("name", event.getJob().getName()); - jsonMap.put("type", "JOB"); + jsonMap.put("type", event.getJob().isStreamingJob() ? 
"STREAM" : "BATCH"); jsonMap.put("namespace", event.getJob().getNamespace()); jsonMap.put("facets", event.getJob().getFacets()); return jsonMap; @@ -141,7 +131,8 @@ private Map buildDatasetIndexRequest( jsonMap.put("run_id", runUuid.toString()); jsonMap.put("eventType", event.getEventType()); jsonMap.put("name", dataset.getName()); - jsonMap.put("type", "DATASET"); + jsonMap.put("inputFacets", dataset.getInputFacets()); + jsonMap.put("outputFacets", dataset.getOutputFacets()); jsonMap.put("namespace", dataset.getNamespace()); jsonMap.put("facets", dataset.getFacets()); return jsonMap; @@ -182,7 +173,7 @@ private void index(IndexRequest> request) { this.elasticsearchClient.index(request); } } catch (IOException e) { - throw new RuntimeException(e); + log.info("Failed to index event Elasticsearch not available."); } } diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index dcc8d3206c..bd43db2a97 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -8,11 +8,19 @@ import static javax.ws.rs.core.MediaType.APPLICATION_JSON; import static marquez.common.Utils.toLocateDateOrNull; +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch._types.query_dsl.Operator; +import co.elastic.clients.elasticsearch._types.query_dsl.TextQueryType; +import co.elastic.clients.elasticsearch.core.SearchResponse; +import co.elastic.clients.elasticsearch.core.search.Hit; import com.codahale.metrics.annotation.ExceptionMetered; import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; import java.util.List; +import java.util.stream.Collectors; import javax.annotation.Nullable; import javax.validation.Valid; import 
javax.validation.constraints.Min; @@ -42,9 +50,12 @@ public class SearchResource { private static final int MIN_LIMIT = 0; private final SearchDao searchDao; + private final ElasticsearchClient elasticsearchClient; - public SearchResource(@NonNull final SearchDao searchDao) { + public SearchResource( + @NonNull final SearchDao searchDao, @Nullable final ElasticsearchClient elasticsearchClient) { this.searchDao = searchDao; + this.elasticsearchClient = elasticsearchClient; } @Timed @@ -72,6 +83,88 @@ public Response search( return Response.ok(new SearchResults(searchResults)).build(); } + /** + * { "query": { "multi_match": { "type": "phrase_prefix", "query": "${query}", "fields": [ + * "facets.sourceCode.sourceCode", "facets.sourceCode.language", "run_id", "name", "namespace", + * "type" ], "operator": "or" } } } + */ + @Timed + @ResponseMetered + @ExceptionMetered + @GET + @Produces(APPLICATION_JSON) + @Path("/jobs") + public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { + if (this.elasticsearchClient != null) { + SearchResponse response = + this.elasticsearchClient.search( + s -> + s.index("jobs") + .query( + q -> + q.multiMatch( + m -> + m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields( + "facets.sourceCode.sourceCode", + "facets.sourceCode.language", + "run_id", + "name", + "namespace", + "type") + .operator(Operator.Or))), + ObjectNode.class); + return Response.ok( + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList())) + .build(); + } else { + return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); + } + } + + /** + * { "query": { "multi_match": { "query": "id", "fields": [ "facets.schema.fields.name", + * "facets.schema.fields.type", "facets.columnLineage.fields.*.inputFields.name", + * "facets.columnLineage.fields.*.inputFields.namespace", + * "facets.columnLineage.fields.*.inputFields.field", + * "facets.columnLineage.fields.*.transformationDescription", + * 
"facets.columnLineage.fields.*.transformationType" ] } } } + */ + @Timed + @ResponseMetered + @ExceptionMetered + @GET + @Produces(APPLICATION_JSON) + @Path("/datasets") + public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { + if (this.elasticsearchClient != null) { + SearchResponse response = + this.elasticsearchClient.search( + s -> + s.index("datasets") + .query( + q -> + q.multiMatch( + m -> + m.query(query) + .fields( + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + "facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType"))), + ObjectNode.class); + return Response.ok( + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList())) + .build(); + } else { + return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); + } + } + /** Wrapper for {@link SearchResult}s which also contains a {@code total count}. */ @ToString public static final class SearchResults { From 51a5fa862c6b713f51b6d72a5ec724df87ce4c8a Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 13:09:58 -0700 Subject: [PATCH 03/87] Saving highlights. 
--- .../main/java/marquez/api/SearchResource.java | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index bd43db2a97..77fb141c5b 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import javax.annotation.Nullable; import javax.validation.Valid; @@ -113,11 +114,23 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc "name", "namespace", "type") - .operator(Operator.Or))), + .operator(Operator.Or))) + .highlight( + hl -> + hl.fields("facets.sourceCode.sourceCode", f -> f.type("plain")) + .fields("facets.sourceCode.language", f -> f.type("plain")) + .fields("run_id", f -> f.type("plain")) + .fields("name", f -> f.type("plain")) + .fields("namespace", f -> f.type("plain")) + .fields("type", f -> f.type("plain"))), ObjectNode.class); - return Response.ok( - response.hits().hits().stream().map(Hit::source).collect(Collectors.toList())) - .build(); + + List hits = + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); + List>> highlights = + response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); + + return Response.ok(new EsResult(hits, highlights)).build(); } else { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } @@ -165,6 +178,19 @@ public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws I } } + @ToString + public static final class EsResult { + @Getter private final List hits; + @Getter private final List>> highlights; + + @JsonCreator + public EsResult( + @NonNull List hits, @NonNull List>> highlights) { + this.hits = hits; + this.highlights = highlights; + 
} + } + /** Wrapper for {@link SearchResult}s which also contains a {@code total count}. */ @ToString public static final class SearchResults { From 52606f495c60146041fa0ba82a419ae0e324c2eb Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 13:34:55 -0700 Subject: [PATCH 04/87] Saving code cleanup. --- .../main/java/marquez/api/SearchResource.java | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 77fb141c5b..ff1ba11921 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -19,6 +19,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -97,32 +98,35 @@ public Response search( @Path("/jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { if (this.elasticsearchClient != null) { + String[] fields = { + "facets.sourceCode.sourceCode", + "facets.sourceCode.language", + "run_id", + "name", + "namespace", + "type" + }; SearchResponse response = this.elasticsearchClient.search( - s -> - s.index("jobs") - .query( - q -> - q.multiMatch( - m -> - m.query(query) - .type(TextQueryType.PhrasePrefix) - .fields( - "facets.sourceCode.sourceCode", - "facets.sourceCode.language", - "run_id", - "name", - "namespace", - "type") - .operator(Operator.Or))) - .highlight( - hl -> - hl.fields("facets.sourceCode.sourceCode", f -> f.type("plain")) - .fields("facets.sourceCode.language", f -> f.type("plain")) - .fields("run_id", f -> f.type("plain")) - .fields("name", f -> f.type("plain")) - .fields("namespace", f -> f.type("plain")) - .fields("type", f -> f.type("plain"))), + s -> { + s.index("jobs") + .query( + q -> + q.multiMatch( + m -> + 
m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))); + s.highlight( + hl -> { + for (String field : fields) { + hl.fields(field, f -> f.type("plain")); + } + return hl; + }); + return s; + }, ObjectNode.class); List hits = From c489de4f216461edf162549586a3cf5e95fc0cac Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 16:32:45 -0700 Subject: [PATCH 05/87] Adding EsSearch. --- web/src/components/core/chip/MqChip.tsx | 2 +- web/src/components/core/chip/MqChipGroup.tsx | 2 +- web/src/components/search/Search.tsx | 324 ++---------------- web/src/components/search/SearchListItem.tsx | 13 +- .../search/base-search/BaseSearch.tsx | 248 ++++++++++++++ .../components/search/es-search/EsSearch.tsx | 13 + web/src/store/actionCreators/actionTypes.ts | 4 + web/src/store/actionCreators/index.ts | 13 + web/src/store/reducers/esSearch.ts | 36 ++ web/src/store/requests/search.ts | 5 + web/src/store/sagas/index.ts | 17 +- web/src/types/api.ts | 31 ++ 12 files changed, 408 insertions(+), 300 deletions(-) create mode 100644 web/src/components/search/base-search/BaseSearch.tsx create mode 100644 web/src/components/search/es-search/EsSearch.tsx create mode 100644 web/src/store/reducers/esSearch.ts diff --git a/web/src/components/core/chip/MqChip.tsx b/web/src/components/core/chip/MqChip.tsx index 0ab5c3bdba..2ee04b356a 100644 --- a/web/src/components/core/chip/MqChip.tsx +++ b/web/src/components/core/chip/MqChip.tsx @@ -44,7 +44,7 @@ const MqChip: React.FC = ({ padding: '2px 12px', cursor: 'pointer', userSelect: 'none', - boxShadow: selected ? `0 0 2px 3px ${theme.palette.common.white}` : 'initial', + boxShadow: selected ? 
`0 0 1px 1px ${theme.palette.secondary.main}` : 'initial', }} onClick={() => { if (selectable !== false && onSelect) { diff --git a/web/src/components/core/chip/MqChipGroup.tsx b/web/src/components/core/chip/MqChipGroup.tsx index 314778edf1..5472b9db8b 100644 --- a/web/src/components/core/chip/MqChipGroup.tsx +++ b/web/src/components/core/chip/MqChipGroup.tsx @@ -53,7 +53,7 @@ const MqChipGroup: React.FC = ({ chips, initialSelection, onSe return ( - isSearching: boolean - isSearchingInit: boolean -} - -interface DispatchProps { - setSelectedNode: typeof setSelectedNode - fetchSearch: typeof fetchSearch -} - -interface SearchState { - open: boolean - search: string - selected: string - filter: string - sort: string -} - -type SearchProps = StateProps & DispatchProps const useCmdKShortcut = (callback: () => void) => { useEffect(() => { @@ -102,70 +29,26 @@ const useCmdKShortcut = (callback: () => void) => { }, [callback]) } -const Search: React.FC = (props: SearchProps) => { - const [state, setState] = React.useState({ - open: true, - search: '', - selected: '', - filter: 'All', - sort: 'UPDATE_AT', - }) - - const fetchSearch = (q: string, filter = 'ALL', sort = 'NAME') => { - props.fetchSearch(q, filter, sort) - } +const Search: React.FC = () => { + const [search, setSearch] = useState('') + const [open, setOpen] = useState(true) const inputRef = useRef(null) // focus on cmd + k useCmdKShortcut(() => { - console.log('focus', inputRef) if (inputRef.current) { inputRef.current.focus() } }) - debounce(fetchSearch, 300) - const location = useLocation() useEffect(() => { // close search on a route change - setState({ ...state, open: false }) + setOpen(false) + setSearch('') }, [location]) - // listen for cmd + k to focus search - - const onSearch = (event: React.ChangeEvent) => { - setState({ ...state, search: event.target.value, open: true }) - if (event.target.value.length > 0) { - fetchSearch(event.target.value, state.filter.toUpperCase(), state.sort.toUpperCase()) - 
} - } - - const onSelectFilter = (label: string) => { - setState({ - ...state, - filter: label, - }) - - setTimeout(() => { - fetchSearch(state.search, label.toUpperCase(), state.sort.toUpperCase()) - }, 1) - } - - const onSelectSortFilter = (label: string) => { - setState({ - ...state, - sort: label, - }) - - setTimeout(() => { - fetchSearch(state.search, state.filter.toUpperCase(), label.toUpperCase()) - }, 1) - } - - const { isSearching, isSearchingInit } = props - return ( = (props: SearchProps) => { height: '100%', }} > - {state.search.length === 0 && } - {state.search.length > 0 && ( + {search.length === 0 && } + {search.length > 0 && ( = (props: SearchProps) => { height: '100%', cursor: 'pointer', }} - > - { - setState({ ...state, open: false, search: '', selected: '' }) - }} - /> - + > )} = (props: SearchProps) => { fullWidth={true} autoFocus startAdornment={} - endAdornment={} - onFocus={() => setState({ ...state, open: true })} - onChange={(event) => onSearch(event)} - value={state.search} + endAdornment={ + <> + { + setOpen(false) + setSearch('') + }} + > + + + + + } + onFocus={() => setOpen(true)} + onChange={(event) => { + setSearch(event.target.value) + setOpen(true) + }} + value={search} autoComplete={'off'} id={'searchBar'} /> setState({ ...state, open: false })} + onClickAway={() => setOpen(false)} > - - {state.open && state.search.length > 0 && ( - - - - - - - {props.searchResults.size === 0 && ( - - - {isSearching || !isSearchingInit - ? 
i18next.t('search.status') - : i18next.t('search.none')} - - - )} - {[...props.searchResults].map((resultsWithGroups, index) => { - return resultsWithGroups.map((result) => { - if (typeof result === 'string') { - // is group - if (result.length > 0) { - return ( - - - - {parseSearchGroup(result, 'group')} - - - - - {parseSearchGroup(result, 'namespace')} - - - - ) - } else return null - // is a list of group members - } else if (result.length) { - return ( - - {result.map((listItem) => { - return ( - - { - setState({ - ...state, - open: false, - search: nodeName, - }) - props.setSelectedNode(listItem.nodeId) - }} - /> - - ) - })} - - ) - } else { - return null - } - }) - })} - - - )} - + ) } -const mapStateToProps = (state: IState) => { - return { - searchResults: state.search.data.results, - rawResults: state.search.data.rawResults, - isSearching: state.search.isLoading, - isSearchingInit: state.search.init, - } -} - -const mapDispatchToProps = (dispatch: Redux.Dispatch) => - bindActionCreators( - { - setSelectedNode: setSelectedNode, - fetchSearch: fetchSearch, - }, - dispatch - ) - -export default connect(mapStateToProps, mapDispatchToProps)(Search) +export default Search diff --git a/web/src/components/search/SearchListItem.tsx b/web/src/components/search/SearchListItem.tsx index 1ccd1ec6eb..83521787da 100644 --- a/web/src/components/search/SearchListItem.tsx +++ b/web/src/components/search/SearchListItem.tsx @@ -18,7 +18,6 @@ interface OwnProps { searchResult: SearchResult search: string onClick: (nodeName: string) => void - selected: boolean } const searchResultIcon: { [key in JobOrDataset]: JSX.Element } = { @@ -28,12 +27,7 @@ const searchResultIcon: { [key in JobOrDataset]: JSX.Element } = { type DkSearchListItemProps = OwnProps -const SearchListItem: React.FC = ({ - searchResult, - search, - onClick, - selected, -}) => { +const SearchListItem: React.FC = ({ searchResult, search, onClick }) => { const name = searchResult.name.substring( 
searchResult.name.lastIndexOf('.') + 1, searchResult.name.length @@ -48,7 +42,6 @@ const SearchListItem: React.FC = ({ to={`/lineage/${encodeNode(searchResult.type, searchResult.namespace, searchResult.name)}`} > = ({ borderBottomLeftRadius: '2px', borderBottomRightRadius: '2px', }, - '&:hover, &.selected': { + '&:hover': { backgroundColor: darken(theme.palette.background.paper, 0.02), }, '&:nth-pf-type(even)': { backgroundColor: darken(theme.palette.background.paper, 0.2), - '&:hover, &.selected': { + '&:hover': { backgroundColor: darken(theme.palette.background.paper, 0.02), }, }, diff --git a/web/src/components/search/base-search/BaseSearch.tsx b/web/src/components/search/base-search/BaseSearch.tsx new file mode 100644 index 0000000000..c685fbef04 --- /dev/null +++ b/web/src/components/search/base-search/BaseSearch.tsx @@ -0,0 +1,248 @@ +// Copyright 2018-2024 contributors to the Marquez project +// SPDX-License-Identifier: Apache-2.0 + +import * as Redux from 'redux' +import { GroupedSearch } from '../../../types/api' +import { IState } from '../../../store/reducers' +import { bindActionCreators } from 'redux' +import { connect } from 'react-redux' +import { faCog, faDatabase, faSort } from '@fortawesome/free-solid-svg-icons' +import { fetchSearch, setSelectedNode } from '../../../store/actionCreators' +import { parseSearchGroup } from '../../../helpers/nodes' +import { theme } from '../../../helpers/theme' +import Box from '@mui/system/Box' +import MqChipGroup from '../../core/chip/MqChipGroup' +import MqText from '../../core/text/MqText' +import React, { useEffect, useState } from 'react' +import SearchListItem from '../SearchListItem' + +interface BaseSearchProps { + open: boolean + search: string +} + +interface StateProps { + searchResults: Map + isSearching: boolean + isSearchingInit: boolean +} + +interface DispatchProps { + setSelectedNode: typeof setSelectedNode + fetchSearch: typeof fetchSearch +} + +const INITIAL_SEARCH_FILTER = [ + { + text: 'All', 
+ value: 'All', + }, + { + icon: faCog, + foregroundColor: theme.palette.common.white, + backgroundColor: theme.palette.primary.main, + text: 'JOBS', + value: 'JOB', + }, + { + icon: faDatabase, + foregroundColor: theme.palette.common.white, + backgroundColor: theme.palette.info.main, + text: 'DATASETS', + value: 'DATASET', + }, +] + +const INITIAL_SEARCH_SORT_FILTER = [ + { + icon: faSort, + value: 'Sort', + foregroundColor: theme.palette.common.white, + backgroundColor: 'transparent', + selectable: false, + }, + { + text: 'Updated at', + value: 'UPDATE_AT', + }, + { + text: 'Name', + value: 'NAME', + }, +] + +const BaseSearch: React.FC = ({ + open, + search, + isSearchingInit, + searchResults, + isSearching, + fetchSearch, + setSelectedNode, +}) => { + const [filter, setFilter] = useState('All') + const [sort, setSort] = useState('UPDATE_AT') + + const i18next = require('i18next') + + const onSelectFilter = (label: string) => { + setFilter(label) + fetchSearch(search, label.toUpperCase(), sort.toUpperCase()) + } + + const onSelectSortFilter = (label: string) => { + setSort(label) + fetchSearch(search, filter.toUpperCase(), label.toUpperCase()) + } + + const searchApi = (q: string, filter = 'ALL', sort = 'NAME') => { + fetchSearch(q, filter, sort) + } + + useEffect(() => { + if (search.length > 0) { + searchApi(search, filter, sort) + } + }, [search, filter, sort]) + + return ( + + {open && search.length > 0 && ( + + + + + + + {searchResults.size === 0 && ( + + + {isSearching || !isSearchingInit + ? 
i18next.t('search.status') + : i18next.t('search.none')} + + + )} + {[...searchResults].map((resultsWithGroups, index) => { + return resultsWithGroups.map((result) => { + if (typeof result === 'string') { + // is group + if (result.length > 0) { + return ( + + + + {parseSearchGroup(result, 'group')} + + + + + {parseSearchGroup(result, 'namespace')} + + + + ) + } else return null + // is a list of group members + } else if (result.length) { + return ( + + {result.map((listItem) => { + return ( + + { + setSelectedNode(listItem.nodeId) + }} + /> + + ) + })} + + ) + } else { + return null + } + }) + })} + + + )} + + ) +} + +const mapStateToProps = (state: IState) => { + return { + searchResults: state.search.data.results, + rawResults: state.search.data.rawResults, + isSearching: state.search.isLoading, + isSearchingInit: state.search.init, + } +} + +const mapDispatchToProps = (dispatch: Redux.Dispatch) => + bindActionCreators( + { + setSelectedNode: setSelectedNode, + fetchSearch: fetchSearch, + }, + dispatch + ) + +export default connect(mapStateToProps, mapDispatchToProps)(BaseSearch) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx new file mode 100644 index 0000000000..acdcdf38bf --- /dev/null +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -0,0 +1,13 @@ +// Copyright 2018-2024 contributors to the Marquez project +// SPDX-License-Identifier: Apache-2.0 + +import Box from '@mui/system/Box' +import React from 'react' + +interface EsSearchProps {} + +const EsSearch: React.FC = () => { + return +} + +export default EsSearch diff --git a/web/src/store/actionCreators/actionTypes.ts b/web/src/store/actionCreators/actionTypes.ts index 524b455513..1d98c59d6f 100644 --- a/web/src/store/actionCreators/actionTypes.ts +++ b/web/src/store/actionCreators/actionTypes.ts @@ -62,6 +62,10 @@ export const SET_SHOW_FULL_GRAPH = 'SET_SHOW_FULL_GRAPH' export const FETCH_SEARCH = 'FETCH_SEARCH' export const 
FETCH_SEARCH_SUCCESS = 'FETCH_SEARCH _SUCCESS' +// search +export const FETCH_ES_SEARCH = 'FETCH_ES_SEARCH' +export const FETCH_ES_SEARCH_SUCCESS = 'FETCH_ES_SEARCH_SUCCESS' + // facets export const FETCH_RUN_FACETS = 'FETCH_RUN_FACETS' export const FETCH_JOB_FACETS = 'FETCH_JOB_FACETS' diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts index 144dcab568..11115c966d 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -7,6 +7,7 @@ import { ColumnLineageGraph, Dataset, DatasetVersion, + EsSearchResult, Events, Facets, Job, @@ -426,3 +427,15 @@ export const setColumnLineageGraphDepth = (depth: number) => ({ type: actionTypes.SET_COLUMN_LINEAGE_GRAPH_DEPTH, payload: depth, }) + +export const fetchEsSearch = (q: string) => ({ + type: actionTypes.FETCH_SEARCH, + payload: { + q, + }, +}) + +export const fetchEsSearchSuccess = (search: EsSearchResult) => ({ + type: actionTypes.FETCH_SEARCH_SUCCESS, + payload: search, +}) diff --git a/web/src/store/reducers/esSearch.ts b/web/src/store/reducers/esSearch.ts new file mode 100644 index 0000000000..ab9a5d2adb --- /dev/null +++ b/web/src/store/reducers/esSearch.ts @@ -0,0 +1,36 @@ +// Copyright 2018-2023 contributors to the Marquez project +// SPDX-License-Identifier: Apache-2.0 + +import { FETCH_ES_SEARCH, FETCH_ES_SEARCH_SUCCESS } from '../actionCreators/actionTypes' + +import { EsSearchResult } from '../../types/api' +import { fetchEsSearch, fetchEsSearchSuccess } from '../actionCreators' + +export type IEsSearchState = { isLoading: boolean; data: EsSearchResult; init: boolean } + +export const initialState: IEsSearchState = { + isLoading: false, + data: { hits: [], highlights: [] }, + init: false, +} + +type IJobsAction = ReturnType & ReturnType + +export default (state = initialState, action: IJobsAction): IEsSearchState => { + const { type, payload } = action + + switch (type) { + case FETCH_ES_SEARCH: + return { ...state, isLoading: true } 
+ case FETCH_ES_SEARCH_SUCCESS: { + return { + ...state, + isLoading: false, + init: true, + data: payload, + } + } + default: + return state + } +} diff --git a/web/src/store/requests/search.ts b/web/src/store/requests/search.ts index 72b3922521..28cc779fe0 100644 --- a/web/src/store/requests/search.ts +++ b/web/src/store/requests/search.ts @@ -11,3 +11,8 @@ export const getSearch = async (q: string, filter = 'ALL', sort = 'NAME', limit } return genericFetchWrapper(url, { method: 'GET' }, 'fetchSearch') } + +export const getEsSearch = async (q: string) => { + const url = `${API_URL}/search/jobs?q=${q}` + return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearch') +} diff --git a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index b233da757b..fafb138006 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -14,6 +14,7 @@ import { FETCH_DATASET, FETCH_DATASETS, FETCH_DATASET_VERSIONS, + FETCH_ES_SEARCH, FETCH_EVENTS, FETCH_JOBS, FETCH_JOB_FACETS, @@ -27,6 +28,7 @@ import { Dataset, DatasetVersion, Datasets, + EsSearchResult, Events, Facets, Jobs, @@ -73,6 +75,7 @@ import { fetchDatasetSuccess, fetchDatasetVersionsSuccess, fetchDatasetsSuccess, + fetchEsSearchSuccess, fetchEventsSuccess, fetchFacetsSuccess, fetchJobsSuccess, @@ -83,8 +86,8 @@ import { fetchTagsSuccess, } from '../actionCreators' import { getColumnLineage } from '../requests/columnlineage' +import { getEsSearch, getSearch } from '../requests/search' import { getLineage } from '../requests/lineage' -import { getSearch } from '../requests/search' export function* fetchTags() { try { @@ -376,6 +379,18 @@ export function* fetchRunFacetsSaga() { } } +export function* fetchEsSearchSaga() { + while (true) { + try { + const { payload } = yield take(FETCH_ES_SEARCH) + const esSearchResult: EsSearchResult = yield call(getEsSearch, payload.runId) + yield put(fetchEsSearchSuccess(esSearchResult)) + } catch (e) { + yield put(applicationError('Something went wrong 
while fetching run facets')) + } + } +} + export default function* rootSaga(): Generator { const sagasThatAreKickedOffImmediately = [fetchNamespaces(), fetchTags()] const sagasThatWatchForAction = [ diff --git a/web/src/types/api.ts b/web/src/types/api.ts index ca7d56fdf2..5da490c7f5 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -279,3 +279,34 @@ export interface ColumnLineageOutEdge { origin: string destination: string } + +// esSearch + +interface SourceCodeFacet { + language: string + _producer: string + _schemaURL: string + sourceCode: string +} + +interface EsSearchFacet { + sourceCode?: SourceCodeFacet +} + +interface Hit { + run_id: string + name: string + namespace: string + eventType: string + type: string + facets: EsSearchFacet +} + +interface Highlight { + 'facets.sourceCode.sourceCode'?: string[] +} + +export interface EsSearchResult { + hits: Hit[] + highlights: Highlight[] +} From f05bfb1f86e001e75a42d1ffcc0244dd450433df Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 19:42:55 -0700 Subject: [PATCH 06/87] Saving partial progress. 
--- web/src/components/search/Search.tsx | 33 ++- .../search/base-search/BaseSearch.tsx | 211 ++++++++---------- .../components/search/es-search/EsSearch.tsx | 104 ++++++++- web/src/store/actionCreators/index.ts | 4 +- web/src/store/reducers/index.ts | 3 + web/src/store/sagas/index.ts | 6 +- 6 files changed, 234 insertions(+), 127 deletions(-) diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index 555cc871ef..5883adc298 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -8,6 +8,7 @@ import { MqInputBase } from '../core/input-base/MqInputBase' import { useLocation } from 'react-router' import BaseSearch from './base-search/BaseSearch' import ClickAwayListener from '@mui/material/ClickAwayListener' +import EsSearch from './es-search/EsSearch' import IconButton from '@mui/material/IconButton' import React, { useEffect, useRef, useState } from 'react' import SearchPlaceholder from './SearchPlaceholder' @@ -29,6 +30,8 @@ const useCmdKShortcut = (callback: () => void) => { }, [callback]) } +const elasticSearchEnabled = true + const Search: React.FC = () => { const [search, setSearch] = useState('') const [open, setOpen] = useState(true) @@ -115,7 +118,35 @@ const Search: React.FC = () => { touchEvent='onTouchStart' onClickAway={() => setOpen(false)} > - + + {open && search.length > 0 && ( + + + {elasticSearchEnabled ? 
( + + ) : ( + + )} + + + )} + diff --git a/web/src/components/search/base-search/BaseSearch.tsx b/web/src/components/search/base-search/BaseSearch.tsx index c685fbef04..0ecffe2835 100644 --- a/web/src/components/search/base-search/BaseSearch.tsx +++ b/web/src/components/search/base-search/BaseSearch.tsx @@ -17,7 +17,6 @@ import React, { useEffect, useState } from 'react' import SearchListItem from '../SearchListItem' interface BaseSearchProps { - open: boolean search: string } @@ -72,7 +71,6 @@ const INITIAL_SEARCH_SORT_FILTER = [ ] const BaseSearch: React.FC = ({ - open, search, isSearchingInit, searchResults, @@ -106,124 +104,103 @@ const BaseSearch: React.FC = ({ }, [search, filter, sort]) return ( - - {open && search.length > 0 && ( - - - - + <> + + + + + + {searchResults.size === 0 && ( + + + {isSearching || !isSearchingInit + ? i18next.t('search.status') + : i18next.t('search.none')} + - - {searchResults.size === 0 && ( - - - {isSearching || !isSearchingInit - ? i18next.t('search.status') - : i18next.t('search.none')} - - - )} - {[...searchResults].map((resultsWithGroups, index) => { - return resultsWithGroups.map((result) => { - if (typeof result === 'string') { - // is group - if (result.length > 0) { + )} + {[...searchResults].map((resultsWithGroups, index) => { + return resultsWithGroups.map((result) => { + if (typeof result === 'string') { + // is group + if (result.length > 0) { + return ( + + + + {parseSearchGroup(result, 'group')} + + + + + {parseSearchGroup(result, 'namespace')} + + + + ) + } else return null + // is a list of group members + } else if (result.length) { + return ( + + {result.map((listItem) => { return ( - - - - {parseSearchGroup(result, 'group')} - - - - - {parseSearchGroup(result, 'namespace')} - - - + + { + setSelectedNode(listItem.nodeId) + }} + /> + ) - } else return null - // is a list of group members - } else if (result.length) { - return ( - - {result.map((listItem) => { - return ( - - { - setSelectedNode(listItem.nodeId) - 
}} - /> - - ) - })} - - ) - } else { - return null - } - }) - })} - - - )} - + })} + + ) + } else { + return null + } + }) + })} + + ) } diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index acdcdf38bf..0b910cf6e0 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -1,13 +1,107 @@ // Copyright 2018-2024 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 +import * as Redux from 'redux' +import { Chip } from '@mui/material' +import { IEsSearchState } from '../../../store/reducers/esSearch' +import { IState } from '../../../store/reducers' +import { bindActionCreators } from 'redux' +import { connect } from 'react-redux' +import { fetchEsSearch } from '../../../store/actionCreators' import Box from '@mui/system/Box' -import React from 'react' +import MqText from '../../core/text/MqText' +import React, { useEffect } from 'react' -interface EsSearchProps {} +interface StateProps { + esSearch: IEsSearchState +} + +interface DispatchProps { + fetchEsSearch: typeof fetchEsSearch +} + +interface Props { + search: string +} + +type TextSegment = { + text: string + isBold: boolean +} -const EsSearch: React.FC = () => { - return +function parseStringToSegments(input: string): TextSegment[] { + return input.split(/(.*?<\/em>)/).map((segment) => { + if (segment.startsWith('') && segment.endsWith('')) { + return { + text: segment.slice(4, -5), + isBold: true, + } + } else { + return { + text: segment, + isBold: false, + } + } + }) } -export default EsSearch +const EsSearch: React.FC = ({ + search, + fetchEsSearch, + esSearch, +}) => { + useEffect(() => { + fetchEsSearch(search) + }, [search, fetchEsSearch]) + console.log(esSearch) + + return ( + + {esSearch.data.hits.map((hit, index) => { + return ( + + {hit.name} + + {Object.entries(esSearch.data.highlights[index]).map(([key, value]) => { + return 
value.map((highlightedString: any, idx: number) => { + return ( + + + {parseStringToSegments(highlightedString || '').map((segment) => ( + + {segment.text} + + ))} + + ) + }) + })} + + + ) + })} + + ) +} + +const mapStateToProps = (state: IState) => { + return { + esSearch: state.esSearch, + } +} + +const mapDispatchToProps = (dispatch: Redux.Dispatch) => + bindActionCreators( + { + fetchEsSearch: fetchEsSearch, + }, + dispatch + ) + +export default connect(mapStateToProps, mapDispatchToProps)(EsSearch) diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts index 11115c966d..c3f85852d8 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -429,13 +429,13 @@ export const setColumnLineageGraphDepth = (depth: number) => ({ }) export const fetchEsSearch = (q: string) => ({ - type: actionTypes.FETCH_SEARCH, + type: actionTypes.FETCH_ES_SEARCH, payload: { q, }, }) export const fetchEsSearchSuccess = (search: EsSearchResult) => ({ - type: actionTypes.FETCH_SEARCH_SUCCESS, + type: actionTypes.FETCH_ES_SEARCH_SUCCESS, payload: search, }) diff --git a/web/src/store/reducers/index.ts b/web/src/store/reducers/index.ts index a12b86cf30..88dfb39be4 100644 --- a/web/src/store/reducers/index.ts +++ b/web/src/store/reducers/index.ts @@ -10,6 +10,7 @@ import dataset, { IDatasetState } from './dataset' import datasetVersions, { IDatasetVersionsState } from './datasetVersions' import datasets, { IDatasetsState } from './datasets' import display, { IDisplayState } from './display' +import esSearch, { IEsSearchState } from './esSearch' import events, { IEventsState } from './events' import facets, { IFacetsState } from './facets' import jobs, { IJobsState } from './jobs' @@ -24,6 +25,7 @@ export interface IState { datasets: IDatasetsState dataset: IDatasetState datasetVersions: IDatasetVersionsState + esSearch: IEsSearchState events: IEventsState jobs: IJobsState runs: IRunsState @@ -50,6 +52,7 @@ export 
default (history: History): Reducer => display, lineage, search, + esSearch, facets, tags, }) diff --git a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index fafb138006..ad45369404 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -383,10 +383,11 @@ export function* fetchEsSearchSaga() { while (true) { try { const { payload } = yield take(FETCH_ES_SEARCH) - const esSearchResult: EsSearchResult = yield call(getEsSearch, payload.runId) + const esSearchResult: EsSearchResult = yield call(getEsSearch, payload.q) yield put(fetchEsSearchSuccess(esSearchResult)) } catch (e) { - yield put(applicationError('Something went wrong while fetching run facets')) + console.log(e) + yield put(applicationError('Something went wrong while searching')) } } } @@ -406,6 +407,7 @@ export default function* rootSaga(): Generator { fetchColumnLineage(), fetchSearch(), deleteJobSaga(), + fetchEsSearchSaga(), deleteDatasetSaga(), deleteDatasetTagSaga(), addDatasetTagSaga(), From b5667f2d4d81f74b5f6ed7ebec84c1cfa0bec867 Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 20:30:39 -0700 Subject: [PATCH 07/87] Refinements. 
--- web/src/components/search/Search.tsx | 22 ++-- .../components/search/es-search/EsSearch.tsx | 100 +++++++++++++----- 2 files changed, 88 insertions(+), 34 deletions(-) diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index 5883adc298..12a6939afd 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -91,16 +91,18 @@ const Search: React.FC = () => { startAdornment={} endAdornment={ <> - { - setOpen(false) - setSearch('') - }} - > - - + {open && ( + { + setOpen(false) + setSearch('') + }} + > + + + )} } diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 0b910cf6e0..8ed5e28cf4 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -2,12 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 import * as Redux from 'redux' -import { Chip } from '@mui/material' +import { Chip, Divider } from '@mui/material' +import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' import { IEsSearchState } from '../../../store/reducers/esSearch' import { IState } from '../../../store/reducers' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' +import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { fetchEsSearch } from '../../../store/actionCreators' +import { theme } from '../../../helpers/theme' import Box from '@mui/system/Box' import MqText from '../../core/text/MqText' import React, { useEffect } from 'react' @@ -45,6 +48,10 @@ function parseStringToSegments(input: string): TextSegment[] { }) } +function getValueAfterLastPeriod(s: string) { + return s.split('.').pop() +} + const EsSearch: React.FC = ({ search, fetchEsSearch, @@ -53,35 +60,80 @@ const EsSearch: React.FC = ({ useEffect(() => { fetchEsSearch(search) }, [search, fetchEsSearch]) - console.log(esSearch) return ( {esSearch.data.hits.map((hit, index) => { return 
( - - {hit.name} - - {Object.entries(esSearch.data.highlights[index]).map(([key, value]) => { - return value.map((highlightedString: any, idx: number) => { - return ( - - - {parseStringToSegments(highlightedString || '').map((segment) => ( - + + + + {hit.name} + + {Object.entries(esSearch.data.highlights[index]).map(([key, value]) => { + return value.map((highlightedString: any, idx: number) => { + return ( + - {segment.text} - - ))} - - ) - }) - })} + + {parseStringToSegments(highlightedString || '').map((segment) => ( + + {segment.text} + + ))} + + ) + }) + })} + + + {hit.facets.sourceCode?.language && ( + <> + + + {'Language'} + + + + )} + + + {'Namespace'} + {hit.namespace} + ) From e78bafbb933292e84d6581a63db455bcec61f19e Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 11 Jun 2024 23:53:20 -0700 Subject: [PATCH 08/87] Small bug fixes. --- .../components/core/input-base/MqInputBase.tsx | 3 +-- web/src/components/search/Search.tsx | 16 ++++++++++++---- web/src/components/search/es-search/EsSearch.tsx | 8 ++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/web/src/components/core/input-base/MqInputBase.tsx b/web/src/components/core/input-base/MqInputBase.tsx index b3b1466e5e..fd946f1713 100644 --- a/web/src/components/core/input-base/MqInputBase.tsx +++ b/web/src/components/core/input-base/MqInputBase.tsx @@ -1,4 +1,4 @@ -// Copyright 2018-2023 contributors to the Marquez project +// Copyright 2018-2024 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 import { createTheme } from '@mui/material/styles' @@ -10,7 +10,6 @@ export interface MqInputBaseProps extends InputBaseProps {} export const MqInputBase: React.FC = (props) => { const theme = createTheme(useTheme()) - return ( { sx={{ mr: 1 }} size={'small'} onClick={() => { - setOpen(false) setSearch('') + setOpen(false) }} > )} - + } onFocus={() => setOpen(true)} @@ -118,7 +123,10 @@ const Search: React.FC = () => { setOpen(false)} + onClickAway={() => { + 
setOpen(false) + setSearch('') + }} > {open && search.length > 0 && ( diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 8ed5e28cf4..cb272b4518 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -88,7 +88,7 @@ const EsSearch: React.FC = ({ return value.map((highlightedString: any, idx: number) => { return ( = ({ size={'small'} sx={{ mr: 1 }} /> - {parseStringToSegments(highlightedString || '').map((segment) => ( + {parseStringToSegments(highlightedString || '').map((segment, index) => ( @@ -130,7 +130,7 @@ const EsSearch: React.FC = ({ )} - + {'Namespace'} {hit.namespace} From 9f9abfe97f77d795b0eb9cf44ff14bbdb4516e64 Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 12 Jun 2024 00:05:28 -0700 Subject: [PATCH 09/87] Fixing alignment --- web/src/components/search/es-search/EsSearch.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index cb272b4518..fc56e1e100 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -79,8 +79,10 @@ const EsSearch: React.FC = ({ }, }} > - - + + + + {hit.name} From 545f7cbfb278ebb4a320bce557c802b96ee33bd1 Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 12 Jun 2024 08:17:56 -0700 Subject: [PATCH 10/87] Migrating es jobs naming to be specific. 
--- .../components/search/es-search/EsSearch.tsx | 32 +++++++++++-------- web/src/store/actionCreators/actionTypes.ts | 4 +-- web/src/store/actionCreators/index.ts | 10 +++--- web/src/store/reducers/esSearch.ts | 19 +++++------ web/src/store/reducers/index.ts | 6 ++-- web/src/store/requests/search.ts | 9 ++++-- web/src/store/sagas/index.ts | 18 +++++------ web/src/types/api.ts | 2 +- 8 files changed, 55 insertions(+), 45 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index fc56e1e100..f866cbedd9 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -4,23 +4,23 @@ import * as Redux from 'redux' import { Chip, Divider } from '@mui/material' import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' -import { IEsSearchState } from '../../../store/reducers/esSearch' +import { IEsSearchJobsState } from '../../../store/reducers/esSearch' import { IState } from '../../../store/reducers' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' -import { fetchEsSearch } from '../../../store/actionCreators' +import { fetchEsSearchJobs } from '../../../store/actionCreators' import { theme } from '../../../helpers/theme' import Box from '@mui/system/Box' import MqText from '../../core/text/MqText' import React, { useEffect } from 'react' interface StateProps { - esSearch: IEsSearchState + esSearchJobs: IEsSearchJobsState } interface DispatchProps { - fetchEsSearch: typeof fetchEsSearch + fetchEsSearchJobs: typeof fetchEsSearchJobs } interface Props { @@ -54,16 +54,16 @@ function getValueAfterLastPeriod(s: string) { const EsSearch: React.FC = ({ search, - fetchEsSearch, - esSearch, + fetchEsSearchJobs, + esSearchJobs, }) => { useEffect(() => { - fetchEsSearch(search) - }, [search, fetchEsSearch]) + fetchEsSearchJobs(search) + }, [search, 
fetchEsSearchJobs]) return ( - {esSearch.data.hits.map((hit, index) => { + {esSearchJobs.data.hits.map((hit, index) => { return ( = ({ {hit.name} - {Object.entries(esSearch.data.highlights[index]).map(([key, value]) => { + {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { return ( = ({ <> - {'Language'} + + {'Language'} + = ({ )} - {'Namespace'} + + {'Namespace'} + {hit.namespace} @@ -146,14 +150,14 @@ const EsSearch: React.FC = ({ const mapStateToProps = (state: IState) => { return { - esSearch: state.esSearch, + esSearchJobs: state.esSearchJobs, } } const mapDispatchToProps = (dispatch: Redux.Dispatch) => bindActionCreators( { - fetchEsSearch: fetchEsSearch, + fetchEsSearchJobs: fetchEsSearchJobs, }, dispatch ) diff --git a/web/src/store/actionCreators/actionTypes.ts b/web/src/store/actionCreators/actionTypes.ts index 1d98c59d6f..fd885c6c98 100644 --- a/web/src/store/actionCreators/actionTypes.ts +++ b/web/src/store/actionCreators/actionTypes.ts @@ -63,8 +63,8 @@ export const FETCH_SEARCH = 'FETCH_SEARCH' export const FETCH_SEARCH_SUCCESS = 'FETCH_SEARCH _SUCCESS' // search -export const FETCH_ES_SEARCH = 'FETCH_ES_SEARCH' -export const FETCH_ES_SEARCH_SUCCESS = 'FETCH_ES_SEARCH_SUCCESS' +export const FETCH_ES_SEARCH_JOBS = 'FETCH_ES_SEARCH_JOBS' +export const FETCH_ES_SEARCH_JOBS_SUCCESS = 'FETCH_ES_SEARCH_JOBS_SUCCESS' // facets export const FETCH_RUN_FACETS = 'FETCH_RUN_FACETS' diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts index c3f85852d8..c9a07040fb 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -7,7 +7,7 @@ import { ColumnLineageGraph, Dataset, DatasetVersion, - EsSearchResult, + EsSearchResultJobs, Events, Facets, Job, @@ -428,14 +428,14 @@ export const setColumnLineageGraphDepth = (depth: number) => ({ payload: depth, }) -export const fetchEsSearch = (q: string) => ({ - type: 
actionTypes.FETCH_ES_SEARCH, +export const fetchEsSearchJobs = (q: string) => ({ + type: actionTypes.FETCH_ES_SEARCH_JOBS, payload: { q, }, }) -export const fetchEsSearchSuccess = (search: EsSearchResult) => ({ - type: actionTypes.FETCH_ES_SEARCH_SUCCESS, +export const fetchEsSearchJobsSuccess = (search: EsSearchResultJobs) => ({ + type: actionTypes.FETCH_ES_SEARCH_JOBS_SUCCESS, payload: search, }) diff --git a/web/src/store/reducers/esSearch.ts b/web/src/store/reducers/esSearch.ts index ab9a5d2adb..d6626ae8d5 100644 --- a/web/src/store/reducers/esSearch.ts +++ b/web/src/store/reducers/esSearch.ts @@ -1,28 +1,29 @@ // Copyright 2018-2023 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 -import { FETCH_ES_SEARCH, FETCH_ES_SEARCH_SUCCESS } from '../actionCreators/actionTypes' +import { FETCH_ES_SEARCH_JOBS, FETCH_ES_SEARCH_JOBS_SUCCESS } from '../actionCreators/actionTypes' -import { EsSearchResult } from '../../types/api' -import { fetchEsSearch, fetchEsSearchSuccess } from '../actionCreators' +import { EsSearchResultJobs } from '../../types/api' +import { fetchEsSearchJobs, fetchEsSearchJobsSuccess } from '../actionCreators' -export type IEsSearchState = { isLoading: boolean; data: EsSearchResult; init: boolean } +export type IEsSearchJobsState = { isLoading: boolean; data: EsSearchResultJobs; init: boolean } -export const initialState: IEsSearchState = { +export const initialState: IEsSearchJobsState = { isLoading: false, data: { hits: [], highlights: [] }, init: false, } -type IJobsAction = ReturnType & ReturnType +type IJobsAction = ReturnType & + ReturnType -export default (state = initialState, action: IJobsAction): IEsSearchState => { +export default (state = initialState, action: IJobsAction): IEsSearchJobsState => { const { type, payload } = action switch (type) { - case FETCH_ES_SEARCH: + case FETCH_ES_SEARCH_JOBS: return { ...state, isLoading: true } - case FETCH_ES_SEARCH_SUCCESS: { + case FETCH_ES_SEARCH_JOBS_SUCCESS: { return 
{ ...state, isLoading: false, diff --git a/web/src/store/reducers/index.ts b/web/src/store/reducers/index.ts index 88dfb39be4..5776ba4a41 100644 --- a/web/src/store/reducers/index.ts +++ b/web/src/store/reducers/index.ts @@ -10,7 +10,7 @@ import dataset, { IDatasetState } from './dataset' import datasetVersions, { IDatasetVersionsState } from './datasetVersions' import datasets, { IDatasetsState } from './datasets' import display, { IDisplayState } from './display' -import esSearch, { IEsSearchState } from './esSearch' +import esSearchJobs, { IEsSearchJobsState } from './esSearch' import events, { IEventsState } from './events' import facets, { IFacetsState } from './facets' import jobs, { IJobsState } from './jobs' @@ -25,7 +25,7 @@ export interface IState { datasets: IDatasetsState dataset: IDatasetState datasetVersions: IDatasetVersionsState - esSearch: IEsSearchState + esSearchJobs: IEsSearchJobsState events: IEventsState jobs: IJobsState runs: IRunsState @@ -52,7 +52,7 @@ export default (history: History): Reducer => display, lineage, search, - esSearch, + esSearchJobs, facets, tags, }) diff --git a/web/src/store/requests/search.ts b/web/src/store/requests/search.ts index 28cc779fe0..99dd6de72f 100644 --- a/web/src/store/requests/search.ts +++ b/web/src/store/requests/search.ts @@ -12,7 +12,12 @@ export const getSearch = async (q: string, filter = 'ALL', sort = 'NAME', limit return genericFetchWrapper(url, { method: 'GET' }, 'fetchSearch') } -export const getEsSearch = async (q: string) => { +export const getEsSearchJobs = async (q: string) => { const url = `${API_URL}/search/jobs?q=${q}` - return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearch') + return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearchJobs') +} + +export const getEsSearchDatasets = async (q: string) => { + const url = `${API_URL}/search/datasets?q=${q}` + return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearchDatasets') } diff --git 
a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index ad45369404..65f8620311 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -14,7 +14,7 @@ import { FETCH_DATASET, FETCH_DATASETS, FETCH_DATASET_VERSIONS, - FETCH_ES_SEARCH, + FETCH_ES_SEARCH_JOBS, FETCH_EVENTS, FETCH_JOBS, FETCH_JOB_FACETS, @@ -28,7 +28,7 @@ import { Dataset, DatasetVersion, Datasets, - EsSearchResult, + EsSearchResultJobs, Events, Facets, Jobs, @@ -75,7 +75,7 @@ import { fetchDatasetSuccess, fetchDatasetVersionsSuccess, fetchDatasetsSuccess, - fetchEsSearchSuccess, + fetchEsSearchJobsSuccess, fetchEventsSuccess, fetchFacetsSuccess, fetchJobsSuccess, @@ -86,7 +86,7 @@ import { fetchTagsSuccess, } from '../actionCreators' import { getColumnLineage } from '../requests/columnlineage' -import { getEsSearch, getSearch } from '../requests/search' +import { getEsSearchJobs, getSearch } from '../requests/search' import { getLineage } from '../requests/lineage' export function* fetchTags() { @@ -379,12 +379,12 @@ export function* fetchRunFacetsSaga() { } } -export function* fetchEsSearchSaga() { +export function* fetchEsSearchJobsSaga() { while (true) { try { - const { payload } = yield take(FETCH_ES_SEARCH) - const esSearchResult: EsSearchResult = yield call(getEsSearch, payload.q) - yield put(fetchEsSearchSuccess(esSearchResult)) + const { payload } = yield take(FETCH_ES_SEARCH_JOBS) + const EsSearchResultJobs: EsSearchResultJobs = yield call(getEsSearchJobs, payload.q) + yield put(fetchEsSearchJobsSuccess(EsSearchResultJobs)) } catch (e) { console.log(e) yield put(applicationError('Something went wrong while searching')) @@ -407,7 +407,7 @@ export default function* rootSaga(): Generator { fetchColumnLineage(), fetchSearch(), deleteJobSaga(), - fetchEsSearchSaga(), + fetchEsSearchJobsSaga(), deleteDatasetSaga(), deleteDatasetTagSaga(), addDatasetTagSaga(), diff --git a/web/src/types/api.ts b/web/src/types/api.ts index 5da490c7f5..1967b8f16a 100644 --- 
a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -306,7 +306,7 @@ interface Highlight { 'facets.sourceCode.sourceCode'?: string[] } -export interface EsSearchResult { +export interface EsSearchResultJobs { hits: Hit[] highlights: Highlight[] } From ec4c0c69e2b475f56d45536becc369f4765b044e Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 12 Jun 2024 09:19:46 -0700 Subject: [PATCH 11/87] Adding boilerplate for dataset es search --- .../main/java/marquez/api/SearchResource.java | 57 +++++++++-------- web/src/store/actionCreators/actionTypes.ts | 4 +- web/src/store/actionCreators/index.ts | 13 ++++ web/src/store/reducers/esSearchDatasets.ts | 40 ++++++++++++ web/src/store/reducers/index.ts | 3 + web/src/store/sagas/index.ts | 22 ++++++- web/src/types/api.ts | 64 +++++++++++++++++-- 7 files changed, 170 insertions(+), 33 deletions(-) create mode 100644 web/src/store/reducers/esSearchDatasets.ts diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index ff1ba11921..1512548c3a 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -129,12 +129,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc }, ObjectNode.class); - List hits = - response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); - List>> highlights = - response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); - - return Response.ok(new EsResult(hits, highlights)).build(); + return formatEsResponse(response); } else { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } @@ -156,32 +151,42 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @Path("/datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { if (this.elasticsearchClient != null) { - SearchResponse response = - this.elasticsearchClient.search( - s -> 
- s.index("datasets") - .query( - q -> - q.multiMatch( - m -> - m.query(query) - .fields( - "facets.schema.fields.name", - "facets.schema.fields.type", - "facets.columnLineage.fields.*.inputFields.name", - "facets.columnLineage.fields.*.inputFields.namespace", - "facets.columnLineage.fields.*.inputFields.field", - "facets.columnLineage.fields.*.transformationDescription", - "facets.columnLineage.fields.*.transformationType"))), + String[] fields = { + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + "facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType" + }; + SearchResponse response = this.elasticsearchClient.search( + s -> s.index("datasets") + .query(q -> q.multiMatch(m -> m.query(query) + .fields(Arrays.stream(fields).toList()))) + .highlight(hl -> { + for (String field : fields) { + hl.fields(field, f -> f.type("plain")); + } + return hl; + }), ObjectNode.class); - return Response.ok( - response.hits().hits().stream().map(Hit::source).collect(Collectors.toList())) - .build(); + + return formatEsResponse(response); } else { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } } + private Response formatEsResponse(SearchResponse response) { + List hits = + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); + List>> highlights = + response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); + + return Response.ok(new EsResult(hits, highlights)).build(); + } + @ToString public static final class EsResult { @Getter private final List hits; diff --git a/web/src/store/actionCreators/actionTypes.ts b/web/src/store/actionCreators/actionTypes.ts index fd885c6c98..76bc11422d 100644 --- a/web/src/store/actionCreators/actionTypes.ts +++ 
b/web/src/store/actionCreators/actionTypes.ts @@ -62,9 +62,11 @@ export const SET_SHOW_FULL_GRAPH = 'SET_SHOW_FULL_GRAPH' export const FETCH_SEARCH = 'FETCH_SEARCH' export const FETCH_SEARCH_SUCCESS = 'FETCH_SEARCH _SUCCESS' -// search +// esSearch export const FETCH_ES_SEARCH_JOBS = 'FETCH_ES_SEARCH_JOBS' export const FETCH_ES_SEARCH_JOBS_SUCCESS = 'FETCH_ES_SEARCH_JOBS_SUCCESS' +export const FETCH_ES_SEARCH_DATASETS = 'FETCH_ES_SEARCH_DATASETS' +export const FETCH_ES_SEARCH_DATASETS_SUCCESS = 'FETCH_ES_SEARCH_DATASETS_SUCCESS' // facets export const FETCH_RUN_FACETS = 'FETCH_RUN_FACETS' diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts index c9a07040fb..7d9697dd23 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -7,6 +7,7 @@ import { ColumnLineageGraph, Dataset, DatasetVersion, + EsSearchResultDatasets, EsSearchResultJobs, Events, Facets, @@ -439,3 +440,15 @@ export const fetchEsSearchJobsSuccess = (search: EsSearchResultJobs) => ({ type: actionTypes.FETCH_ES_SEARCH_JOBS_SUCCESS, payload: search, }) + +export const fetchEsSearchDatasets = (q: string) => ({ + type: actionTypes.FETCH_ES_SEARCH_DATASETS, + payload: { + q, + }, +}) + +export const fetchEsSearchDatasetsSuccess = (search: EsSearchResultDatasets) => ({ + type: actionTypes.FETCH_ES_SEARCH_DATASETS_SUCCESS, + payload: search, +}) diff --git a/web/src/store/reducers/esSearchDatasets.ts b/web/src/store/reducers/esSearchDatasets.ts new file mode 100644 index 0000000000..ff83607f6f --- /dev/null +++ b/web/src/store/reducers/esSearchDatasets.ts @@ -0,0 +1,40 @@ +import { EsSearchResultDatasets } from '../../types/api' +import { + FETCH_ES_SEARCH_DATASETS, + FETCH_ES_SEARCH_DATASETS_SUCCESS, +} from '../actionCreators/actionTypes' +import { fetchEsSearchDatasets, fetchEsSearchDatasetsSuccess } from '../actionCreators' + +export type IEsSearchDatasetsState = { + isLoading: boolean + data: EsSearchResultDatasets + init: 
boolean +} + +export const initialState: IEsSearchDatasetsState = { + isLoading: false, + data: { hits: [], highlights: [] }, + init: false, +} + +type IDatasetsAction = ReturnType & + ReturnType + +export default (state = initialState, action: IDatasetsAction): IEsSearchDatasetsState => { + const { type, payload } = action + + switch (type) { + case FETCH_ES_SEARCH_DATASETS: + return { ...state, isLoading: true } + case FETCH_ES_SEARCH_DATASETS_SUCCESS: { + return { + ...state, + isLoading: false, + init: true, + data: payload, + } + } + default: + return state + } +} diff --git a/web/src/store/reducers/index.ts b/web/src/store/reducers/index.ts index 5776ba4a41..aee8660353 100644 --- a/web/src/store/reducers/index.ts +++ b/web/src/store/reducers/index.ts @@ -10,6 +10,7 @@ import dataset, { IDatasetState } from './dataset' import datasetVersions, { IDatasetVersionsState } from './datasetVersions' import datasets, { IDatasetsState } from './datasets' import display, { IDisplayState } from './display' +import esSearchDatasets, { IEsSearchDatasetsState } from './esSearchDatasets' import esSearchJobs, { IEsSearchJobsState } from './esSearch' import events, { IEventsState } from './events' import facets, { IFacetsState } from './facets' @@ -26,6 +27,7 @@ export interface IState { dataset: IDatasetState datasetVersions: IDatasetVersionsState esSearchJobs: IEsSearchJobsState + esSearchDatasets: IEsSearchDatasetsState events: IEventsState jobs: IJobsState runs: IRunsState @@ -53,6 +55,7 @@ export default (history: History): Reducer => lineage, search, esSearchJobs, + esSearchDatasets, facets, tags, }) diff --git a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index 65f8620311..eb02e8f5b2 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -14,6 +14,7 @@ import { FETCH_DATASET, FETCH_DATASETS, FETCH_DATASET_VERSIONS, + FETCH_ES_SEARCH_DATASETS, FETCH_ES_SEARCH_JOBS, FETCH_EVENTS, FETCH_JOBS, @@ -28,6 +29,7 @@ import { Dataset, 
DatasetVersion, Datasets, + EsSearchResultDatasets, EsSearchResultJobs, Events, Facets, @@ -75,6 +77,7 @@ import { fetchDatasetSuccess, fetchDatasetVersionsSuccess, fetchDatasetsSuccess, + fetchEsSearchDatasetsSuccess, fetchEsSearchJobsSuccess, fetchEventsSuccess, fetchFacetsSuccess, @@ -86,7 +89,7 @@ import { fetchTagsSuccess, } from '../actionCreators' import { getColumnLineage } from '../requests/columnlineage' -import { getEsSearchJobs, getSearch } from '../requests/search' +import { getEsSearchDatasets, getEsSearchJobs, getSearch } from '../requests/search' import { getLineage } from '../requests/lineage' export function* fetchTags() { @@ -386,7 +389,21 @@ export function* fetchEsSearchJobsSaga() { const EsSearchResultJobs: EsSearchResultJobs = yield call(getEsSearchJobs, payload.q) yield put(fetchEsSearchJobsSuccess(EsSearchResultJobs)) } catch (e) { - console.log(e) + yield put(applicationError('Something went wrong while searching')) + } + } +} + +export function* fetchEsSearchDatasetsSaga() { + while (true) { + try { + const { payload } = yield take(FETCH_ES_SEARCH_DATASETS) + const EsSearchResultDatasets: EsSearchResultDatasets = yield call( + getEsSearchDatasets, + payload.q + ) + yield put(fetchEsSearchDatasetsSuccess(EsSearchResultDatasets)) + } catch (e) { yield put(applicationError('Something went wrong while searching')) } } @@ -408,6 +425,7 @@ export default function* rootSaga(): Generator { fetchSearch(), deleteJobSaga(), fetchEsSearchJobsSaga(), + fetchEsSearchDatasetsSaga(), deleteDatasetSaga(), deleteDatasetTagSaga(), addDatasetTagSaga(), diff --git a/web/src/types/api.ts b/web/src/types/api.ts index 1967b8f16a..b3a330a801 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -282,6 +282,7 @@ export interface ColumnLineageOutEdge { // esSearch +// jobs interface SourceCodeFacet { language: string _producer: string @@ -293,7 +294,7 @@ interface EsSearchFacet { sourceCode?: SourceCodeFacet } -interface Hit { +interface JobHit { run_id: 
string name: string namespace: string @@ -302,11 +303,66 @@ interface Hit { facets: EsSearchFacet } -interface Highlight { +interface JobHighlight { 'facets.sourceCode.sourceCode'?: string[] } export interface EsSearchResultJobs { - hits: Hit[] - highlights: Highlight[] + hits: JobHit[] + highlights: JobHighlight[] +} + +// datasets +type DatasetHighlight = { + [key: string]: string[] +} + +type SearchInputField = { + namespace: string + name: string + field: string +} + +type ColumnLineageField = { + inputFields: SearchInputField[] + transformationDescription: string + transformationType: string +} + +type SchemaField = { + name: string + type: string + fields: any[] +} + +type SchemaFacet = { + _producer: string + _schemaURL: string + fields: SchemaField[] +} + +type ColumnLineageFacet = { + _producer: string + _schemaURL: string + fields: { + [key: string]: ColumnLineageField + } +} + +type EsSearchDatasetFacets = { + schema: SchemaFacet + columnLineage: ColumnLineageFacet +} + +type DatasetHit = { + run_id: string + name: string + namespace: string + eventType: string + facets: EsSearchDatasetFacets +} + +export type EsSearchResultDatasets = { + hits: DatasetHit[] + highlights: DatasetHighlight[] } From 897f1be79129525ac9c69d4bdac260971bdff9c4 Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 12 Jun 2024 09:49:24 -0700 Subject: [PATCH 12/87] Adding datasets. 
--- .../components/search/es-search/EsSearch.tsx | 90 +++++++++++++++++-- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index f866cbedd9..f53cca396b 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -4,12 +4,14 @@ import * as Redux from 'redux' import { Chip, Divider } from '@mui/material' import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' +import { IEsSearchDatasetsState } from '../../../store/reducers/esSearchDatasets' import { IEsSearchJobsState } from '../../../store/reducers/esSearch' import { IState } from '../../../store/reducers' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' -import { fetchEsSearchJobs } from '../../../store/actionCreators' +import { faDatabase } from '@fortawesome/free-solid-svg-icons' +import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' import { theme } from '../../../helpers/theme' import Box from '@mui/system/Box' import MqText from '../../core/text/MqText' @@ -17,10 +19,12 @@ import React, { useEffect } from 'react' interface StateProps { esSearchJobs: IEsSearchJobsState + esSearchDatasets: IEsSearchDatasetsState } interface DispatchProps { fetchEsSearchJobs: typeof fetchEsSearchJobs + fetchEsSearchDatasets: typeof fetchEsSearchDatasets } interface Props { @@ -55,10 +59,13 @@ function getValueAfterLastPeriod(s: string) { const EsSearch: React.FC = ({ search, fetchEsSearchJobs, + fetchEsSearchDatasets, esSearchJobs, + esSearchDatasets, }) => { useEffect(() => { fetchEsSearchJobs(search) + fetchEsSearchDatasets(search) }, [search, fetchEsSearchJobs]) return ( @@ -118,13 +125,16 @@ const EsSearch: React.FC = ({ })} + + + {'Namespace'} + {hit.namespace} + {hit.facets.sourceCode?.language && ( <> - - 
{'Language'} - + {'Language'} = ({ )} + + + + ) + })} + {esSearchDatasets.data.hits.map((hit, index) => { + return ( + + + + + + + {hit.name} + + {Object.entries(esSearchDatasets.data.highlights[index]).map(([key, value]) => { + return value.map((highlightedString: any, idx: number) => { + return ( + + + {parseStringToSegments(highlightedString || '').map((segment, index) => ( + + {segment.text} + + ))} + + ) + }) + })} + + - - {'Namespace'} - + {'Namespace'} {hit.namespace} + + + Total Fields + {hit.facets.schema.fields.length.toString()} fields + ) @@ -151,6 +225,7 @@ const EsSearch: React.FC = ({ const mapStateToProps = (state: IState) => { return { esSearchJobs: state.esSearchJobs, + esSearchDatasets: state.esSearchDatasets, } } @@ -158,6 +233,7 @@ const mapDispatchToProps = (dispatch: Redux.Dispatch) => bindActionCreators( { fetchEsSearchJobs: fetchEsSearchJobs, + fetchEsSearchDatasets: fetchEsSearchDatasets, }, dispatch ) From 38c8db6350292841f23c5edc3d057d49ffaaf1cd Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 12 Jun 2024 12:30:01 -0700 Subject: [PATCH 13/87] Adding polish for more data. 
--- web/src/components/search/Search.tsx | 10 ++++++++-- .../components/search/es-search/EsSearch.tsx | 18 +++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index 34687e77bf..ba680b0b4a 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -3,7 +3,7 @@ import { Box, Chip } from '@mui/material' import { Close, SearchOutlined } from '@mui/icons-material' -import { DRAWER_WIDTH, theme } from '../../helpers/theme' +import { DRAWER_WIDTH, HEADER_HEIGHT, theme } from '../../helpers/theme' import { MqInputBase } from '../core/input-base/MqInputBase' import { useLocation } from 'react-router' import BaseSearch from './base-search/BaseSearch' @@ -147,7 +147,13 @@ const Search: React.FC = () => { borderBottomLeftRadius: 0, }} > - + {elasticSearchEnabled ? ( ) : ( diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index f53cca396b..ccc83c361d 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -13,7 +13,9 @@ import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { faDatabase } from '@fortawesome/free-solid-svg-icons' import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' import { theme } from '../../../helpers/theme' +import { truncateText } from '../../../helpers/text' import Box from '@mui/system/Box' +import MQTooltip from '../../core/tooltip/MQTooltip' import MqText from '../../core/text/MqText' import React, { useEffect } from 'react' @@ -91,7 +93,7 @@ const EsSearch: React.FC = ({ - {hit.name} + {truncateText(hit.name, 20)} {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { @@ -143,7 +145,6 @@ const EsSearch: React.FC = ({ )} - ) @@ -169,7 +170,11 @@ const 
EsSearch: React.FC = ({ - {hit.name} + + + {truncateText(hit.name, 20)} + + {Object.entries(esSearchDatasets.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { @@ -181,12 +186,7 @@ const EsSearch: React.FC = ({ mb={0.5} mr={0.5} > - + {parseStringToSegments(highlightedString || '').map((segment, index) => ( Date: Wed, 12 Jun 2024 16:19:20 -0700 Subject: [PATCH 14/87] Empty state and other small enhancements. --- web/src/components/search/Search.tsx | 42 ++++++++++++++++++- .../components/search/es-search/EsSearch.tsx | 15 ++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index ba680b0b4a..58c2100446 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -4,9 +4,12 @@ import { Box, Chip } from '@mui/material' import { Close, SearchOutlined } from '@mui/icons-material' import { DRAWER_WIDTH, HEADER_HEIGHT, theme } from '../../helpers/theme' +import { IState } from '../../store/reducers' import { MqInputBase } from '../core/input-base/MqInputBase' +import { connect } from 'react-redux' import { useLocation } from 'react-router' import BaseSearch from './base-search/BaseSearch' +import CircularProgress from '@mui/material/CircularProgress/CircularProgress' import ClickAwayListener from '@mui/material/ClickAwayListener' import EsSearch from './es-search/EsSearch' import IconButton from '@mui/material/IconButton' @@ -30,9 +33,30 @@ const useCmdKShortcut = (callback: () => void) => { }, [callback]) } +const useEscapeShortcut = (callback: () => void) => { + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + event.preventDefault() // Prevent the default browser action + callback() + } + } + + window.addEventListener('keydown', handleKeyDown) + + return () => { + window.removeEventListener('keydown', handleKeyDown) + } + }, 
[callback]) +} + const elasticSearchEnabled = true -const Search: React.FC = () => { +interface StateProps { + isLoading: boolean +} + +const Search: React.FC = ({ isLoading }: StateProps) => { const [search, setSearch] = useState('') const [open, setOpen] = useState(true) @@ -45,6 +69,14 @@ const Search: React.FC = () => { } }) + useEffect(() => { + if (search === '') setOpen(false) + }, [search]) + + useEscapeShortcut(() => { + setOpen(false) + }) + const location = useLocation() useEffect(() => { // close search on a route change @@ -91,8 +123,10 @@ const Search: React.FC = () => { startAdornment={} endAdornment={ <> + {isLoading && } {open && ( { @@ -169,4 +203,8 @@ const Search: React.FC = () => { ) } -export default Search +const mapStateToProps = (state: IState) => ({ + isLoading: state.esSearchJobs.isLoading || state.esSearchDatasets.isLoading, +}) + +export default connect(mapStateToProps)(Search) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index ccc83c361d..216d1cce87 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -16,6 +16,7 @@ import { theme } from '../../../helpers/theme' import { truncateText } from '../../../helpers/text' import Box from '@mui/system/Box' import MQTooltip from '../../core/tooltip/MQTooltip' +import MqEmpty from '../../core/empty/MqEmpty' import MqText from '../../core/text/MqText' import React, { useEffect } from 'react' @@ -70,6 +71,14 @@ const EsSearch: React.FC = ({ fetchEsSearchDatasets(search) }, [search, fetchEsSearchJobs]) + if (esSearchJobs.data.hits.length === 0 && esSearchDatasets.data.hits.length === 0) { + return ( + + + + ) + } + return ( {esSearchJobs.data.hits.map((hit, index) => { @@ -93,7 +102,11 @@ const EsSearch: React.FC = ({ - {truncateText(hit.name, 20)} + + + {truncateText(hit.name, 20)} + + {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { 
return value.map((highlightedString: any, idx: number) => { From bb5ce2d5e0ff22b3fe1584c93cd30f1a7e16f75c Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 18 Jun 2024 12:26:52 -0700 Subject: [PATCH 15/87] Adding arrow key functionality. --- .../components/search/es-search/EsSearch.tsx | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 216d1cce87..6f3ea98711 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -7,6 +7,7 @@ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' import { IEsSearchDatasetsState } from '../../../store/reducers/esSearchDatasets' import { IEsSearchJobsState } from '../../../store/reducers/esSearch' import { IState } from '../../../store/reducers' +import { Nullable } from '../../../types/util/Nullable' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' @@ -59,6 +60,24 @@ function getValueAfterLastPeriod(s: string) { return s.split('.').pop() } +const useArrowKeys = (callback: (direction: 'up' | 'down') => void) => { + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'ArrowDown') { + event.preventDefault() // Prevent the default browser action + callback('down') + } else if (event.key === 'ArrowUp') { + event.preventDefault() // Prevent the default browser action + callback('up') + } + } + window.addEventListener('keydown', handleKeyDown) + return () => { + window.removeEventListener('keydown', handleKeyDown) + } + }, [callback]) +} + const EsSearch: React.FC = ({ search, fetchEsSearchJobs, @@ -66,11 +85,33 @@ const EsSearch: React.FC = ({ esSearchJobs, esSearchDatasets, }) => { + const [selectedIndex, setSelectedIndex] = React.useState>(null) + + useArrowKeys((direction) => { + console.log('what') + if 
(direction === 'up') { + setSelectedIndex(selectedIndex === null ? null : Math.max(selectedIndex - 1, 0)) + } else { + setSelectedIndex( + selectedIndex === null + ? 0 + : Math.min( + selectedIndex + 1, + esSearchJobs.data.hits.length + esSearchDatasets.data.hits.length - 1 + ) + ) + } + }) + useEffect(() => { fetchEsSearchJobs(search) fetchEsSearchDatasets(search) }, [search, fetchEsSearchJobs]) + useEffect(() => { + setSelectedIndex(null) + }, [esSearchJobs.data.hits, esSearchDatasets.data.hits]) + if (esSearchJobs.data.hits.length === 0 && esSearchDatasets.data.hits.length === 0) { return ( @@ -95,6 +136,7 @@ const EsSearch: React.FC = ({ '&:hover': { backgroundColor: theme.palette.action.hover, }, + backgroundColor: selectedIndex === index ? theme.palette.action.hover : undefined, }} > @@ -176,6 +218,10 @@ const EsSearch: React.FC = ({ '&:hover': { backgroundColor: theme.palette.action.hover, }, + backgroundColor: + selectedIndex === index + esSearchDatasets.data.hits.length + ? theme.palette.action.hover + : undefined, }} > From cffd9cca4b123e8051c2c163a487888a69538f05 Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 18 Jun 2024 12:31:05 -0700 Subject: [PATCH 16/87] Removing console log --- web/src/components/search/es-search/EsSearch.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 6f3ea98711..241077b740 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -88,7 +88,6 @@ const EsSearch: React.FC = ({ const [selectedIndex, setSelectedIndex] = React.useState>(null) useArrowKeys((direction) => { - console.log('what') if (direction === 'up') { setSelectedIndex(selectedIndex === null ? 
null : Math.max(selectedIndex - 1, 0)) } else { From 6f62898ec5e0454fbba116d6c74245d463fe46c0 Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 18 Jun 2024 13:23:15 -0700 Subject: [PATCH 17/87] Spotless --- .../main/java/marquez/api/SearchResource.java | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 1512548c3a..3ba375d614 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -152,24 +152,29 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { if (this.elasticsearchClient != null) { String[] fields = { - "facets.schema.fields.name", - "facets.schema.fields.type", - "facets.columnLineage.fields.*.inputFields.name", - "facets.columnLineage.fields.*.inputFields.namespace", - "facets.columnLineage.fields.*.inputFields.field", - "facets.columnLineage.fields.*.transformationDescription", - "facets.columnLineage.fields.*.transformationType" + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + "facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType" }; - SearchResponse response = this.elasticsearchClient.search( - s -> s.index("datasets") - .query(q -> q.multiMatch(m -> m.query(query) - .fields(Arrays.stream(fields).toList()))) - .highlight(hl -> { - for (String field : fields) { - hl.fields(field, f -> f.type("plain")); - } - return hl; - }), + SearchResponse response = + this.elasticsearchClient.search( + s -> + s.index("datasets") + .query( + q -> + q.multiMatch( + m -> 
m.query(query).fields(Arrays.stream(fields).toList()))) + .highlight( + hl -> { + for (String field : fields) { + hl.fields(field, f -> f.type("plain")); + } + return hl; + }), ObjectNode.class); return formatEsResponse(response); @@ -180,9 +185,9 @@ public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws I private Response formatEsResponse(SearchResponse response) { List hits = - response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); List>> highlights = - response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); + response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); return Response.ok(new EsResult(hits, highlights)).build(); } From 4a5f1aada3b239f599d7185f1f92e2e146f3660c Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 25 Jun 2024 15:36:49 -0700 Subject: [PATCH 18/87] Refinements to queries. --- .../main/java/marquez/api/SearchResource.java | 1 + .../components/search/es-search/EsSearch.tsx | 72 +++++++++++-------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 3ba375d614..8a9b40b818 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -99,6 +99,7 @@ public Response search( public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { if (this.elasticsearchClient != null) { String[] fields = { + "facets.sql.query", "facets.sourceCode.sourceCode", "facets.sourceCode.language", "run_id", diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 241077b740..5505cb931f 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -37,7 +37,7 @@ interface 
Props { type TextSegment = { text: string - isBold: boolean + isHighlighted: boolean } function parseStringToSegments(input: string): TextSegment[] { @@ -45,12 +45,12 @@ function parseStringToSegments(input: string): TextSegment[] { if (segment.startsWith('') && segment.endsWith('')) { return { text: segment.slice(4, -5), - isBold: true, + isHighlighted: true, } } else { return { text: segment, - isBold: false, + isHighlighted: false, } } }) @@ -142,12 +142,21 @@ const EsSearch: React.FC = ({ - + - {truncateText(hit.name, 20)} + {truncateText(hit.name, 30)} + + + {truncateText(hit.namespace, 30)} + + + + + + Match {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { @@ -164,28 +173,27 @@ const EsSearch: React.FC = ({ size={'small'} sx={{ mr: 1 }} /> - {parseStringToSegments(highlightedString || '').map((segment, index) => ( - - {segment.text} - - ))} + + {parseStringToSegments(highlightedString || '').map( + (segment, index) => ( + + {segment.text} + + ) + )} + ) }) })} - - - {'Namespace'} - {hit.namespace} - {hit.facets.sourceCode?.language && ( <> @@ -227,13 +235,22 @@ const EsSearch: React.FC = ({ - + - {truncateText(hit.name, 20)} + {truncateText(hit.name, 30)} + + + + + {truncateText(hit.namespace, 30)} - + + + + Match + {Object.entries(esSearchDatasets.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { return ( @@ -251,7 +268,7 @@ const EsSearch: React.FC = ({ small key={`${key}-${highlightedString}-${segment.text}-${index}`} inline - highlight={segment.isBold} + highlight={segment.isHighlighted} > {segment.text} @@ -262,11 +279,6 @@ const EsSearch: React.FC = ({ })} - - - {'Namespace'} - {hit.namespace} - Total Fields From 40bc497e1bcbdbacce49463924c455f0de213e45 Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 25 Jun 2024 17:05:41 -0700 Subject: [PATCH 19/87] Adding debounce. 
--- .../components/search/es-search/EsSearch.tsx | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 5505cb931f..e81988d0e9 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -10,6 +10,7 @@ import { IState } from '../../../store/reducers' import { Nullable } from '../../../types/util/Nullable' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' +import { debounce } from 'lodash' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { faDatabase } from '@fortawesome/free-solid-svg-icons' import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' @@ -19,7 +20,7 @@ import Box from '@mui/system/Box' import MQTooltip from '../../core/tooltip/MQTooltip' import MqEmpty from '../../core/empty/MqEmpty' import MqText from '../../core/text/MqText' -import React, { useEffect } from 'react' +import React, { useCallback, useEffect } from 'react' interface StateProps { esSearchJobs: IEsSearchJobsState @@ -78,6 +79,9 @@ const useArrowKeys = (callback: (direction: 'up' | 'down') => void) => { }, [callback]) } +const FIELDS_TO_PRINT = 5 +const DEBOUNCE_TIME_MS = 500 + const EsSearch: React.FC = ({ search, fetchEsSearchJobs, @@ -102,10 +106,20 @@ const EsSearch: React.FC = ({ } }) + const debouncedFetchJobs = useCallback( + debounce((searchTerm) => fetchEsSearchJobs(searchTerm), DEBOUNCE_TIME_MS), + [] + ) + + const debouncedFetchDatasets = useCallback( + debounce((searchTerm) => fetchEsSearchDatasets(searchTerm), DEBOUNCE_TIME_MS), + [] + ) + useEffect(() => { - fetchEsSearchJobs(search) - fetchEsSearchDatasets(search) - }, [search, fetchEsSearchJobs]) + debouncedFetchJobs(search) + debouncedFetchDatasets(search) + }, [search, debouncedFetchJobs, debouncedFetchDatasets]) useEffect(() => { 
setSelectedIndex(null) @@ -281,8 +295,25 @@ const EsSearch: React.FC = ({ - Total Fields - {hit.facets.schema.fields.length.toString()} fields + Fields + + {hit.facets.schema.fields.slice(0, FIELDS_TO_PRINT).map((field) => { + return ( + + ) + })} + {hit.facets.schema.fields.length > FIELDS_TO_PRINT && ( + {`+ ${ + hit.facets.schema.fields.length - FIELDS_TO_PRINT + }`} + )} + From 19c76c82cfa39e4c42b40fa1d573db18ab7c880a Mon Sep 17 00:00:00 2001 From: phix Date: Tue, 25 Jun 2024 18:48:08 -0700 Subject: [PATCH 20/87] Fixing alignment issues. --- .../components/search/es-search/EsSearch.tsx | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index e81988d0e9..d7f907d309 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -153,20 +153,22 @@ const EsSearch: React.FC = ({ }} > - - - - - - - {truncateText(hit.name, 30)} - - - - - {truncateText(hit.namespace, 30)} - - + + + + + + + + {truncateText(hit.name, 30)} + + + + + {truncateText(hit.namespace, 30)} + + + @@ -246,20 +248,22 @@ const EsSearch: React.FC = ({ }} > - - - - - - - {truncateText(hit.name, 30)} - - - - - {truncateText(hit.namespace, 30)} - - + + + + + + + + {truncateText(hit.name, 30)} + + + + + {truncateText(hit.namespace, 30)} + + + From c7f5860b2831368066b8e808d06eccd59add1a92 Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 26 Jun 2024 11:55:03 -0700 Subject: [PATCH 21/87] Saving updates for password setting via env config for elasticsearch. 
--- api/src/main/java/marquez/MarquezApp.java | 23 +++++++++---- api/src/main/java/marquez/MarquezConfig.java | 5 +++ .../java/marquez/search/ElasticConfig.java | 32 +++++++++++++++++++ docker-compose.seed.yml | 3 +- docker-compose.yml | 9 +++++- marquez.dev.yml | 8 +++++ marquez.example.yml | 7 ++++ .../components/search/es-search/EsSearch.tsx | 4 +-- web/src/types/api.ts | 2 +- 9 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 api/src/main/java/marquez/search/ElasticConfig.java diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index ab2340f500..ef36859107 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -40,13 +40,20 @@ import marquez.db.DbMigration; import marquez.jobs.DbRetentionJob; import marquez.logging.LoggingMdcFilter; +import marquez.search.ElasticConfig; import marquez.tracing.SentryConfig; import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; import marquez.tracing.TracingServletFilter; import org.apache.http.Header; import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.http.message.BasicHeader; import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.RestClientBuilder; import org.flywaydb.core.api.FlywayException; import org.jdbi.v3.core.Jdbi; import org.jdbi.v3.core.statement.SqlLogger; @@ -136,7 +143,7 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } final Jdbi jdbi = newJdbi(config, env, source); - final ElasticsearchClient elasticsearchClient = newElasticsearchClient(); + final ElasticsearchClient elasticsearchClient = newElasticsearchClient(config.getElasticConfig()); final MarquezContext marquezContext = 
MarquezContext.builder() .jdbi(jdbi) @@ -159,15 +166,17 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { Exclusions.use(exclusions); } - private ElasticsearchClient newElasticsearchClient() { + private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { String host = "search"; int port = 9200; + + final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials(AuthScope.ANY, + new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); + RestClient restClient = - RestClient.builder(new HttpHost(host, port, "http")) - .setDefaultHeaders( - new Header[] { - // new BasicHeader("Authorization", "ApiKey " + apiKey) - }) + RestClient.builder(new HttpHost(elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) + .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); ElasticsearchTransport transport = new RestClientTransport(restClient, new JacksonJsonpMapper()); diff --git a/api/src/main/java/marquez/MarquezConfig.java b/api/src/main/java/marquez/MarquezConfig.java index 7336b06c22..dc3848666c 100644 --- a/api/src/main/java/marquez/MarquezConfig.java +++ b/api/src/main/java/marquez/MarquezConfig.java @@ -16,6 +16,7 @@ import marquez.db.FlywayFactory; import marquez.graphql.GraphqlConfig; import marquez.jobs.DbRetentionConfig; +import marquez.search.ElasticConfig; import marquez.service.models.Tag; import marquez.tracing.SentryConfig; @@ -44,6 +45,10 @@ public class MarquezConfig extends Configuration { @JsonProperty("sentry") private final SentryConfig sentry = new SentryConfig(); + @Getter + @JsonProperty("elastic") + private final ElasticConfig elasticConfig = new ElasticConfig(); + @Getter @Setter @JsonProperty("dbRetention") diff --git a/api/src/main/java/marquez/search/ElasticConfig.java 
b/api/src/main/java/marquez/search/ElasticConfig.java new file mode 100644 index 0000000000..da4ed306c9 --- /dev/null +++ b/api/src/main/java/marquez/search/ElasticConfig.java @@ -0,0 +1,32 @@ +package marquez.search; + +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Getter; + +public class ElasticConfig { + public static final boolean ENABLED = false; + public static final String SCHEME = "http"; + public static final String HOST = "search"; + public static final int PORT = 9200; + public static final String USERNAME = "elastic"; + public static final String PASSWORD = "elastic"; + + @Getter @JsonProperty + private boolean enabled = ENABLED; + + @Getter @JsonProperty + private String scheme = SCHEME; + + @Getter @JsonProperty + private String host = HOST; + + @Getter @JsonProperty + private int port = PORT; + + @Getter @JsonProperty + private String username = USERNAME; + + @Getter @JsonProperty + private String password = PASSWORD; + +} diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index 3aa9ea373a..25ce4de2cb 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -13,4 +13,5 @@ services: - "db:postgres" depends_on: - api - entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./seed.sh"] + - search + entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "search:9200", "--", "./seed.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 207aa55b0e..6693d98d79 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ services: environment: - MARQUEZ_PORT=${API_PORT} - MARQUEZ_ADMIN_PORT=${API_ADMIN_PORT} + - ELASTIC_USER=elastic + - ELASTIC_PASSWORD=elastic ports: - "${API_PORT}:${API_PORT}" - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" @@ -20,6 +22,9 @@ services: - /opt/marquez/wait-for-it.sh - db:${POSTGRES_PORT} - -- + - /opt/marquez/wait-for-it.sh + - search:9200 + - -- - ./entrypoint.sh db: @@ -52,8 +57,10 @@ services: soft: 65536 hard: 65536 environment: - - 
xpack.security.enabled=false + - xpack.security.enabled=true + - xpack.security.authc.api_key.enabled=true - discovery.type=single-node + - ELASTIC_PASSWORD=elastic volumes: - elasticsearch-data:/usr/share/elasticsearch/data cap_add: diff --git a/marquez.dev.yml b/marquez.dev.yml index 930f91e759..1613418bbb 100644 --- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -23,6 +23,14 @@ logging: appenders: - type: console +elastic: + enabled: true + scheme: http + host: search + port: 9200 + username: elastic + password: elastic + tags: - name: PII description: Personally identifiable information diff --git a/marquez.example.yml b/marquez.example.yml index 9033438907..b0aa31cb6f 100644 --- a/marquez.example.yml +++ b/marquez.example.yml @@ -66,6 +66,13 @@ logging: # environment: ${SENTRY_ENVIRONMENT} # stacktraceAppPackages: ['marquez'] +elastic: + enabled: true + scheme: http + host: search + port: 9200 + password: ${ELASTIC_PASSWORD} + ### TRACING ### # Enables tracing with sentry (see: https://docs.sentry.io/product/sentry-basics/tracing/distributed-tracing) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index d7f907d309..2c0f3cee29 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -210,7 +210,7 @@ const EsSearch: React.FC = ({ })} - {hit.facets.sourceCode?.language && ( + {hit.facets?.sourceCode?.language && ( <> @@ -218,7 +218,7 @@ const EsSearch: React.FC = ({ diff --git a/web/src/types/api.ts b/web/src/types/api.ts index b3a330a801..6f9bc4f70c 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -300,7 +300,7 @@ interface JobHit { namespace: string eventType: string type: string - facets: EsSearchFacet + facets?: EsSearchFacet } interface JobHighlight { From fd37df92d959835e42c4e0733d9833abb4932fe7 Mon Sep 17 00:00:00 2001 From: phix Date: Wed, 26 Jun 2024 13:56:28 -0700 Subject: [PATCH 22/87] Setting up startup scripts and 
adding corresponding waits. --- .env.example | 1 + api/src/main/java/marquez/MarquezApp.java | 3 --- docker-compose.seed.yml | 2 +- docker-compose.yml | 2 +- docker/up.sh | 8 +++++++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 4612671cd0..de98e232a0 100644 --- a/.env.example +++ b/.env.example @@ -2,4 +2,5 @@ API_PORT=5000 API_ADMIN_PORT=5001 WEB_PORT=3000 POSTGRES_PORT=5432 +ELASTICSEARCH_PORT=9200 TAG=0.47.0 diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index ef36859107..67822cd2b9 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -167,9 +167,6 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { - String host = "search"; - int port = 9200; - final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index 25ce4de2cb..153d3550f4 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -14,4 +14,4 @@ services: depends_on: - api - search - entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "search:9200", "--", "./seed.sh"] + entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "search:${ELASTICSEARCH_PORT}", "--", "./seed.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 6693d98d79..9aa8b8600b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,7 +23,7 @@ services: - db:${POSTGRES_PORT} - -- - /opt/marquez/wait-for-it.sh - - search:9200 + - search:${ELASTICSEARCH_PORT} - -- - ./entrypoint.sh diff --git a/docker/up.sh b/docker/up.sh index 32fc4b415f..8b747170d6 100755 --- 
a/docker/up.sh +++ b/docker/up.sh @@ -40,6 +40,7 @@ usage() { echo " -a, --api-port int api port (default: 5000)" echo " -m, --api-admin-port int api admin port (default: 5001)" echo " -w, --web-port int web port (default: 3000)" + echo " -e --es-port int elasticsearch port (default: 9200)" echo " -t, --tag string docker image tag (default: ${VERSION})" echo " --args string docker arguments" echo @@ -65,6 +66,7 @@ API_PORT=5000 API_ADMIN_PORT=5001 WEB_PORT=3000 POSTGRES_PORT=5432 +ELASTICSEARCH_PORT=9200 NO_WEB="false" NO_VOLUMES="false" TAG="${VERSION}" @@ -89,6 +91,10 @@ while [ $# -gt 0 ]; do shift POSTGRES_PORT="${1}" ;; + -e|'--es-port') + shift + ELASTICSEARCH_PORT="${1}" + ;; -t|'--tag') shift TAG="${1}" @@ -147,5 +153,5 @@ if [[ "${NO_VOLUMES}" = "false" ]]; then fi # Run docker compose cmd with overrides -DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} TAG=${TAG} \ +DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} ELASTICSEARCH_PORT=${ELASTICSEARCH_PORT} TAG=${TAG} \ docker --log-level ERROR compose $compose_files up $compose_args From 0a4cdccb8b685e96f86ae3e36ef5db24be56e06b Mon Sep 17 00:00:00 2001 From: phix Date: Thu, 27 Jun 2024 14:49:42 -0700 Subject: [PATCH 23/87] Adding logs and more fields for jobs. 
--- api/src/main/java/marquez/MarquezApp.java | 20 +++--- .../java/marquez/api/OpenLineageResource.java | 17 +++++ .../main/java/marquez/api/SearchResource.java | 1 + .../java/marquez/search/ElasticConfig.java | 31 ++++----- .../components/search/SearchPlaceholder.tsx | 5 +- .../components/search/es-search/EsSearch.tsx | 32 +++++++--- .../search/es-search/airlfow-logo.svg | 11 ++++ .../search/es-search/spark-logo.svg | 7 +++ web/src/types/api.ts | 63 +++++++++++++++++++ 9 files changed, 149 insertions(+), 38 deletions(-) create mode 100644 web/src/components/search/es-search/airlfow-logo.svg create mode 100644 web/src/components/search/es-search/spark-logo.svg diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 67822cd2b9..90d8f648ba 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -45,15 +45,11 @@ import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; import marquez.tracing.TracingServletFilter; -import org.apache.http.Header; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; -import org.apache.http.message.BasicHeader; import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; import org.flywaydb.core.api.FlywayException; import org.jdbi.v3.core.Jdbi; import org.jdbi.v3.core.statement.SqlLogger; @@ -143,7 +139,8 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } final Jdbi jdbi = newJdbi(config, env, source); - final ElasticsearchClient elasticsearchClient = newElasticsearchClient(config.getElasticConfig()); + final ElasticsearchClient elasticsearchClient = + newElasticsearchClient(config.getElasticConfig()); final MarquezContext marquezContext = 
MarquezContext.builder() .jdbi(jdbi) @@ -168,12 +165,17 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials(AuthScope.ANY, - new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); RestClient restClient = - RestClient.builder(new HttpHost(elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) - .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) + RestClient.builder( + new HttpHost( + elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) + .setHttpClientConfigCallback( + httpClientBuilder -> + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); ElasticsearchTransport transport = new RestClientTransport(restClient, new JacksonJsonpMapper()); diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index fdf92e2594..726d3c3894 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -14,8 +14,13 @@ import com.codahale.metrics.annotation.ExceptionMetered; import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import io.dropwizard.jersey.jsr310.ZonedDateTimeParam; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -116,15 +121,27 @@ private void indexEvent(@Valid @NotNull LineageEvent event) { private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { Map jsonMap = new HashMap<>(); + jsonMap.put("run_id", runUuid.toString()); jsonMap.put("eventType", event.getEventType()); jsonMap.put("name", event.getJob().getName()); jsonMap.put("type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH"); jsonMap.put("namespace", event.getJob().getNamespace()); jsonMap.put("facets", event.getJob().getFacets()); + // note event.getRun().getFacets() has a serialization issue with jackson; + jsonMap.put("runFacets", event.getRun().getFacets().getAdditionalFacets()); return jsonMap; } + public ObjectMapper objectMapper() { + ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); + mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + mapper.disable(DeserializationFeature.ADJUST_DATES_TO_CONTEXT_TIME_ZONE); + return mapper; + } + private Map buildDatasetIndexRequest( UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { Map jsonMap = new HashMap<>(); diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 8a9b40b818..402b63dcf5 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -102,6 +102,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc "facets.sql.query", "facets.sourceCode.sourceCode", "facets.sourceCode.language", + "runFacets.processing_engine.name", "run_id", "name", "namespace", diff --git a/api/src/main/java/marquez/search/ElasticConfig.java 
b/api/src/main/java/marquez/search/ElasticConfig.java index da4ed306c9..bf41c132fb 100644 --- a/api/src/main/java/marquez/search/ElasticConfig.java +++ b/api/src/main/java/marquez/search/ElasticConfig.java @@ -4,29 +4,22 @@ import lombok.Getter; public class ElasticConfig { - public static final boolean ENABLED = false; - public static final String SCHEME = "http"; - public static final String HOST = "search"; - public static final int PORT = 9200; - public static final String USERNAME = "elastic"; - public static final String PASSWORD = "elastic"; + public static final boolean ENABLED = false; + public static final String SCHEME = "http"; + public static final String HOST = "search"; + public static final int PORT = 9200; + public static final String USERNAME = "elastic"; + public static final String PASSWORD = "elastic"; - @Getter @JsonProperty - private boolean enabled = ENABLED; + @Getter @JsonProperty private boolean enabled = ENABLED; - @Getter @JsonProperty - private String scheme = SCHEME; + @Getter @JsonProperty private String scheme = SCHEME; - @Getter @JsonProperty - private String host = HOST; + @Getter @JsonProperty private String host = HOST; - @Getter @JsonProperty - private int port = PORT; + @Getter @JsonProperty private int port = PORT; - @Getter @JsonProperty - private String username = USERNAME; - - @Getter @JsonProperty - private String password = PASSWORD; + @Getter @JsonProperty private String username = USERNAME; + @Getter @JsonProperty private String password = PASSWORD; } diff --git a/web/src/components/search/SearchPlaceholder.tsx b/web/src/components/search/SearchPlaceholder.tsx index b48532d681..6e921f5c18 100644 --- a/web/src/components/search/SearchPlaceholder.tsx +++ b/web/src/components/search/SearchPlaceholder.tsx @@ -40,7 +40,10 @@ const SearchPlaceholder: React.FC = () => { {' '} - + diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 2c0f3cee29..413c1a2a20 100644 
--- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -21,6 +21,8 @@ import MQTooltip from '../../core/tooltip/MQTooltip' import MqEmpty from '../../core/empty/MqEmpty' import MqText from '../../core/text/MqText' import React, { useCallback, useEffect } from 'react' +import airflow_logo from './airlfow-logo.svg' +import spark_logo from './spark-logo.svg' interface StateProps { esSearchJobs: IEsSearchJobsState @@ -57,9 +59,9 @@ function parseStringToSegments(input: string): TextSegment[] { }) } -function getValueAfterLastPeriod(s: string) { - return s.split('.').pop() -} +// function getValueAfterLastPeriod(s: string) { +// return s.split('.').pop() +// } const useArrowKeys = (callback: (direction: 'up' | 'down') => void) => { useEffect(() => { @@ -170,6 +172,23 @@ const EsSearch: React.FC = ({ + + {hit.runFacets?.processing_engine && ( + <> + + + {'Integration'} + {hit.runFacets.processing_engine.name === 'spark' ? ( + Spark + ) : hit.runFacets.processing_engine.name === 'Airflow' ? 
( + Airflow + ) : ( + + )} + + + )} + Match @@ -183,12 +202,7 @@ const EsSearch: React.FC = ({ alignItems={'center'} mb={0.5} > - + {parseStringToSegments(highlightedString || '').map( (segment, index) => ( diff --git a/web/src/components/search/es-search/airlfow-logo.svg b/web/src/components/search/es-search/airlfow-logo.svg new file mode 100644 index 0000000000..f7864f8d53 --- /dev/null +++ b/web/src/components/search/es-search/airlfow-logo.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/web/src/components/search/es-search/spark-logo.svg b/web/src/components/search/es-search/spark-logo.svg new file mode 100644 index 0000000000..fc4f679021 --- /dev/null +++ b/web/src/components/search/es-search/spark-logo.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/web/src/types/api.ts b/web/src/types/api.ts index 6f9bc4f70c..0231de6289 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -301,6 +301,69 @@ interface JobHit { eventType: string type: string facets?: EsSearchFacet + runFacets: EsSearchRunFacet +} + +interface SparkLogicalPlan { + _producer: string + _schemaURL: string + plan: Plan[] +} + +interface Plan { + class: string + numChildren: number + ifPartitionNotExists?: boolean + partitionColumns?: any[] + query?: number + outputColumnNames?: string + output?: AttributeReference[][] + isStreaming?: boolean +} + +interface AttributeReference { + class: string + numChildren: number + name: string + dataType: string + nullable: boolean + metadata: Record + exprId: ExprId + qualifier: any[] +} + +interface ExprId { + productClass: string + id: number + jvmId: string +} + +interface SparkVersion { + _producer: string + _schemaURL: string + sparkVersion: string + openlineageSparkVersion: string +} + +interface ProcessingEngine { + _producer: string + _schemaURL: string + version: string + name: string + openlineageAdapterVersion: string +} + +interface EnvironmentProperties { + _producer: string + _schemaURL: string + 
environmentProperties: Record +} + +interface EsSearchRunFacet { + 'spark.logicalPlan'?: SparkLogicalPlan + spark_version?: SparkVersion + processing_engine?: ProcessingEngine + 'environment-properties'?: EnvironmentProperties } interface JobHighlight { From f9023fbc57c76636eb6bffbf5fe8a538df92f58e Mon Sep 17 00:00:00 2001 From: phix Date: Thu, 27 Jun 2024 16:39:02 -0700 Subject: [PATCH 24/87] Resolving jackson serialization issue. --- api/src/main/java/marquez/MarquezApp.java | 7 +++++-- .../java/marquez/api/OpenLineageResource.java | 17 +---------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 90d8f648ba..032e1cd3c5 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -11,6 +11,7 @@ import co.elastic.clients.transport.rest_client.RestClientTransport; import com.codahale.metrics.jdbi3.InstrumentedSqlLogger; import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import io.dropwizard.Application; import io.dropwizard.assets.AssetsBundle; import io.dropwizard.configuration.EnvironmentVariableSubstitutor; @@ -177,8 +178,10 @@ private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); - ElasticsearchTransport transport = - new RestClientTransport(restClient, new JacksonJsonpMapper()); + JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); + // register JavaTimeModule to handle ZonedDateTime + jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); + ElasticsearchTransport transport = new RestClientTransport(restClient, jsonpMapper); return new ElasticsearchClient(transport); } diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java 
index 726d3c3894..0e169cdc52 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -14,13 +14,8 @@ import com.codahale.metrics.annotation.ExceptionMetered; import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; -import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import io.dropwizard.jersey.jsr310.ZonedDateTimeParam; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -128,20 +123,10 @@ private Map buildJobIndexRequest(UUID runUuid, LineageEvent even jsonMap.put("type", event.getJob().isStreamingJob() ? 
"STREAM" : "BATCH"); jsonMap.put("namespace", event.getJob().getNamespace()); jsonMap.put("facets", event.getJob().getFacets()); - // note event.getRun().getFacets() has a serialization issue with jackson; - jsonMap.put("runFacets", event.getRun().getFacets().getAdditionalFacets()); + jsonMap.put("runFacets", event.getRun().getFacets()); return jsonMap; } - public ObjectMapper objectMapper() { - ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new JavaTimeModule()); - mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); - mapper.disable(DeserializationFeature.ADJUST_DATES_TO_CONTEXT_TIME_ZONE); - return mapper; - } - private Map buildDatasetIndexRequest( UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { Map jsonMap = new HashMap<>(); From 96ab8f876a993104c7219285ed430c2f2e699d5c Mon Sep 17 00:00:00 2001 From: phix Date: Mon, 1 Jul 2024 12:23:30 -0700 Subject: [PATCH 25/87] Small updates for search display. 
--- .../components/search/es-search/EsSearch.tsx | 45 ++++++++++++++----- web/src/types/api.ts | 2 +- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 413c1a2a20..e4b2a83a79 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -11,6 +11,7 @@ import { Nullable } from '../../../types/util/Nullable' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import { debounce } from 'lodash' +import { eventTypeColor } from '../../../helpers/nodes' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { faDatabase } from '@fortawesome/free-solid-svg-icons' import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' @@ -19,6 +20,7 @@ import { truncateText } from '../../../helpers/text' import Box from '@mui/system/Box' import MQTooltip from '../../core/tooltip/MQTooltip' import MqEmpty from '../../core/empty/MqEmpty' +import MqStatus from '../../core/status/MqStatus' import MqText from '../../core/text/MqText' import React, { useCallback, useEffect } from 'react' import airflow_logo from './airlfow-logo.svg' @@ -167,17 +169,27 @@ const EsSearch: React.FC = ({ - {truncateText(hit.namespace, 30)} + + {truncateText(hit.namespace, 30)} + - + + + + Last State + + + {hit.runFacets?.processing_engine && ( <> - {'Integration'} + + {'Integration'} + {hit.runFacets.processing_engine.name === 'spark' ? ( Spark ) : hit.runFacets.processing_engine.name === 'Airflow' ? 
( @@ -191,7 +203,9 @@ const EsSearch: React.FC = ({ - Match + + Match + {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { @@ -202,7 +216,10 @@ const EsSearch: React.FC = ({ alignItems={'center'} mb={0.5} > - + {/**/} + + {`${key}: `} + {parseStringToSegments(highlightedString || '').map( (segment, index) => ( @@ -228,7 +245,9 @@ const EsSearch: React.FC = ({ <> - {'Language'} + + {'Language'} + = ({ - {truncateText(hit.namespace, 30)} + + {truncateText(hit.namespace, 30)} + - Match + + Match + {Object.entries(esSearchDatasets.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { @@ -313,8 +336,10 @@ const EsSearch: React.FC = ({ - Fields - + + Fields + + {hit.facets.schema.fields.slice(0, FIELDS_TO_PRINT).map((field) => { return ( Date: Wed, 3 Jul 2024 15:05:10 -0700 Subject: [PATCH 26/87] Adding onClick handlers. --- web/src/components/search/es-search/EsSearch.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index e4b2a83a79..e95822bee7 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -25,6 +25,7 @@ import MqText from '../../core/text/MqText' import React, { useCallback, useEffect } from 'react' import airflow_logo from './airlfow-logo.svg' import spark_logo from './spark-logo.svg' +import {useNavigate} from "react-router-dom"; interface StateProps { esSearchJobs: IEsSearchJobsState @@ -94,6 +95,7 @@ const EsSearch: React.FC = ({ esSearchDatasets, }) => { const [selectedIndex, setSelectedIndex] = React.useState>(null) + const navigate = useNavigate() useArrowKeys((direction) => { if (direction === 'up') { @@ -143,6 +145,7 @@ const EsSearch: React.FC = ({ return ( navigate(`/lineage/job/${hit.namespace}/${hit.name}`)} px={2} py={1} borderBottom={1} 
@@ -264,6 +267,7 @@ const EsSearch: React.FC = ({ return ( navigate(`/lineage/dataset/${hit.namespace}/${hit.name}`)} px={2} py={1} borderBottom={1} From 5133c421c7a85fef4a9efe8e97a96dfcdf157281 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sat, 6 Jul 2024 11:10:18 -0700 Subject: [PATCH 27/87] Fixing null cases, adding more search options for datasets. --- .../main/java/marquez/api/SearchResource.java | 16 ++---- web/src/components/datasets/DatasetTags.tsx | 4 +- .../components/search/es-search/EsSearch.tsx | 54 ++++++++++--------- web/src/types/api.ts | 6 +-- 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 402b63dcf5..f94059bc9d 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -85,11 +85,6 @@ public Response search( return Response.ok(new SearchResults(searchResults)).build(); } - /** - * { "query": { "multi_match": { "type": "phrase_prefix", "query": "${query}", "fields": [ - * "facets.sourceCode.sourceCode", "facets.sourceCode.language", "run_id", "name", "namespace", - * "type" ], "operator": "or" } } } - */ @Timed @ResponseMetered @ExceptionMetered @@ -137,14 +132,6 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc } } - /** - * { "query": { "multi_match": { "query": "id", "fields": [ "facets.schema.fields.name", - * "facets.schema.fields.type", "facets.columnLineage.fields.*.inputFields.name", - * "facets.columnLineage.fields.*.inputFields.namespace", - * "facets.columnLineage.fields.*.inputFields.field", - * "facets.columnLineage.fields.*.transformationDescription", - * "facets.columnLineage.fields.*.transformationType" ] } } } - */ @Timed @ResponseMetered @ExceptionMetered @@ -154,6 +141,9 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc public Response searchDatasets(@QueryParam("q") @NotBlank 
String query) throws IOException { if (this.elasticsearchClient != null) { String[] fields = { + "run_id", + "name", + "namespace", "facets.schema.fields.name", "facets.schema.fields.type", "facets.columnLineage.fields.*.inputFields.name", diff --git a/web/src/components/datasets/DatasetTags.tsx b/web/src/components/datasets/DatasetTags.tsx index a66fd9b63e..c32615124d 100644 --- a/web/src/components/datasets/DatasetTags.tsx +++ b/web/src/components/datasets/DatasetTags.tsx @@ -189,8 +189,8 @@ const DatasetTags: React.FC = (props) => { multiple disableCloseOnSelect id='dataset-tags' - sx={{ flex : 1, width: datasetField ? 494 : 'auto' }} - limitTags={!datasetField ? 8 : 6} + sx={{ flex: 1, width: datasetField ? 494 : 'auto' }} + limitTags={!datasetField ? 8 : 6} autoHighlight disableClearable disablePortal diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index e95822bee7..57dfadc226 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -17,6 +17,7 @@ import { faDatabase } from '@fortawesome/free-solid-svg-icons' import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' import { theme } from '../../../helpers/theme' import { truncateText } from '../../../helpers/text' +import { useNavigate } from 'react-router-dom' import Box from '@mui/system/Box' import MQTooltip from '../../core/tooltip/MQTooltip' import MqEmpty from '../../core/empty/MqEmpty' @@ -25,7 +26,6 @@ import MqText from '../../core/text/MqText' import React, { useCallback, useEffect } from 'react' import airflow_logo from './airlfow-logo.svg' import spark_logo from './spark-logo.svg' -import {useNavigate} from "react-router-dom"; interface StateProps { esSearchJobs: IEsSearchJobsState @@ -338,30 +338,34 @@ const EsSearch: React.FC = ({ })} - - - - Fields - - - {hit.facets.schema.fields.slice(0, FIELDS_TO_PRINT).map((field) => { - return ( - - ) 
- })} - {hit.facets.schema.fields.length > FIELDS_TO_PRINT && ( - {`+ ${ - hit.facets.schema.fields.length - FIELDS_TO_PRINT - }`} - )} - - + {hit.facets?.schema?.fields && ( + <> + + + + Fields + + + {hit.facets?.schema?.fields.slice(0, FIELDS_TO_PRINT).map((field) => { + return ( + + ) + })} + {hit.facets?.schema && hit.facets.schema.fields.length > FIELDS_TO_PRINT && ( + {`+ ${ + hit.facets.schema.fields.length - FIELDS_TO_PRINT + }`} + )} + + + + )} ) diff --git a/web/src/types/api.ts b/web/src/types/api.ts index d5a08df9da..6630612bb2 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -413,8 +413,8 @@ type ColumnLineageFacet = { } type EsSearchDatasetFacets = { - schema: SchemaFacet - columnLineage: ColumnLineageFacet + schema?: SchemaFacet + columnLineage?: ColumnLineageFacet } type DatasetHit = { @@ -422,7 +422,7 @@ type DatasetHit = { name: string namespace: string eventType: string - facets: EsSearchDatasetFacets + facets?: EsSearchDatasetFacets } export type EsSearchResultDatasets = { From 924130791fbe1d4248030f656174572566a701e6 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sat, 6 Jul 2024 12:18:25 -0700 Subject: [PATCH 28/87] Handling enter key. 
--- .../components/search/es-search/EsSearch.tsx | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 57dfadc226..369562a156 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -66,15 +66,18 @@ function parseStringToSegments(input: string): TextSegment[] { // return s.split('.').pop() // } -const useArrowKeys = (callback: (direction: 'up' | 'down') => void) => { +const useArrowKeys = (callback: (key: 'up' | 'down' | 'enter') => void) => { useEffect(() => { const handleKeyDown = (event: KeyboardEvent) => { if (event.key === 'ArrowDown') { - event.preventDefault() // Prevent the default browser action + event.preventDefault() callback('down') } else if (event.key === 'ArrowUp') { - event.preventDefault() // Prevent the default browser action + event.preventDefault() callback('up') + } else if (event.key === 'Enter') { + event.preventDefault() + callback('enter') } } window.addEventListener('keydown', handleKeyDown) @@ -97,10 +100,10 @@ const EsSearch: React.FC = ({ const [selectedIndex, setSelectedIndex] = React.useState>(null) const navigate = useNavigate() - useArrowKeys((direction) => { - if (direction === 'up') { + useArrowKeys((key) => { + if (key === 'up') { setSelectedIndex(selectedIndex === null ? null : Math.max(selectedIndex - 1, 0)) - } else { + } else if (key === 'down') { setSelectedIndex( selectedIndex === null ? 
0 @@ -109,6 +112,14 @@ const EsSearch: React.FC = ({ esSearchJobs.data.hits.length + esSearchDatasets.data.hits.length - 1 ) ) + } else if (selectedIndex !== null) { + if (selectedIndex < esSearchJobs.data.hits.length) { + const jobHit = esSearchJobs.data.hits[selectedIndex] + navigate(`/lineage/job/${jobHit.namespace}/${jobHit.name}`) + } else { + const datasetHit = esSearchDatasets.data.hits[selectedIndex - esSearchJobs.data.hits.length] + navigate(`/lineage/dataset/${datasetHit.namespace}/${datasetHit.name}`) + } } }) @@ -279,7 +290,7 @@ const EsSearch: React.FC = ({ backgroundColor: theme.palette.action.hover, }, backgroundColor: - selectedIndex === index + esSearchDatasets.data.hits.length + selectedIndex === index + esSearchJobs.data.hits.length ? theme.palette.action.hover : undefined, }} From 259fea74eb542bfdc2fc3a87f7f29f265e90652b Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 7 Jul 2024 17:23:23 -0700 Subject: [PATCH 29/87] Fixing minor encoding and layout issues for spark related open lineage events. 
--- api/src/main/java/marquez/api/SearchResource.java | 5 ++++- web/src/components/search/es-search/EsSearch.tsx | 10 +++++----- web/src/routes/events/Events.tsx | 7 ++++--- web/src/routes/table-level/ActionBar.tsx | 7 +++++-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index f94059bc9d..58309452e9 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -159,7 +159,10 @@ public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws I .query( q -> q.multiMatch( - m -> m.query(query).fields(Arrays.stream(fields).toList()))) + m -> m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))) .highlight( hl -> { for (String field : fields) { diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 369562a156..2b60ed72e1 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -11,7 +11,7 @@ import { Nullable } from '../../../types/util/Nullable' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import { debounce } from 'lodash' -import { eventTypeColor } from '../../../helpers/nodes' +import { encodeNode, eventTypeColor } from '../../../helpers/nodes' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { faDatabase } from '@fortawesome/free-solid-svg-icons' import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' @@ -115,10 +115,10 @@ const EsSearch: React.FC = ({ } else if (selectedIndex !== null) { if (selectedIndex < esSearchJobs.data.hits.length) { const jobHit = esSearchJobs.data.hits[selectedIndex] - navigate(`/lineage/job/${jobHit.namespace}/${jobHit.name}`) + navigate(`/lineage/${encodeNode('JOB', 
jobHit.namespace, jobHit.name)}`) } else { const datasetHit = esSearchDatasets.data.hits[selectedIndex - esSearchJobs.data.hits.length] - navigate(`/lineage/dataset/${datasetHit.namespace}/${datasetHit.name}`) + navigate(`/lineage/${encodeNode('DATASET', datasetHit.namespace, datasetHit.name)}`) } } }) @@ -156,7 +156,7 @@ const EsSearch: React.FC = ({ return ( navigate(`/lineage/job/${hit.namespace}/${hit.name}`)} + onClick={() => navigate(`/lineage/${encodeNode('JOB', hit.namespace, hit.name)}`)} px={2} py={1} borderBottom={1} @@ -278,7 +278,7 @@ const EsSearch: React.FC = ({ return ( navigate(`/lineage/dataset/${hit.namespace}/${hit.name}`)} + onClick={() => navigate(`/lineage/${encodeNode('DATASET', hit.namespace, hit.name)}`)} px={2} py={1} borderBottom={1} diff --git a/web/src/routes/events/Events.tsx b/web/src/routes/events/Events.tsx index b3e18bd183..02fc9460d1 100644 --- a/web/src/routes/events/Events.tsx +++ b/web/src/routes/events/Events.tsx @@ -25,6 +25,7 @@ import { fetchEvents, resetEvents } from '../../store/actionCreators' import { fileSize, formatUpdatedAt } from '../../helpers' import { formatDateAPIQuery, formatDatePicker } from '../../helpers/time' import { saveAs } from 'file-saver' +import { truncateText } from '../../helpers/text' import { useSearchParams } from 'react-router-dom' import { useTheme } from '@emotion/react' import Box from '@mui/material/Box' @@ -294,7 +295,7 @@ const Events: React.FC = ({ > - {event.run.runId} + {event.run.runId.substring(0, 8)}... 
@@ -304,9 +305,9 @@ const Events: React.FC = ({ label={event.eventType} /> - {event.job.name} + {truncateText(event.job.name, 40)} - {event.job.namespace} + {truncateText(event.job.namespace, 40)} {formatUpdatedAt(event.eventTime)} diff --git a/web/src/routes/table-level/ActionBar.tsx b/web/src/routes/table-level/ActionBar.tsx index aa73fda967..87c5c3c14d 100644 --- a/web/src/routes/table-level/ActionBar.tsx +++ b/web/src/routes/table-level/ActionBar.tsx @@ -2,6 +2,7 @@ import { ArrowBackIosRounded, Refresh } from '@mui/icons-material' import { Divider, FormControlLabel, Switch, TextField } from '@mui/material' import { fetchLineage } from '../../store/actionCreators' import { theme } from '../../helpers/theme' +import { truncateText } from '../../helpers/text' import { useNavigate, useParams, useSearchParams } from 'react-router-dom' import Box from '@mui/material/Box' import IconButton from '@mui/material/IconButton' @@ -68,12 +69,14 @@ export const ActionBar = ({ Namespace - {namespace || 'Unknown namespace name'} + + {namespace ? truncateText(namespace, 40) : 'Unknown namespace name'} + Name - {name || 'Unknown dataset name'} + {name ? truncateText(name, 40) : 'Unknown dataset name'} From 54409171ff8f09c28ad142f0fe657c3a62f40d81 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 7 Jul 2024 21:05:59 -0700 Subject: [PATCH 30/87] Additional fixes for text overflow on names and namespaces. 
--- web/src/routes/datasets/Datasets.tsx | 5 +++-- web/src/routes/jobs/Jobs.tsx | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/web/src/routes/datasets/Datasets.tsx b/web/src/routes/datasets/Datasets.tsx index 359cf9d56e..a3c6bdae49 100644 --- a/web/src/routes/datasets/Datasets.tsx +++ b/web/src/routes/datasets/Datasets.tsx @@ -28,6 +28,7 @@ import { } from '../../helpers/nodes' import { fetchDatasets, resetDatasets } from '../../store/actionCreators' import { formatUpdatedAt } from '../../helpers' +import { truncateText } from '../../helpers/text' import { useTheme } from '@emotion/react' import Assertions from '../../components/datasets/Assertions' import Box from '@mui/material/Box' @@ -202,11 +203,11 @@ const Datasets: React.FC = ({ dataset.name )}`} > - {dataset.name} + {truncateText(dataset.name, 40)} - {dataset.namespace} + {truncateText(dataset.namespace, 40)} {dataset.sourceName} diff --git a/web/src/routes/jobs/Jobs.tsx b/web/src/routes/jobs/Jobs.tsx index a28da6efa0..681532422c 100644 --- a/web/src/routes/jobs/Jobs.tsx +++ b/web/src/routes/jobs/Jobs.tsx @@ -25,6 +25,7 @@ import { encodeNode, runStateColor } from '../../helpers/nodes' import { fetchJobs, resetJobs } from '../../store/actionCreators' import { formatUpdatedAt } from '../../helpers' import { stopWatchDuration } from '../../helpers/time' +import { truncateText } from '../../helpers/text' import { useTheme } from '@emotion/react' import Box from '@mui/material/Box' import CircularProgress from '@mui/material/CircularProgress/CircularProgress' @@ -186,11 +187,11 @@ const Jobs: React.FC = ({ link linkTo={`/lineage/${encodeNode('JOB', job.namespace, job.name)}`} > - {job.name} + {truncateText(job.name, 40)} - {job.namespace} + {truncateText(job.namespace, 40)} {formatUpdatedAt(job.updatedAt)} From df43ab8631cf4b189c5984eae75fae77fc5cef45 Mon Sep 17 00:00:00 2001 From: phixMe Date: Mon, 8 Jul 2024 07:06:25 -0700 Subject: [PATCH 31/87] Fixing indexing problem. 
--- web/src/components/search/es-search/EsSearch.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 2b60ed72e1..27eb2b1810 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -155,7 +155,7 @@ const EsSearch: React.FC = ({ {esSearchJobs.data.hits.map((hit, index) => { return ( navigate(`/lineage/${encodeNode('JOB', hit.namespace, hit.name)}`)} px={2} py={1} @@ -277,7 +277,7 @@ const EsSearch: React.FC = ({ {esSearchDatasets.data.hits.map((hit, index) => { return ( navigate(`/lineage/${encodeNode('DATASET', hit.namespace, hit.name)}`)} px={2} py={1} From b6f46912b376c7d55c67d8cf637733b4a6e66326 Mon Sep 17 00:00:00 2001 From: phixMe Date: Mon, 8 Jul 2024 22:59:41 -0700 Subject: [PATCH 32/87] Transitioning to opensearch. --- api/src/main/java/marquez/MarquezApp.java | 76 +++++++++++++------ api/src/main/java/marquez/MarquezContext.java | 20 ++--- .../java/marquez/api/OpenLineageResource.java | 18 ++--- .../main/java/marquez/api/SearchResource.java | 30 ++++---- .../marquez/api/OpenLineageResourceTest.java | 3 +- build.gradle | 2 + docker-compose.seed.yml | 2 +- docker-compose.yml | 42 +++++++--- 8 files changed, 126 insertions(+), 67 deletions(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 032e1cd3c5..2640b90ac2 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -5,10 +5,6 @@ package marquez; -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.json.jackson.JacksonJsonpMapper; -import co.elastic.clients.transport.ElasticsearchTransport; -import co.elastic.clients.transport.rest_client.RestClientTransport; import com.codahale.metrics.jdbi3.InstrumentedSqlLogger; import com.fasterxml.jackson.databind.SerializationFeature; import 
com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; @@ -26,6 +22,8 @@ import io.prometheus.client.exporter.MetricsServlet; import io.prometheus.client.hotspot.DefaultExports; import io.sentry.Sentry; + +import java.io.IOException; import java.util.EnumSet; import javax.servlet.DispatcherType; import lombok.NonNull; @@ -41,7 +39,6 @@ import marquez.db.DbMigration; import marquez.jobs.DbRetentionJob; import marquez.logging.LoggingMdcFilter; -import marquez.search.ElasticConfig; import marquez.tracing.SentryConfig; import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; @@ -50,7 +47,6 @@ import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.impl.client.BasicCredentialsProvider; -import org.elasticsearch.client.RestClient; import org.flywaydb.core.api.FlywayException; import org.jdbi.v3.core.Jdbi; import org.jdbi.v3.core.statement.SqlLogger; @@ -58,6 +54,16 @@ import org.jdbi.v3.jackson2.Jackson2Plugin; import org.jdbi.v3.postgres.PostgresPlugin; import org.jdbi.v3.sqlobject.SqlObjectPlugin; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.endpoints.BooleanResponse; +import org.opensearch.client.transport.rest_client.RestClientTransport; @Slf4j public final class MarquezApp extends Application { @@ -140,12 +146,10 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } final Jdbi jdbi = newJdbi(config, env, source); - final ElasticsearchClient elasticsearchClient = - 
newElasticsearchClient(config.getElasticConfig()); final MarquezContext marquezContext = MarquezContext.builder() .jdbi(jdbi) - .elasticsearchClient(elasticsearchClient) + .openSearchClient(newOpenSearchClient()) .tags(config.getTags()) .build(); @@ -164,25 +168,51 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { Exclusions.use(exclusions); } - private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { +// private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { +// final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); +// credentialsProvider.setCredentials( +// AuthScope.ANY, +// new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); +// +// RestClient restClient = +// RestClient.builder( +// new HttpHost( +// elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) +// .setHttpClientConfigCallback( +// httpClientBuilder -> +// httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) +// .build(); +// JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); +// // register JavaTimeModule to handle ZonedDateTime +// jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); +// ElasticsearchTransport transport = new RestClientTransport(restClient, jsonpMapper); +// return new ElasticsearchClient(transport); +// } + + private OpenSearchClient newOpenSearchClient() { + final HttpHost host = new HttpHost("marquez-opensearch", 9200, "http"); final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); - - RestClient restClient = - RestClient.builder( - new HttpHost( - elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) - .setHttpClientConfigCallback( - httpClientBuilder -> - 
httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) + // Only for demo purposes. Don't specify your credentials in code. + credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials("admin", "admin")); + + // Initialize the client with SSL and TLS enabled + final RestClient restClient = RestClient.builder(host) + .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); + JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); // register JavaTimeModule to handle ZonedDateTime jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); - ElasticsearchTransport transport = new RestClientTransport(restClient, jsonpMapper); - return new ElasticsearchClient(transport); + final OpenSearchTransport transport = new RestClientTransport(restClient, jsonpMapper); + OpenSearchClient openSearchClient = new OpenSearchClient(transport); + BooleanResponse booleanResponse = null; + try { + booleanResponse = openSearchClient.ping(); + log.info("OpenSearch Active: {}", booleanResponse.value()); + } catch (IOException e) { + throw new RuntimeException(e); + } + return openSearchClient; } private boolean isSentryEnabled(MarquezConfig config) { diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index 3bfc2d0eac..a7a6d53dbf 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -5,7 +5,6 @@ package marquez; -import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @@ -59,6 +58,7 @@ import marquez.service.TagService; import marquez.service.models.Tag; import org.jdbi.v3.core.Jdbi; +import org.opensearch.client.opensearch.OpenSearchClient; @Getter public final class MarquezContext { @@ -102,17 +102,17 @@ public 
final class MarquezContext { @Getter private final JdbiExceptionExceptionMapper jdbiException; @Getter private final JsonProcessingExceptionMapper jsonException; @Getter private final GraphQLHttpServlet graphqlServlet; - @Getter private final ElasticsearchClient elasticsearchClient; + @Getter private final OpenSearchClient openSearchClient; private MarquezContext( @NonNull final Jdbi jdbi, - @NonNull final ElasticsearchClient elasticsearchClient, + @NonNull final OpenSearchClient openSearchClient, @NonNull final ImmutableSet tags, List runTransitionListeners) { if (runTransitionListeners == null) { runTransitionListeners = new ArrayList<>(); } - this.elasticsearchClient = elasticsearchClient; + this.openSearchClient = openSearchClient; final BaseDao baseDao = jdbi.onDemand(NamespaceDao.class); this.namespaceDao = jdbi.onDemand(NamespaceDao.class); @@ -168,8 +168,8 @@ private MarquezContext( this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); this.openLineageResource = - new OpenLineageResource(serviceFactory, elasticsearchClient, openLineageDao); - this.searchResource = new SearchResource(searchDao, elasticsearchClient); + new OpenLineageResource(serviceFactory, openSearchClient, openLineageDao); + this.searchResource = new SearchResource(searchDao, openSearchClient); this.resources = ImmutableList.of( @@ -195,7 +195,7 @@ public static Builder builder() { public static class Builder { private Jdbi jdbi; - private ElasticsearchClient elasticsearchClient; + private OpenSearchClient openSearchClient; private ImmutableSet tags; private List runTransitionListeners; @@ -209,8 +209,8 @@ public Builder jdbi(@NonNull Jdbi jdbi) { return this; } - public Builder elasticsearchClient(@NonNull ElasticsearchClient elasticsearchClient) { - this.elasticsearchClient = elasticsearchClient; + public Builder openSearchClient(@NonNull OpenSearchClient openSearchClient) { + 
this.openSearchClient = openSearchClient; return this; } @@ -230,7 +230,7 @@ public Builder runTransitionListeners( } public MarquezContext build() { - return new MarquezContext(jdbi, elasticsearchClient, tags, runTransitionListeners); + return new MarquezContext(jdbi, openSearchClient, tags, runTransitionListeners); } } } diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index 0e169cdc52..bb369e3a93 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -9,8 +9,6 @@ import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import static javax.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch.core.IndexRequest; import com.codahale.metrics.annotation.ExceptionMetered; import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; @@ -49,21 +47,23 @@ import marquez.service.models.BaseEvent; import marquez.service.models.LineageEvent; import marquez.service.models.NodeId; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.IndexRequest; @Slf4j @Path("/api/v1") public class OpenLineageResource extends BaseResource { private static final String DEFAULT_DEPTH = "20"; - private final ElasticsearchClient elasticsearchClient; + private final OpenSearchClient openSearchClient; private final OpenLineageDao openLineageDao; public OpenLineageResource( @NonNull final ServiceFactory serviceFactory, - @NonNull final ElasticsearchClient elasticsearchClient, + @NonNull final OpenSearchClient openSearchClient, @NonNull final OpenLineageDao openLineageDao) { super(serviceFactory); - this.elasticsearchClient = elasticsearchClient; + this.openSearchClient = openSearchClient; this.openLineageDao = openLineageDao; } @@ -100,7 +100,7 @@ private 
UUID runUuidFromEvent(LineageEvent.Run run) { } private void indexEvent(@Valid @NotNull LineageEvent event) { - if (this.elasticsearchClient != null) { + if (this.openSearchClient != null) { UUID runUuid = runUuidFromEvent(event.getRun()); log.info("Indexing event {}", event); @@ -171,11 +171,11 @@ private void indexDatasets( private void index(IndexRequest> request) { try { - if (this.elasticsearchClient != null) { - this.elasticsearchClient.index(request); + if (this.openSearchClient != null) { + this.openSearchClient.index(request); } } catch (IOException e) { - log.info("Failed to index event Elasticsearch not available."); + log.info("Failed to index event Elasticsearch not available.", e); } } diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 58309452e9..1e5a9c7180 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -8,11 +8,6 @@ import static javax.ws.rs.core.MediaType.APPLICATION_JSON; import static marquez.common.Utils.toLocateDateOrNull; -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch._types.query_dsl.Operator; -import co.elastic.clients.elasticsearch._types.query_dsl.TextQueryType; -import co.elastic.clients.elasticsearch.core.SearchResponse; -import co.elastic.clients.elasticsearch.core.search.Hit; import com.codahale.metrics.annotation.ExceptionMetered; import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; @@ -42,6 +37,13 @@ import marquez.api.models.SearchResult; import marquez.api.models.SearchSort; import marquez.db.SearchDao; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.query_dsl.Operator; +import org.opensearch.client.opensearch._types.query_dsl.TextQueryType; +import org.opensearch.client.opensearch.core.SearchResponse; +import 
org.opensearch.client.opensearch.core.search.BuiltinHighlighterType; +import org.opensearch.client.opensearch.core.search.HighlighterType; +import org.opensearch.client.opensearch.core.search.Hit; @Slf4j @Path("/api/v1/search") @@ -52,12 +54,12 @@ public class SearchResource { private static final int MIN_LIMIT = 0; private final SearchDao searchDao; - private final ElasticsearchClient elasticsearchClient; + private final OpenSearchClient openSearchClient; public SearchResource( - @NonNull final SearchDao searchDao, @Nullable final ElasticsearchClient elasticsearchClient) { + @NonNull final SearchDao searchDao, @Nullable final OpenSearchClient openSearchClient) { this.searchDao = searchDao; - this.elasticsearchClient = elasticsearchClient; + this.openSearchClient = openSearchClient; } @Timed @@ -92,7 +94,7 @@ public Response search( @Produces(APPLICATION_JSON) @Path("/jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - if (this.elasticsearchClient != null) { + if (this.openSearchClient != null) { String[] fields = { "facets.sql.query", "facets.sourceCode.sourceCode", @@ -104,7 +106,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc "type" }; SearchResponse response = - this.elasticsearchClient.search( + this.openSearchClient.search( s -> { s.index("jobs") .query( @@ -118,7 +120,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc s.highlight( hl -> { for (String field : fields) { - hl.fields(field, f -> f.type("plain")); + hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); } return hl; }); @@ -139,7 +141,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @Produces(APPLICATION_JSON) @Path("/datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { - if (this.elasticsearchClient != null) { + if (this.openSearchClient != null) { 
String[] fields = { "run_id", "name", @@ -153,7 +155,7 @@ public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws I "facets.columnLineage.fields.*.transformationType" }; SearchResponse response = - this.elasticsearchClient.search( + this.openSearchClient.search( s -> s.index("datasets") .query( @@ -166,7 +168,7 @@ public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws I .highlight( hl -> { for (String field : fields) { - hl.fields(field, f -> f.type("plain")); + hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); } return hl; }), diff --git a/api/src/test/java/marquez/api/OpenLineageResourceTest.java b/api/src/test/java/marquez/api/OpenLineageResourceTest.java index 6d81b0f10b..aa982e5e14 100644 --- a/api/src/test/java/marquez/api/OpenLineageResourceTest.java +++ b/api/src/test/java/marquez/api/OpenLineageResourceTest.java @@ -30,6 +30,7 @@ import marquez.service.models.NodeId; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.opensearch.client.opensearch.OpenSearchClient; @ExtendWith(DropwizardExtensionsSupport.class) class OpenLineageResourceTest { @@ -57,7 +58,7 @@ class OpenLineageResourceTest { ResourceExtension.builder() .addResource( new OpenLineageResource( - serviceFactory, new ElasticsearchClient(null), openLineageDao)) + serviceFactory, new OpenSearchClient(null), openLineageDao)) .build(); } diff --git a/build.gradle b/build.gradle index 3866a4e66d..9f0fe4ec0f 100644 --- a/build.gradle +++ b/build.gradle @@ -68,6 +68,8 @@ subprojects { implementation "org.projectlombok:lombok:${lombokVersion}" implementation "co.elastic.clients:elasticsearch-java:${elasticsearchVersion}" implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonDatabindVersion}" + implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' + implementation 'org.opensearch.client:opensearch-java:2.6.0' annotationProcessor 
"org.projectlombok:lombok:${lombokVersion}" testImplementation "org.assertj:assertj-core:${assertjVersion}" diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index 153d3550f4..e7d0256950 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -13,5 +13,5 @@ services: - "db:postgres" depends_on: - api - - search + - opensearch entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "search:${ELASTICSEARCH_PORT}", "--", "./seed.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 9aa8b8600b..f661b85d57 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,15 +15,16 @@ services: - data:/opt/marquez links: - "db:postgres" + - opensearch depends_on: - - search + - opensearch - db entrypoint: - /opt/marquez/wait-for-it.sh - db:${POSTGRES_PORT} - -- - /opt/marquez/wait-for-it.sh - - search:${ELASTICSEARCH_PORT} + - opensearch:${ELASTICSEARCH_PORT} - -- - ./entrypoint.sh @@ -46,9 +47,32 @@ services: # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] - search: - image: elasticsearch:8.13.4 - container_name: marquez-search +# search: +# image: elasticsearch:8.13.4 +# container_name: marquez-search +# ulimits: +# memlock: +# soft: -1 +# hard: -1 +# nofile: +# soft: 65536 +# hard: 65536 +# environment: +# - xpack.security.enabled=true +# - xpack.security.authc.api_key.enabled=true +# - discovery.type=single-node +# - ELASTIC_PASSWORD=elastic +# volumes: +# - elasticsearch-data:/usr/share/elasticsearch/data +# cap_add: +# - IPC_LOCK +# ports: +# - "9200:9200" +# - "9300:9300" + + opensearch: + image: opensearchproject/opensearch:2.5.0 + container_name: marquez-opensearch ulimits: memlock: soft: -1 @@ -57,12 +81,11 @@ services: soft: 65536 hard: 65536 environment: - - xpack.security.enabled=true - - xpack.security.authc.api_key.enabled=true + - plugins.security.ssl.http.enabled=false - 
discovery.type=single-node - - ELASTIC_PASSWORD=elastic + - OPENSEARCH_PASSWORD=admin volumes: - - elasticsearch-data:/usr/share/elasticsearch/data + - opensearch-data:/usr/share/opensearch/data cap_add: - IPC_LOCK ports: @@ -72,6 +95,7 @@ services: volumes: data: elasticsearch-data: + opensearch-data: db-conf: db-init: db-backup: From 82e00e2a1a2834f93207bb3ff8411a4c35975d7c Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 9 Jul 2024 10:27:42 -0700 Subject: [PATCH 33/87] Removing elasticsearch references. --- .env.example | 2 +- api/src/main/java/marquez/MarquezApp.java | 41 ++++--------------- api/src/main/java/marquez/MarquezConfig.java | 6 +-- .../java/marquez/api/OpenLineageResource.java | 2 +- .../{ElasticConfig.java => SearchConfig.java} | 8 ++-- build.gradle | 2 - docker-compose.seed.yml | 2 +- docker-compose.yml | 28 +------------ docker/up.sh | 10 ++--- marquez.dev.yml | 8 ++-- marquez.example.yml | 7 ++-- web/src/components/search/Search.tsx | 4 +- 12 files changed, 33 insertions(+), 87 deletions(-) rename api/src/main/java/marquez/search/{ElasticConfig.java => SearchConfig.java} (75%) diff --git a/.env.example b/.env.example index de98e232a0..869c018aea 100644 --- a/.env.example +++ b/.env.example @@ -2,5 +2,5 @@ API_PORT=5000 API_ADMIN_PORT=5001 WEB_PORT=3000 POSTGRES_PORT=5432 -ELASTICSEARCH_PORT=9200 +SEARCH_PORT=9200 TAG=0.47.0 diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 2640b90ac2..1360dbfc1e 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -39,6 +39,7 @@ import marquez.db.DbMigration; import marquez.jobs.DbRetentionJob; import marquez.logging.LoggingMdcFilter; +import marquez.search.SearchConfig; import marquez.tracing.SentryConfig; import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; @@ -54,11 +55,7 @@ import org.jdbi.v3.jackson2.Jackson2Plugin; import org.jdbi.v3.postgres.PostgresPlugin; 
import org.jdbi.v3.sqlobject.SqlObjectPlugin; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; -import org.apache.http.impl.client.BasicCredentialsProvider; import org.opensearch.client.RestClient; -import org.opensearch.client.RestClientBuilder; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.transport.OpenSearchTransport; @@ -149,7 +146,7 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { final MarquezContext marquezContext = MarquezContext.builder() .jdbi(jdbi) - .openSearchClient(newOpenSearchClient()) + .openSearchClient(newOpenSearchClient(config.getSearchConfig())) .tags(config.getTags()) .build(); @@ -168,34 +165,10 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { Exclusions.use(exclusions); } -// private ElasticsearchClient newElasticsearchClient(ElasticConfig elasticConfig) { -// final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); -// credentialsProvider.setCredentials( -// AuthScope.ANY, -// new UsernamePasswordCredentials(elasticConfig.getUsername(), elasticConfig.getPassword())); -// -// RestClient restClient = -// RestClient.builder( -// new HttpHost( -// elasticConfig.getHost(), elasticConfig.getPort(), elasticConfig.getScheme())) -// .setHttpClientConfigCallback( -// httpClientBuilder -> -// httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) -// .build(); -// JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); -// // register JavaTimeModule to handle ZonedDateTime -// jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); -// ElasticsearchTransport transport = new RestClientTransport(restClient, jsonpMapper); -// return new ElasticsearchClient(transport); -// } - - private OpenSearchClient newOpenSearchClient() { - final HttpHost host = new 
HttpHost("marquez-opensearch", 9200, "http"); + private OpenSearchClient newOpenSearchClient(SearchConfig searchConfig) { + final HttpHost host = new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - // Only for demo purposes. Don't specify your credentials in code. - credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials("admin", "admin")); - - // Initialize the client with SSL and TLS enabled + credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials(searchConfig.getUsername(), searchConfig.getPassword())); final RestClient restClient = RestClient.builder(host) .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); @@ -205,12 +178,12 @@ private OpenSearchClient newOpenSearchClient() { jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); final OpenSearchTransport transport = new RestClientTransport(restClient, jsonpMapper); OpenSearchClient openSearchClient = new OpenSearchClient(transport); - BooleanResponse booleanResponse = null; + BooleanResponse booleanResponse; try { booleanResponse = openSearchClient.ping(); log.info("OpenSearch Active: {}", booleanResponse.value()); } catch (IOException e) { - throw new RuntimeException(e); + log.warn("Search not configured"); } return openSearchClient; } diff --git a/api/src/main/java/marquez/MarquezConfig.java b/api/src/main/java/marquez/MarquezConfig.java index dc3848666c..d678fc7de5 100644 --- a/api/src/main/java/marquez/MarquezConfig.java +++ b/api/src/main/java/marquez/MarquezConfig.java @@ -16,7 +16,7 @@ import marquez.db.FlywayFactory; import marquez.graphql.GraphqlConfig; import marquez.jobs.DbRetentionConfig; -import marquez.search.ElasticConfig; +import marquez.search.SearchConfig; import marquez.service.models.Tag; import 
marquez.tracing.SentryConfig; @@ -46,8 +46,8 @@ public class MarquezConfig extends Configuration { private final SentryConfig sentry = new SentryConfig(); @Getter - @JsonProperty("elastic") - private final ElasticConfig elasticConfig = new ElasticConfig(); + @JsonProperty("search") + private final SearchConfig searchConfig = new SearchConfig(); @Getter @Setter diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index bb369e3a93..3385308ff6 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -175,7 +175,7 @@ private void index(IndexRequest> request) { this.openSearchClient.index(request); } } catch (IOException e) { - log.info("Failed to index event Elasticsearch not available.", e); + log.info("Failed to index event OpenSearch not available.", e); } } diff --git a/api/src/main/java/marquez/search/ElasticConfig.java b/api/src/main/java/marquez/search/SearchConfig.java similarity index 75% rename from api/src/main/java/marquez/search/ElasticConfig.java rename to api/src/main/java/marquez/search/SearchConfig.java index bf41c132fb..b15c6e6c75 100644 --- a/api/src/main/java/marquez/search/ElasticConfig.java +++ b/api/src/main/java/marquez/search/SearchConfig.java @@ -3,13 +3,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Getter; -public class ElasticConfig { +public class SearchConfig { public static final boolean ENABLED = false; public static final String SCHEME = "http"; - public static final String HOST = "search"; + public static final String HOST = "marquez-opensearch"; public static final int PORT = 9200; - public static final String USERNAME = "elastic"; - public static final String PASSWORD = "elastic"; + public static final String USERNAME = "admin"; + public static final String PASSWORD = "admin"; @Getter @JsonProperty private boolean enabled = ENABLED; diff --git a/build.gradle 
b/build.gradle index 9f0fe4ec0f..34645f80a4 100644 --- a/build.gradle +++ b/build.gradle @@ -52,7 +52,6 @@ subprojects { ext { assertjVersion = '3.25.3' - elasticsearchVersion = '8.13.4' dropwizardVersion = '2.1.12' jacksonDatabindVersion = '2.12.3' jacocoVersion = '0.8.11' @@ -66,7 +65,6 @@ subprojects { dependencies { implementation "org.projectlombok:lombok:${lombokVersion}" - implementation "co.elastic.clients:elasticsearch-java:${elasticsearchVersion}" implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonDatabindVersion}" implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' implementation 'org.opensearch.client:opensearch-java:2.6.0' diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index e7d0256950..f88ed7ca11 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -14,4 +14,4 @@ services: depends_on: - api - opensearch - entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "search:${ELASTICSEARCH_PORT}", "--", "./seed.sh"] + entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "opensearch:${SEARCH_PORT}", "--", "./seed.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index f661b85d57..abb61ba045 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,8 +6,6 @@ services: environment: - MARQUEZ_PORT=${API_PORT} - MARQUEZ_ADMIN_PORT=${API_ADMIN_PORT} - - ELASTIC_USER=elastic - - ELASTIC_PASSWORD=elastic ports: - "${API_PORT}:${API_PORT}" - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" @@ -24,7 +22,7 @@ services: - db:${POSTGRES_PORT} - -- - /opt/marquez/wait-for-it.sh - - opensearch:${ELASTICSEARCH_PORT} + - opensearch:${SEARCH_PORT} - -- - ./entrypoint.sh @@ -47,29 +45,6 @@ services: # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] -# search: -# image: elasticsearch:8.13.4 -# container_name: marquez-search -# ulimits: -# 
memlock: -# soft: -1 -# hard: -1 -# nofile: -# soft: 65536 -# hard: 65536 -# environment: -# - xpack.security.enabled=true -# - xpack.security.authc.api_key.enabled=true -# - discovery.type=single-node -# - ELASTIC_PASSWORD=elastic -# volumes: -# - elasticsearch-data:/usr/share/elasticsearch/data -# cap_add: -# - IPC_LOCK -# ports: -# - "9200:9200" -# - "9300:9300" - opensearch: image: opensearchproject/opensearch:2.5.0 container_name: marquez-opensearch @@ -94,7 +69,6 @@ services: volumes: data: - elasticsearch-data: opensearch-data: db-conf: db-init: diff --git a/docker/up.sh b/docker/up.sh index 8b747170d6..8da262924d 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -40,7 +40,7 @@ usage() { echo " -a, --api-port int api port (default: 5000)" echo " -m, --api-admin-port int api admin port (default: 5001)" echo " -w, --web-port int web port (default: 3000)" - echo " -e --es-port int elasticsearch port (default: 9200)" + echo " -e --search-port int search port (default: 9200)" echo " -t, --tag string docker image tag (default: ${VERSION})" echo " --args string docker arguments" echo @@ -66,7 +66,7 @@ API_PORT=5000 API_ADMIN_PORT=5001 WEB_PORT=3000 POSTGRES_PORT=5432 -ELASTICSEARCH_PORT=9200 +SEARCH_PORT=9200 NO_WEB="false" NO_VOLUMES="false" TAG="${VERSION}" @@ -91,9 +91,9 @@ while [ $# -gt 0 ]; do shift POSTGRES_PORT="${1}" ;; - -e|'--es-port') + -e|'--search-port') shift - ELASTICSEARCH_PORT="${1}" + SEARCH_PORT="${1}" ;; -t|'--tag') shift @@ -153,5 +153,5 @@ if [[ "${NO_VOLUMES}" = "false" ]]; then fi # Run docker compose cmd with overrides -DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} ELASTICSEARCH_PORT=${ELASTICSEARCH_PORT} TAG=${TAG} \ +DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} SEARCH_PORT=${SEARCH_PORT} TAG=${TAG} \ docker --log-level ERROR compose $compose_files up $compose_args 
diff --git a/marquez.dev.yml b/marquez.dev.yml index 1613418bbb..1ec5770bd0 100644 --- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -23,13 +23,13 @@ logging: appenders: - type: console -elastic: +search: enabled: true scheme: http - host: search + host: marquez-opensearch port: 9200 - username: elastic - password: elastic + username: admin + password: admin tags: - name: PII diff --git a/marquez.example.yml b/marquez.example.yml index b0aa31cb6f..e9e270d384 100644 --- a/marquez.example.yml +++ b/marquez.example.yml @@ -66,12 +66,13 @@ logging: # environment: ${SENTRY_ENVIRONMENT} # stacktraceAppPackages: ['marquez'] -elastic: +search: enabled: true scheme: http - host: search + host: marquez-opensearch port: 9200 - password: ${ELASTIC_PASSWORD} + username: admin + password: admin ### TRACING ### diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index 58c2100446..0de3ac208c 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -50,7 +50,7 @@ const useEscapeShortcut = (callback: () => void) => { }, [callback]) } -const elasticSearchEnabled = true +const isAdvancedSearchEnabled = true interface StateProps { isLoading: boolean @@ -188,7 +188,7 @@ const Search: React.FC = ({ isLoading }: StateProps) => { overflow={'auto'} maxHeight={`calc(100vh - ${HEADER_HEIGHT}px - 24px)`} > - {elasticSearchEnabled ? ( + {isAdvancedSearchEnabled ? ( ) : ( From 37e80ed9485f269a00daea4dbb81ddebbeb0241d Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 9 Jul 2024 11:58:20 -0700 Subject: [PATCH 34/87] Isolation of search code, calling services. 
--- api/src/main/java/marquez/MarquezContext.java | 21 +- .../java/marquez/api/OpenLineageResource.java | 91 +-------- .../main/java/marquez/api/SearchResource.java | 90 +-------- .../java/marquez/service/SearchService.java | 185 ++++++++++++++++++ .../java/marquez/service/ServiceFactory.java | 1 + .../marquez/api/OpenLineageResourceTest.java | 4 +- 6 files changed, 201 insertions(+), 191 deletions(-) create mode 100644 api/src/main/java/marquez/service/SearchService.java diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index a7a6d53dbf..a95f78425f 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -43,19 +43,7 @@ import marquez.db.TagDao; import marquez.graphql.GraphqlSchemaBuilder; import marquez.graphql.MarquezGraphqlServletBuilder; -import marquez.service.ColumnLineageService; -import marquez.service.DatasetFieldService; -import marquez.service.DatasetService; -import marquez.service.DatasetVersionService; -import marquez.service.JobService; -import marquez.service.LineageService; -import marquez.service.NamespaceService; -import marquez.service.OpenLineageService; -import marquez.service.RunService; -import marquez.service.RunTransitionListener; -import marquez.service.ServiceFactory; -import marquez.service.SourceService; -import marquez.service.TagService; +import marquez.service.*; import marquez.service.models.Tag; import org.jdbi.v3.core.Jdbi; import org.opensearch.client.opensearch.OpenSearchClient; @@ -90,6 +78,7 @@ public final class MarquezContext { @Getter private final OpenLineageService openLineageService; @Getter private final LineageService lineageService; @Getter private final ColumnLineageService columnLineageService; + @Getter private final SearchService searchService; @Getter private final NamespaceResource namespaceResource; @Getter private final SourceResource sourceResource; @Getter private final DatasetResource 
datasetResource; @@ -145,6 +134,7 @@ private MarquezContext( this.openLineageService = new OpenLineageService(baseDao, runService); this.lineageService = new LineageService(lineageDao, jobDao); this.columnLineageService = new ColumnLineageService(columnLineageDao, datasetFieldDao); + this.searchService = new SearchService(openSearchClient); this.jdbiException = new JdbiExceptionExceptionMapper(); this.jsonException = new JsonProcessingExceptionMapper(); final ServiceFactory serviceFactory = @@ -155,6 +145,7 @@ private MarquezContext( .namespaceService(namespaceService) .tagService(tagService) .openLineageService(openLineageService) + .searchService(searchService) .sourceService(sourceService) .lineageService(lineageService) .columnLineageService(columnLineageService) @@ -168,8 +159,8 @@ private MarquezContext( this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); this.openLineageResource = - new OpenLineageResource(serviceFactory, openSearchClient, openLineageDao); - this.searchResource = new SearchResource(searchDao, openSearchClient); + new OpenLineageResource(serviceFactory, openLineageDao); + this.searchResource = new SearchResource(serviceFactory, searchDao, openSearchClient); this.resources = ImmutableList.of( diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index 3385308ff6..b7f8f9e233 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -15,13 +15,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import io.dropwizard.jersey.jsr310.ZonedDateTimeParam; -import java.io.IOException; import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import 
java.util.Map; import java.util.UUID; import java.util.concurrent.CompletionException; import javax.validation.Valid; @@ -47,23 +44,17 @@ import marquez.service.models.BaseEvent; import marquez.service.models.LineageEvent; import marquez.service.models.NodeId; -import org.opensearch.client.opensearch.OpenSearchClient; -import org.opensearch.client.opensearch.core.IndexRequest; - @Slf4j @Path("/api/v1") public class OpenLineageResource extends BaseResource { private static final String DEFAULT_DEPTH = "20"; - private final OpenSearchClient openSearchClient; private final OpenLineageDao openLineageDao; public OpenLineageResource( @NonNull final ServiceFactory serviceFactory, - @NonNull final OpenSearchClient openSearchClient, @NonNull final OpenLineageDao openLineageDao) { super(serviceFactory); - this.openSearchClient = openSearchClient; this.openLineageDao = openLineageDao; } @@ -76,7 +67,7 @@ public OpenLineageResource( @Path("/lineage") public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncResponse asyncResponse) throws JsonProcessingException, SQLException { - indexEvent((LineageEvent) event); + serviceFactory.getSearchService().indexEvent((LineageEvent) event); if (event instanceof LineageEvent) { openLineageService .createAsync((LineageEvent) event) @@ -99,86 +90,6 @@ private UUID runUuidFromEvent(LineageEvent.Run run) { return runUuid; } - private void indexEvent(@Valid @NotNull LineageEvent event) { - if (this.openSearchClient != null) { - UUID runUuid = runUuidFromEvent(event.getRun()); - log.info("Indexing event {}", event); - - if (event.getInputs() != null) { - indexDatasets(event.getInputs(), runUuid, event); - } - if (event.getOutputs() != null) { - indexDatasets(event.getOutputs(), runUuid, event); - } - indexJob(runUuid, event); - } - } - - private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { - Map jsonMap = new HashMap<>(); - - jsonMap.put("run_id", runUuid.toString()); - jsonMap.put("eventType", 
event.getEventType()); - jsonMap.put("name", event.getJob().getName()); - jsonMap.put("type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH"); - jsonMap.put("namespace", event.getJob().getNamespace()); - jsonMap.put("facets", event.getJob().getFacets()); - jsonMap.put("runFacets", event.getRun().getFacets()); - return jsonMap; - } - - private Map buildDatasetIndexRequest( - UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { - Map jsonMap = new HashMap<>(); - jsonMap.put("run_id", runUuid.toString()); - jsonMap.put("eventType", event.getEventType()); - jsonMap.put("name", dataset.getName()); - jsonMap.put("inputFacets", dataset.getInputFacets()); - jsonMap.put("outputFacets", dataset.getOutputFacets()); - jsonMap.put("namespace", dataset.getNamespace()); - jsonMap.put("facets", dataset.getFacets()); - return jsonMap; - } - - private void indexJob(UUID runUuid, LineageEvent event) { - index( - IndexRequest.of( - i -> - i.index("jobs") - .id( - String.format( - "JOB:%s:%s", event.getJob().getNamespace(), event.getJob().getName())) - .document(buildJobIndexRequest(runUuid, event)))); - } - - private void indexDatasets( - List datasets, UUID runUuid, LineageEvent event) { - datasets.stream() - .map(dataset -> buildDatasetIndexRequest(runUuid, dataset, event)) - .forEach( - jsonMap -> { - index( - IndexRequest.of( - i -> - i.index("datasets") - .id( - String.format( - "DATASET:%s:%s", - jsonMap.get("namespace"), jsonMap.get("name"))) - .document(jsonMap))); - }); - } - - private void index(IndexRequest> request) { - try { - if (this.openSearchClient != null) { - this.openSearchClient.index(request); - } - } catch (IOException e) { - log.info("Failed to index event OpenSearch not available.", e); - } - } - private void onComplete(Void result, Throwable err, AsyncResponse asyncResponse) { if (err != null) { log.error("Unexpected error while processing request", err); diff --git a/api/src/main/java/marquez/api/SearchResource.java 
b/api/src/main/java/marquez/api/SearchResource.java index 1e5a9c7180..54727e67b3 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -14,7 +14,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -37,12 +36,10 @@ import marquez.api.models.SearchResult; import marquez.api.models.SearchSort; import marquez.db.SearchDao; +import marquez.service.SearchService; +import marquez.service.ServiceFactory; import org.opensearch.client.opensearch.OpenSearchClient; -import org.opensearch.client.opensearch._types.query_dsl.Operator; -import org.opensearch.client.opensearch._types.query_dsl.TextQueryType; import org.opensearch.client.opensearch.core.SearchResponse; -import org.opensearch.client.opensearch.core.search.BuiltinHighlighterType; -import org.opensearch.client.opensearch.core.search.HighlighterType; import org.opensearch.client.opensearch.core.search.Hit; @Slf4j @@ -53,13 +50,13 @@ public class SearchResource { private static final String DEFAULT_LIMIT = "10"; private static final int MIN_LIMIT = 0; + private final SearchService searchService; private final SearchDao searchDao; - private final OpenSearchClient openSearchClient; public SearchResource( - @NonNull final SearchDao searchDao, @Nullable final OpenSearchClient openSearchClient) { + @NonNull final ServiceFactory serviceFactory, @NonNull final SearchDao searchDao, @Nullable final OpenSearchClient openSearchClient) { + this.searchService = serviceFactory.getSearchService(); this.searchDao = searchDao; - this.openSearchClient = openSearchClient; } @Timed @@ -94,44 +91,7 @@ public Response search( @Produces(APPLICATION_JSON) @Path("/jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - if 
(this.openSearchClient != null) { - String[] fields = { - "facets.sql.query", - "facets.sourceCode.sourceCode", - "facets.sourceCode.language", - "runFacets.processing_engine.name", - "run_id", - "name", - "namespace", - "type" - }; - SearchResponse response = - this.openSearchClient.search( - s -> { - s.index("jobs") - .query( - q -> - q.multiMatch( - m -> - m.query(query) - .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) - .operator(Operator.Or))); - s.highlight( - hl -> { - for (String field : fields) { - hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); - } - return hl; - }); - return s; - }, - ObjectNode.class); - - return formatEsResponse(response); - } else { - return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); - } + return formatEsResponse(this.searchService.searchJobs(query)); } @Timed @@ -141,43 +101,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @Produces(APPLICATION_JSON) @Path("/datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { - if (this.openSearchClient != null) { - String[] fields = { - "run_id", - "name", - "namespace", - "facets.schema.fields.name", - "facets.schema.fields.type", - "facets.columnLineage.fields.*.inputFields.name", - "facets.columnLineage.fields.*.inputFields.namespace", - "facets.columnLineage.fields.*.inputFields.field", - "facets.columnLineage.fields.*.transformationDescription", - "facets.columnLineage.fields.*.transformationType" - }; - SearchResponse response = - this.openSearchClient.search( - s -> - s.index("datasets") - .query( - q -> - q.multiMatch( - m -> m.query(query) - .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) - .operator(Operator.Or))) - .highlight( - hl -> { - for (String field : fields) { - hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); - } - 
return hl; - }), - ObjectNode.class); - - return formatEsResponse(response); - } else { - return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); - } + return formatEsResponse(this.searchService.searchDatasets(query)); } private Response formatEsResponse(SearchResponse response) { diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java new file mode 100644 index 0000000000..865628119e --- /dev/null +++ b/api/src/main/java/marquez/service/SearchService.java @@ -0,0 +1,185 @@ +package marquez.service; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import marquez.service.models.LineageEvent; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.query_dsl.Operator; +import org.opensearch.client.opensearch._types.query_dsl.TextQueryType; +import org.opensearch.client.opensearch.core.IndexRequest; +import org.opensearch.client.opensearch.core.SearchResponse; +import org.opensearch.client.opensearch.core.search.BuiltinHighlighterType; +import org.opensearch.client.opensearch.core.search.HighlighterType; + +import javax.validation.Valid; +import javax.validation.constraints.NotNull; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.List; + +@Slf4j +public class SearchService { + + private final OpenSearchClient openSearchClient; + + public SearchService(@NonNull final OpenSearchClient openSearchClient) { + this.openSearchClient = openSearchClient; + } + + public SearchResponse searchDatasets(String query) throws IOException { + String[] fields = { + "run_id", + "name", + "namespace", + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + 
"facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType" + }; + return this.openSearchClient.search( + s -> + s.index("datasets") + .query( + q -> + q.multiMatch( + m -> m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))) + .highlight( + hl -> { + for (String field : fields) { + hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); + } + return hl; + }), + ObjectNode.class); + } + + public SearchResponse searchJobs(String query) throws IOException { + String[] fields = { + "facets.sql.query", + "facets.sourceCode.sourceCode", + "facets.sourceCode.language", + "runFacets.processing_engine.name", + "run_id", + "name", + "namespace", + "type" + }; + return this.openSearchClient.search( + s -> { + s.index("jobs") + .query( + q -> + q.multiMatch( + m -> + m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))); + s.highlight( + hl -> { + for (String field : fields) { + hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); + } + return hl; + }); + return s; + }, + ObjectNode.class); + } + + public void indexEvent(@Valid @NotNull LineageEvent event) { + UUID runUuid = runUuidFromEvent(event.getRun()); + log.info("Indexing event {}", event); + + if (event.getInputs() != null) { + indexDatasets(event.getInputs(), runUuid, event); + } + if (event.getOutputs() != null) { + indexDatasets(event.getOutputs(), runUuid, event); + } + indexJob(runUuid, event); + } + + private UUID runUuidFromEvent(LineageEvent.Run run) { + UUID runUuid; + try { + runUuid = UUID.fromString(run.getRunId()); + } catch (Exception e) { + runUuid = UUID.nameUUIDFromBytes(run.getRunId().getBytes(StandardCharsets.UTF_8)); + } 
+ return runUuid; + } + + private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { + Map jsonMap = new HashMap<>(); + + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", event.getJob().getName()); + jsonMap.put("type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH"); + jsonMap.put("namespace", event.getJob().getNamespace()); + jsonMap.put("facets", event.getJob().getFacets()); + jsonMap.put("runFacets", event.getRun().getFacets()); + return jsonMap; + } + + private Map buildDatasetIndexRequest( + UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { + Map jsonMap = new HashMap<>(); + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", dataset.getName()); + jsonMap.put("inputFacets", dataset.getInputFacets()); + jsonMap.put("outputFacets", dataset.getOutputFacets()); + jsonMap.put("namespace", dataset.getNamespace()); + jsonMap.put("facets", dataset.getFacets()); + return jsonMap; + } + + private void indexJob(UUID runUuid, LineageEvent event) { + index( + IndexRequest.of( + i -> + i.index("jobs") + .id( + String.format( + "JOB:%s:%s", event.getJob().getNamespace(), event.getJob().getName())) + .document(buildJobIndexRequest(runUuid, event)))); + } + + private void indexDatasets( + List datasets, UUID runUuid, LineageEvent event) { + datasets.stream() + .map(dataset -> buildDatasetIndexRequest(runUuid, dataset, event)) + .forEach( + jsonMap -> index( + IndexRequest.of( + i -> + i.index("datasets") + .id( + String.format( + "DATASET:%s:%s", + jsonMap.get("namespace"), jsonMap.get("name"))) + .document(jsonMap)))); + } + + private void index(IndexRequest> request) { + try { + this.openSearchClient.index(request); + } catch (IOException e) { + log.info("Failed to index event OpenSearch not available.", e); + } + } + +} diff --git a/api/src/main/java/marquez/service/ServiceFactory.java 
b/api/src/main/java/marquez/service/ServiceFactory.java index 37169f4c2a..86d067bf0f 100644 --- a/api/src/main/java/marquez/service/ServiceFactory.java +++ b/api/src/main/java/marquez/service/ServiceFactory.java @@ -23,4 +23,5 @@ public class ServiceFactory { @NonNull DatasetFieldService datasetFieldService; @NonNull LineageService lineageService; @NonNull ColumnLineageService columnLineageService; + @NonNull SearchService searchService; } diff --git a/api/src/test/java/marquez/api/OpenLineageResourceTest.java b/api/src/test/java/marquez/api/OpenLineageResourceTest.java index aa982e5e14..776c43a452 100644 --- a/api/src/test/java/marquez/api/OpenLineageResourceTest.java +++ b/api/src/test/java/marquez/api/OpenLineageResourceTest.java @@ -13,7 +13,6 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableSortedSet; import io.dropwizard.testing.junit5.DropwizardExtensionsSupport; @@ -30,7 +29,6 @@ import marquez.service.models.NodeId; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.opensearch.client.opensearch.OpenSearchClient; @ExtendWith(DropwizardExtensionsSupport.class) class OpenLineageResourceTest { @@ -58,7 +56,7 @@ class OpenLineageResourceTest { ResourceExtension.builder() .addResource( new OpenLineageResource( - serviceFactory, new OpenSearchClient(null), openLineageDao)) + serviceFactory, openLineageDao)) .build(); } From 843baaf7f47da5252e09d394cae36475fc93107a Mon Sep 17 00:00:00 2001 From: phixMe Date: Wed, 10 Jul 2024 16:14:23 -0700 Subject: [PATCH 35/87] Adding config to support multiple instances. 
--- .../java/marquez/search/SearchConfig.java | 2 +- docker-compose.yml | 57 ++++++++++++++++++- marquez.dev.yml | 2 +- .../components/search/es-search/EsSearch.tsx | 1 + 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/marquez/search/SearchConfig.java b/api/src/main/java/marquez/search/SearchConfig.java index b15c6e6c75..427a76701d 100644 --- a/api/src/main/java/marquez/search/SearchConfig.java +++ b/api/src/main/java/marquez/search/SearchConfig.java @@ -6,7 +6,7 @@ public class SearchConfig { public static final boolean ENABLED = false; public static final String SCHEME = "http"; - public static final String HOST = "marquez-opensearch"; + public static final String HOST = "opensearch"; public static final int PORT = 9200; public static final String USERNAME = "admin"; public static final String PASSWORD = "admin"; diff --git a/docker-compose.yml b/docker-compose.yml index abb61ba045..874390b115 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,8 @@ services: - opensearch:${SEARCH_PORT} - -- - ./entrypoint.sh + networks: + - opensearch-net db: image: postgres:14 @@ -44,10 +46,13 @@ services: command: ["postgres", "-c", "config_file=/etc/postgresql/postgresql.conf"] # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] + networks: + - opensearch-net opensearch: image: opensearchproject/opensearch:2.5.0 - container_name: marquez-opensearch + container_name: opensearch + hostname: opensearch ulimits: memlock: soft: -1 @@ -56,8 +61,13 @@ services: soft: 65536 hard: 65536 environment: + - cluster.name=opensearch-cluster + - node.name=opensearch + - discovery.seed_hosts=opensearch,opensearch-2 + - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - bootstrap.memory_lock=true - plugins.security.ssl.http.enabled=false - - discovery.type=single-node + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" 
- OPENSEARCH_PASSWORD=admin volumes: - opensearch-data:/usr/share/opensearch/data @@ -66,10 +76,53 @@ services: ports: - "9200:9200" - "9300:9300" + networks: + - opensearch-net + + opensearch-2: + image: opensearchproject/opensearch:2.5.0 + container_name: opensearch-2 + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-2 + - discovery.seed_hosts=opensearch,opensearch-2 + - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - bootstrap.memory_lock=true + - plugins.security.ssl.http.enabled=false + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - OPENSEARCH_PASSWORD=admin + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-2-data:/usr/share/opensearch/data + networks: + - opensearch-net + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.5.0 + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch:9200","http://opensearch-2:9200"]' + - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards + networks: + - opensearch-net volumes: data: opensearch-data: + opensearch-2-data: db-conf: db-init: db-backup: + +networks: + opensearch-net: diff --git a/marquez.dev.yml b/marquez.dev.yml index 1ec5770bd0..78fcbb3d64 100644 --- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -26,7 +26,7 @@ logging: search: enabled: true scheme: http - host: marquez-opensearch + host: opensearch port: 9200 username: admin password: admin diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 27eb2b1810..30a9888819 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -363,6 +363,7 @@ const EsSearch: React.FC = ({ 
key={field.name} label={field.name} variant={'outlined'} + color={field.name.toLowerCase().includes(search.toLowerCase()) ? 'primary' : 'default'} size={'small'} sx={{ mr: 1 }} /> From 4faba34558bbe00f0a9ce06883cb74a9aa06944a Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 12 Jul 2024 11:42:16 -0700 Subject: [PATCH 36/87] Spotless --- api/src/main/java/marquez/MarquezApp.java | 29 +- api/src/main/java/marquez/MarquezContext.java | 18 +- .../java/marquez/api/OpenLineageResource.java | 4 +- .../main/java/marquez/api/SearchResource.java | 6 +- .../java/marquez/service/SearchService.java | 333 +++++++++--------- .../marquez/api/OpenLineageResourceTest.java | 4 +- 6 files changed, 210 insertions(+), 184 deletions(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 1360dbfc1e..3fd1785368 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -22,7 +22,6 @@ import io.prometheus.client.exporter.MetricsServlet; import io.prometheus.client.hotspot.DefaultExports; import io.sentry.Sentry; - import java.io.IOException; import java.util.EnumSet; import javax.servlet.DispatcherType; @@ -166,11 +165,17 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { } private OpenSearchClient newOpenSearchClient(SearchConfig searchConfig) { - final HttpHost host = new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); + final HttpHost host = + new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials(searchConfig.getUsername(), searchConfig.getPassword())); - final RestClient restClient = RestClient.builder(host) - .setHttpClientConfigCallback(httpClientBuilder -> 
httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) + credentialsProvider.setCredentials( + new AuthScope(host), + new UsernamePasswordCredentials(searchConfig.getUsername(), searchConfig.getPassword())); + final RestClient restClient = + RestClient.builder(host) + .setHttpClientConfigCallback( + httpClientBuilder -> + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .build(); JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); @@ -178,13 +183,13 @@ private OpenSearchClient newOpenSearchClient(SearchConfig searchConfig) { jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); final OpenSearchTransport transport = new RestClientTransport(restClient, jsonpMapper); OpenSearchClient openSearchClient = new OpenSearchClient(transport); - BooleanResponse booleanResponse; - try { - booleanResponse = openSearchClient.ping(); - log.info("OpenSearch Active: {}", booleanResponse.value()); - } catch (IOException e) { - log.warn("Search not configured"); - } + BooleanResponse booleanResponse; + try { + booleanResponse = openSearchClient.ping(); + log.info("OpenSearch Active: {}", booleanResponse.value()); + } catch (IOException e) { + log.warn("Search not configured"); + } return openSearchClient; } diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index a95f78425f..bef6eddb85 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -43,7 +43,20 @@ import marquez.db.TagDao; import marquez.graphql.GraphqlSchemaBuilder; import marquez.graphql.MarquezGraphqlServletBuilder; -import marquez.service.*; +import marquez.service.ColumnLineageService; +import marquez.service.DatasetFieldService; +import marquez.service.DatasetService; +import marquez.service.DatasetVersionService; +import marquez.service.JobService; +import marquez.service.LineageService; +import marquez.service.NamespaceService; +import 
marquez.service.OpenLineageService; +import marquez.service.RunService; +import marquez.service.RunTransitionListener; +import marquez.service.SearchService; +import marquez.service.ServiceFactory; +import marquez.service.SourceService; +import marquez.service.TagService; import marquez.service.models.Tag; import org.jdbi.v3.core.Jdbi; import org.opensearch.client.opensearch.OpenSearchClient; @@ -158,8 +171,7 @@ private MarquezContext( this.columnLineageResource = new ColumnLineageResource(serviceFactory); this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); - this.openLineageResource = - new OpenLineageResource(serviceFactory, openLineageDao); + this.openLineageResource = new OpenLineageResource(serviceFactory, openLineageDao); this.searchResource = new SearchResource(serviceFactory, searchDao, openSearchClient); this.resources = diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index b7f8f9e233..f5686c3941 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -44,6 +44,7 @@ import marquez.service.models.BaseEvent; import marquez.service.models.LineageEvent; import marquez.service.models.NodeId; + @Slf4j @Path("/api/v1") public class OpenLineageResource extends BaseResource { @@ -52,8 +53,7 @@ public class OpenLineageResource extends BaseResource { private final OpenLineageDao openLineageDao; public OpenLineageResource( - @NonNull final ServiceFactory serviceFactory, - @NonNull final OpenLineageDao openLineageDao) { + @NonNull final ServiceFactory serviceFactory, @NonNull final OpenLineageDao openLineageDao) { super(serviceFactory); this.openLineageDao = openLineageDao; } diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 54727e67b3..1eda85fa8c 100644 
--- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -54,7 +54,9 @@ public class SearchResource { private final SearchDao searchDao; public SearchResource( - @NonNull final ServiceFactory serviceFactory, @NonNull final SearchDao searchDao, @Nullable final OpenSearchClient openSearchClient) { + @NonNull final ServiceFactory serviceFactory, + @NonNull final SearchDao searchDao, + @Nullable final OpenSearchClient openSearchClient) { this.searchService = serviceFactory.getSearchService(); this.searchDao = searchDao; } @@ -91,7 +93,7 @@ public Response search( @Produces(APPLICATION_JSON) @Path("/jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - return formatEsResponse(this.searchService.searchJobs(query)); + return formatEsResponse(this.searchService.searchJobs(query)); } @Timed diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index 865628119e..4a88c2dee7 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -1,6 +1,15 @@ package marquez.service; import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import javax.validation.Valid; +import javax.validation.constraints.NotNull; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import marquez.service.models.LineageEvent; @@ -12,174 +21,174 @@ import org.opensearch.client.opensearch.core.search.BuiltinHighlighterType; import org.opensearch.client.opensearch.core.search.HighlighterType; -import javax.validation.Valid; -import javax.validation.constraints.NotNull; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import 
java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.List; - @Slf4j public class SearchService { - private final OpenSearchClient openSearchClient; - - public SearchService(@NonNull final OpenSearchClient openSearchClient) { - this.openSearchClient = openSearchClient; - } - - public SearchResponse searchDatasets(String query) throws IOException { - String[] fields = { - "run_id", - "name", - "namespace", - "facets.schema.fields.name", - "facets.schema.fields.type", - "facets.columnLineage.fields.*.inputFields.name", - "facets.columnLineage.fields.*.inputFields.namespace", - "facets.columnLineage.fields.*.inputFields.field", - "facets.columnLineage.fields.*.transformationDescription", - "facets.columnLineage.fields.*.transformationType" - }; - return this.openSearchClient.search( - s -> - s.index("datasets") - .query( - q -> - q.multiMatch( - m -> m.query(query) - .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) - .operator(Operator.Or))) - .highlight( - hl -> { - for (String field : fields) { - hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); - } - return hl; - }), - ObjectNode.class); - } - - public SearchResponse searchJobs(String query) throws IOException { - String[] fields = { - "facets.sql.query", - "facets.sourceCode.sourceCode", - "facets.sourceCode.language", - "runFacets.processing_engine.name", - "run_id", - "name", - "namespace", - "type" - }; - return this.openSearchClient.search( - s -> { - s.index("jobs") - .query( - q -> - q.multiMatch( - m -> - m.query(query) - .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) - .operator(Operator.Or))); - s.highlight( - hl -> { - for (String field : fields) { - hl.fields(field, f -> f.type(HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); - } - return hl; - }); - return s; - }, - ObjectNode.class); + private final OpenSearchClient openSearchClient; + + public 
SearchService(@NonNull final OpenSearchClient openSearchClient) { + this.openSearchClient = openSearchClient; + } + + public SearchResponse searchDatasets(String query) throws IOException { + String[] fields = { + "run_id", + "name", + "namespace", + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + "facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType" + }; + return this.openSearchClient.search( + s -> + s.index("datasets") + .query( + q -> + q.multiMatch( + m -> + m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))) + .highlight( + hl -> { + for (String field : fields) { + hl.fields( + field, + f -> + f.type( + HighlighterType.of( + fn -> fn.builtin(BuiltinHighlighterType.Plain)))); + } + return hl; + }), + ObjectNode.class); + } + + public SearchResponse searchJobs(String query) throws IOException { + String[] fields = { + "facets.sql.query", + "facets.sourceCode.sourceCode", + "facets.sourceCode.language", + "runFacets.processing_engine.name", + "run_id", + "name", + "namespace", + "type" + }; + return this.openSearchClient.search( + s -> { + s.index("jobs") + .query( + q -> + q.multiMatch( + m -> + m.query(query) + .type(TextQueryType.PhrasePrefix) + .fields(Arrays.stream(fields).toList()) + .operator(Operator.Or))); + s.highlight( + hl -> { + for (String field : fields) { + hl.fields( + field, + f -> + f.type( + HighlighterType.of(fn -> fn.builtin(BuiltinHighlighterType.Plain)))); + } + return hl; + }); + return s; + }, + ObjectNode.class); + } + + public void indexEvent(@Valid @NotNull LineageEvent event) { + UUID runUuid = runUuidFromEvent(event.getRun()); + log.info("Indexing event {}", event); + + if (event.getInputs() != null) { + 
indexDatasets(event.getInputs(), runUuid, event); } - - public void indexEvent(@Valid @NotNull LineageEvent event) { - UUID runUuid = runUuidFromEvent(event.getRun()); - log.info("Indexing event {}", event); - - if (event.getInputs() != null) { - indexDatasets(event.getInputs(), runUuid, event); - } - if (event.getOutputs() != null) { - indexDatasets(event.getOutputs(), runUuid, event); - } - indexJob(runUuid, event); + if (event.getOutputs() != null) { + indexDatasets(event.getOutputs(), runUuid, event); } - - private UUID runUuidFromEvent(LineageEvent.Run run) { - UUID runUuid; - try { - runUuid = UUID.fromString(run.getRunId()); - } catch (Exception e) { - runUuid = UUID.nameUUIDFromBytes(run.getRunId().getBytes(StandardCharsets.UTF_8)); - } - return runUuid; + indexJob(runUuid, event); + } + + private UUID runUuidFromEvent(LineageEvent.Run run) { + UUID runUuid; + try { + runUuid = UUID.fromString(run.getRunId()); + } catch (Exception e) { + runUuid = UUID.nameUUIDFromBytes(run.getRunId().getBytes(StandardCharsets.UTF_8)); } - - private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { - Map jsonMap = new HashMap<>(); - - jsonMap.put("run_id", runUuid.toString()); - jsonMap.put("eventType", event.getEventType()); - jsonMap.put("name", event.getJob().getName()); - jsonMap.put("type", event.getJob().isStreamingJob() ? 
"STREAM" : "BATCH"); - jsonMap.put("namespace", event.getJob().getNamespace()); - jsonMap.put("facets", event.getJob().getFacets()); - jsonMap.put("runFacets", event.getRun().getFacets()); - return jsonMap; - } - - private Map buildDatasetIndexRequest( - UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { - Map jsonMap = new HashMap<>(); - jsonMap.put("run_id", runUuid.toString()); - jsonMap.put("eventType", event.getEventType()); - jsonMap.put("name", dataset.getName()); - jsonMap.put("inputFacets", dataset.getInputFacets()); - jsonMap.put("outputFacets", dataset.getOutputFacets()); - jsonMap.put("namespace", dataset.getNamespace()); - jsonMap.put("facets", dataset.getFacets()); - return jsonMap; - } - - private void indexJob(UUID runUuid, LineageEvent event) { - index( - IndexRequest.of( + return runUuid; + } + + private Map buildJobIndexRequest(UUID runUuid, LineageEvent event) { + Map jsonMap = new HashMap<>(); + + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", event.getJob().getName()); + jsonMap.put("type", event.getJob().isStreamingJob() ? 
"STREAM" : "BATCH"); + jsonMap.put("namespace", event.getJob().getNamespace()); + jsonMap.put("facets", event.getJob().getFacets()); + jsonMap.put("runFacets", event.getRun().getFacets()); + return jsonMap; + } + + private Map buildDatasetIndexRequest( + UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { + Map jsonMap = new HashMap<>(); + jsonMap.put("run_id", runUuid.toString()); + jsonMap.put("eventType", event.getEventType()); + jsonMap.put("name", dataset.getName()); + jsonMap.put("inputFacets", dataset.getInputFacets()); + jsonMap.put("outputFacets", dataset.getOutputFacets()); + jsonMap.put("namespace", dataset.getNamespace()); + jsonMap.put("facets", dataset.getFacets()); + return jsonMap; + } + + private void indexJob(UUID runUuid, LineageEvent event) { + index( + IndexRequest.of( + i -> + i.index("jobs") + .id( + String.format( + "JOB:%s:%s", event.getJob().getNamespace(), event.getJob().getName())) + .document(buildJobIndexRequest(runUuid, event)))); + } + + private void indexDatasets( + List datasets, UUID runUuid, LineageEvent event) { + datasets.stream() + .map(dataset -> buildDatasetIndexRequest(runUuid, dataset, event)) + .forEach( + jsonMap -> + index( + IndexRequest.of( i -> - i.index("jobs") - .id( - String.format( - "JOB:%s:%s", event.getJob().getNamespace(), event.getJob().getName())) - .document(buildJobIndexRequest(runUuid, event)))); + i.index("datasets") + .id( + String.format( + "DATASET:%s:%s", + jsonMap.get("namespace"), jsonMap.get("name"))) + .document(jsonMap)))); + } + + private void index(IndexRequest> request) { + try { + this.openSearchClient.index(request); + } catch (IOException e) { + log.info("Failed to index event OpenSearch not available.", e); } - - private void indexDatasets( - List datasets, UUID runUuid, LineageEvent event) { - datasets.stream() - .map(dataset -> buildDatasetIndexRequest(runUuid, dataset, event)) - .forEach( - jsonMap -> index( - IndexRequest.of( - i -> - i.index("datasets") - .id( - 
String.format( - "DATASET:%s:%s", - jsonMap.get("namespace"), jsonMap.get("name"))) - .document(jsonMap)))); - } - - private void index(IndexRequest> request) { - try { - this.openSearchClient.index(request); - } catch (IOException e) { - log.info("Failed to index event OpenSearch not available.", e); - } - } - + } } diff --git a/api/src/test/java/marquez/api/OpenLineageResourceTest.java b/api/src/test/java/marquez/api/OpenLineageResourceTest.java index 776c43a452..9174e520cb 100644 --- a/api/src/test/java/marquez/api/OpenLineageResourceTest.java +++ b/api/src/test/java/marquez/api/OpenLineageResourceTest.java @@ -54,9 +54,7 @@ class OpenLineageResourceTest { UNDER_TEST = ResourceExtension.builder() - .addResource( - new OpenLineageResource( - serviceFactory, openLineageDao)) + .addResource(new OpenLineageResource(serviceFactory, openLineageDao)) .build(); } From df0bc84d1d86ac930cf818d75d0da5c33b5aabac Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 12 Jul 2024 12:38:53 -0700 Subject: [PATCH 37/87] Adding helm files. --- chart/Chart.yaml | 4 ++++ chart/README.md | 22 ++++++++++++---------- chart/values.yaml | 7 +++++++ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 9a8479a12f..e1753c2228 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -10,6 +10,10 @@ dependencies: name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.8.1 + - condition: opensearch.enabled + name: opensearch + repository: https://opensearch-project.github.io/helm-charts + version: 2.21.0 description: Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata. 
home: https://github.com/MarquezProject/marquez/tree/main/chart icon: https://raw.githubusercontent.com/MarquezProject/marquez/main/web/src/img/marquez-logo.png diff --git a/chart/README.md b/chart/README.md index d36f273371..d7127e7291 100644 --- a/chart/README.md +++ b/chart/README.md @@ -19,7 +19,7 @@ helm install marquez . --dependency-update To install the chart with the release name `marquez` using a fresh Postgres instance. ```bash -helm install marquez . --dependency-update --set postgresql.enabled=true +helm install marquez . --dependency-update --set postgresql.enabled=true --set opensearch.enabled=true ``` > **Note:** For a list of parameters that can be overridden during installation, see the [configuration](#configuration) section. @@ -84,14 +84,16 @@ helm delete marquez ### [Postgres](https://github.com/bitnami/charts/blob/master/bitnami/postgresql/values.yaml) (sub-chart) **parameters** -| Parameter | Description | Default | -|----------------------------------|---------------------------------|-----------| -| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | -| `postgresql.image.tag` | PostgreSQL image version | `12.1.0` | -| `postgresql.auth.username` | PostgreSQL username | `buendia` | -| `postgresql.auth.password` | PostgreSQL password | `macondo` | -| `postgresql.auth.database` | PostgreSQL database | `marquez` | -| `postgresql.auth.existingSecret` | Name of existing secret object | `nil` | +| Parameter | Description | Default | +|----------------------------------|--------------------------------|-----------| +| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | +| `opensearch.enabled` | Deploy Opensearch container(s) | `false` | +| `postgresql.image.tag` | PostgreSQL image version | `12.1.0` | +| `postgresql.auth.username` | PostgreSQL username | `buendia` | +| `postgresql.auth.password` | PostgreSQL password | `macondo` | +| `postgresql.auth.database` | PostgreSQL database | `marquez` | +| 
`opensearch.extraEnvs` | OpenSearch initial admin password (`OPENSEARCH_INITIAL_ADMIN_PASSWORD`) | `admin` | +| `postgresql.auth.existingSecret` | Name of existing secret object | `nil` | ### Common **parameters** @@ -128,7 +130,7 @@ helm delete marquez The quickest way to install Marquez via Kubernetes is to create a local Postgres instance. ```bash -helm install marquez . --dependency-update --set postgresql.enabled=true +helm install marquez . --dependency-update --set postgresql.enabled=true --set opensearch.enabled=true ``` ### Docker Postgres diff --git a/chart/values.yaml b/chart/values.yaml index 3e6eee6606..c1cbf66712 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -139,6 +139,13 @@ postgresql: ## existingSecret: "" +opensearch: + enabled: false + ## Opensearch password + extraEnvs: + - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD + value: admin + ## Additional labels to all the deployed resources; note that ## the following standard labels will automatically be applied. ## app.kubernetes.io/name, helm.sh/chart, From bf459efa1d18c9ad9cca8ed1e476007132032273 Mon Sep 17 00:00:00 2001 From: phixMe Date: Mon, 15 Jul 2024 16:05:45 -0700 Subject: [PATCH 38/87] Adding in stronger password for search. 
--- chart/Chart.lock | 7 +++++-- chart/templates/marquez/deployment.yaml | 2 ++ chart/values.yaml | 2 +- marquez.dev.yml | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/chart/Chart.lock b/chart/Chart.lock index 58554a821c..0b5dcb679d 100644 --- a/chart/Chart.lock +++ b/chart/Chart.lock @@ -5,5 +5,8 @@ dependencies: - name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.8.1 -digest: sha256:5d4b20341df7c1d2a1e1e16a9e3248a5e4eabf765b307bb05acf13447ff51ae5 -generated: "2022-11-10T20:03:21.425592157Z" +- name: opensearch + repository: https://opensearch-project.github.io/helm-charts + version: 2.21.0 +digest: sha256:b7f71608c49ded8cd4ea7658d253f6b36f120e3a934281b9c70657115d7cdf9d +generated: "2024-07-15T13:21:32.634905-07:00" diff --git a/chart/templates/marquez/deployment.yaml b/chart/templates/marquez/deployment.yaml index 9fc5bbc8f1..a327c879ed 100644 --- a/chart/templates/marquez/deployment.yaml +++ b/chart/templates/marquez/deployment.yaml @@ -91,6 +91,8 @@ spec: key: {{ include "marquez.database.existingsecret.key" . }} - name: MIGRATE_ON_STARTUP value: {{ .Values.marquez.migrateOnStartup | quote }} + - name: SEARCH_PASSWORD + value: {{ "Marquez4Ever!" | quote }} {{- if .Values.marquez.resources }} resources: {{- toYaml .Values.marquez.resources | nindent 12 }} {{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index c1cbf66712..9877250506 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -144,7 +144,7 @@ opensearch: ## Opensearch password extraEnvs: - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD - value: admin + value: Marquez4Ever! ## Additional labels to all the deployed resources; note that ## the following standard labels will automatically be applied. 
diff --git a/marquez.dev.yml b/marquez.dev.yml index 78fcbb3d64..77a787216f 100644 --- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -29,7 +29,7 @@ search: host: opensearch port: 9200 username: admin - password: admin + password: ${SEARCH_PASSWORD:-admin} tags: - name: PII From 5d40b2e489c4c63bc3f776e47662a6520cb6cc27 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 21 Jul 2024 18:00:15 -0700 Subject: [PATCH 39/87] Handling debouncing. --- .../components/search/es-search/EsSearch.tsx | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/es-search/EsSearch.tsx index 30a9888819..bfce77db94 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/es-search/EsSearch.tsx @@ -62,10 +62,6 @@ function parseStringToSegments(input: string): TextSegment[] { }) } -// function getValueAfterLastPeriod(s: string) { -// return s.split('.').pop() -// } - const useArrowKeys = (callback: (key: 'up' | 'down' | 'enter') => void) => { useEffect(() => { const handleKeyDown = (event: KeyboardEvent) => { @@ -88,7 +84,7 @@ const useArrowKeys = (callback: (key: 'up' | 'down' | 'enter') => void) => { } const FIELDS_TO_PRINT = 5 -const DEBOUNCE_TIME_MS = 500 +const DEBOUNCE_TIME_MS = 200 const EsSearch: React.FC = ({ search, @@ -98,6 +94,7 @@ const EsSearch: React.FC = ({ esSearchDatasets, }) => { const [selectedIndex, setSelectedIndex] = React.useState>(null) + const [isDebouncing, setIsDebouncing] = React.useState(true) const navigate = useNavigate() useArrowKeys((key) => { @@ -124,16 +121,24 @@ const EsSearch: React.FC = ({ }) const debouncedFetchJobs = useCallback( - debounce((searchTerm) => fetchEsSearchJobs(searchTerm), DEBOUNCE_TIME_MS), + debounce(async (searchTerm) => { + fetchEsSearchJobs(searchTerm); + setIsDebouncing(false); // Set loading to false after the fetch completes + }, DEBOUNCE_TIME_MS), [] - ) + ); + const debouncedFetchDatasets 
= useCallback( - debounce((searchTerm) => fetchEsSearchDatasets(searchTerm), DEBOUNCE_TIME_MS), + debounce(async (searchTerm) => { + fetchEsSearchDatasets(searchTerm); + setIsDebouncing(false); // Set loading to false after the fetch completes + }, DEBOUNCE_TIME_MS), [] - ) + ); useEffect(() => { + setIsDebouncing(true) debouncedFetchJobs(search) debouncedFetchDatasets(search) }, [search, debouncedFetchJobs, debouncedFetchDatasets]) @@ -142,7 +147,7 @@ const EsSearch: React.FC = ({ setSelectedIndex(null) }, [esSearchJobs.data.hits, esSearchDatasets.data.hits]) - if (esSearchJobs.data.hits.length === 0 && esSearchDatasets.data.hits.length === 0) { + if (esSearchJobs.data.hits.length === 0 && esSearchDatasets.data.hits.length === 0 && !isDebouncing) { return ( From 085a3ef5f07b5d30c6a7b302369818aae6984751 Mon Sep 17 00:00:00 2001 From: phixMe Date: Mon, 29 Jul 2024 12:28:17 -0700 Subject: [PATCH 40/87] Adding "ADVANCED_SEARCH" configurable variable for web. --- docker-compose.web.yml | 1 + examples/airflow/docker-compose.yml | 1 + web/src/components/search/Search.tsx | 5 ++--- web/src/globals.ts | 2 ++ web/webpack.dev.js | 1 + web/webpack.prod.js | 1 + 6 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docker-compose.web.yml b/docker-compose.web.yml index 72ddd08706..7e9c168796 100644 --- a/docker-compose.web.yml +++ b/docker-compose.web.yml @@ -6,6 +6,7 @@ services: environment: - MARQUEZ_HOST=api - MARQUEZ_PORT=${API_PORT} + - REACT_APP_ADVANCED_SEARCH=false ports: - "${WEB_PORT}:${WEB_PORT}" depends_on: diff --git a/examples/airflow/docker-compose.yml b/examples/airflow/docker-compose.yml index ee7216a41d..49de928f9b 100644 --- a/examples/airflow/docker-compose.yml +++ b/examples/airflow/docker-compose.yml @@ -87,6 +87,7 @@ services: environment: - MARQUEZ_HOST=marquez - MARQUEZ_PORT=5000 + - REACT_APP_ADVANCED_SEARCH=false ports: - "3000:3000" stdin_open: true diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx 
index 0de3ac208c..fe1ec58d55 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -15,6 +15,7 @@ import EsSearch from './es-search/EsSearch' import IconButton from '@mui/material/IconButton' import React, { useEffect, useRef, useState } from 'react' import SearchPlaceholder from './SearchPlaceholder' +import {REACT_APP_ADVANCED_SEARCH} from "../../globals"; const useCmdKShortcut = (callback: () => void) => { useEffect(() => { @@ -50,8 +51,6 @@ const useEscapeShortcut = (callback: () => void) => { }, [callback]) } -const isAdvancedSearchEnabled = true - interface StateProps { isLoading: boolean } @@ -188,7 +187,7 @@ const Search: React.FC = ({ isLoading }: StateProps) => { overflow={'auto'} maxHeight={`calc(100vh - ${HEADER_HEIGHT}px - 24px)`} > - {isAdvancedSearchEnabled ? ( + {REACT_APP_ADVANCED_SEARCH ? ( ) : ( diff --git a/web/src/globals.ts b/web/src/globals.ts index d3fcc8307b..82d3852d22 100644 --- a/web/src/globals.ts +++ b/web/src/globals.ts @@ -6,6 +6,7 @@ declare const __NODE_ENV__: string declare const __DEVELOPMENT__: boolean declare const __API_URL__: string +declare const __REACT_APP_ADVANCED_SEARCH__: boolean; declare const __FEEDBACK_FORM_URL__: string declare const __API_DOCS_URL__: string @@ -13,3 +14,4 @@ declare const __API_DOCS_URL__: string export const API_URL = __API_URL__ export const FEEDBACK_FORM_URL = __FEEDBACK_FORM_URL__ export const API_DOCS_URL = __API_DOCS_URL__ +export const REACT_APP_ADVANCED_SEARCH = __REACT_APP_ADVANCED_SEARCH__; diff --git a/web/webpack.dev.js b/web/webpack.dev.js index 990c999da4..8f73c6bf1a 100644 --- a/web/webpack.dev.js +++ b/web/webpack.dev.js @@ -41,6 +41,7 @@ const webpackDev = { plugins: [ new webpack.DefinePlugin({ __DEVELOPMENT__: JSON.stringify(true), + __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), __API_URL__: JSON.stringify('/api/v1'), __NODE_ENV__: JSON.stringify('development'), __TEMP_ACTOR_STR__: 
JSON.stringify('me'), diff --git a/web/webpack.prod.js b/web/webpack.prod.js index bf3d994577..0a37d03bc3 100644 --- a/web/webpack.prod.js +++ b/web/webpack.prod.js @@ -25,6 +25,7 @@ const webpackProd = { new webpack.DefinePlugin({ __DEVELOPMENT__: JSON.stringify(false), __NODE_ENV__: JSON.stringify('production'), + __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), __API_URL__: JSON.stringify('/api/v1'), __TEMP_ACTOR_STR__: JSON.stringify('me'), __ROLLBAR__: JSON.stringify(true), From 635c27bc1f29630c68fd32606c7c5a6a57700c46 Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 14:15:13 -0700 Subject: [PATCH 41/87] Fixing some tests. --- api/src/test/java/marquez/api/ApiTestUtils.java | 3 +++ web/jest.config.js | 1 + web/src/globals.ts | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/api/src/test/java/marquez/api/ApiTestUtils.java b/api/src/test/java/marquez/api/ApiTestUtils.java index 30071dffa3..3cf2c2c0b3 100644 --- a/api/src/test/java/marquez/api/ApiTestUtils.java +++ b/api/src/test/java/marquez/api/ApiTestUtils.java @@ -17,6 +17,7 @@ import marquez.service.NamespaceService; import marquez.service.OpenLineageService; import marquez.service.RunService; +import marquez.service.SearchService; import marquez.service.ServiceFactory; import marquez.service.SourceService; import marquez.service.TagService; @@ -57,6 +58,8 @@ public static ServiceFactory mockServiceFactory(Map mocks) { (SourceService) mocks.getOrDefault(SourceService.class, (mock(SourceService.class)))) .datasetService( (DatasetService) mocks.getOrDefault(DatasetService.class, (mock(DatasetService.class)))) + .searchService( + (SearchService) mocks.getOrDefault(SearchService.class, (mock(SearchService.class)))) .build(); } } diff --git a/web/jest.config.js b/web/jest.config.js index a27517a4f3..67e1e828ee 100644 --- a/web/jest.config.js +++ b/web/jest.config.js @@ -17,6 +17,7 @@ module.exports = { globals: { __API_URL__: '/api/v1', 
__FEEDBACK_FORM_URL__: 'https://forms.gle/f3tTSrZ8wPj3sHTA7', + __REACT_APP_ADVANCED_SEARCH__: true, __API_DOCS_URL__: 'https://marquezproject.github.io/marquez/openapi.html', __TEMP_ACTOR_STR__: 'me' }, diff --git a/web/src/globals.ts b/web/src/globals.ts index 82d3852d22..2c4098b0fb 100644 --- a/web/src/globals.ts +++ b/web/src/globals.ts @@ -6,7 +6,7 @@ declare const __NODE_ENV__: string declare const __DEVELOPMENT__: boolean declare const __API_URL__: string -declare const __REACT_APP_ADVANCED_SEARCH__: boolean; +declare const __REACT_APP_ADVANCED_SEARCH__: boolean declare const __FEEDBACK_FORM_URL__: string declare const __API_DOCS_URL__: string From df446e738c4f096b89a2a01c4ea2c75021acbe62 Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 15:30:58 -0700 Subject: [PATCH 42/87] Moving indexing down a row. --- .../main/java/marquez/api/OpenLineageResource.java | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index f5686c3941..591217e984 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -67,8 +67,8 @@ public OpenLineageResource( @Path("/lineage") public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncResponse asyncResponse) throws JsonProcessingException, SQLException { - serviceFactory.getSearchService().indexEvent((LineageEvent) event); if (event instanceof LineageEvent) { + serviceFactory.getSearchService().indexEvent((LineageEvent) event); openLineageService .createAsync((LineageEvent) event) .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); @@ -80,16 +80,6 @@ public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncRespon } } - private UUID runUuidFromEvent(LineageEvent.Run run) { - UUID runUuid; - try { - runUuid = UUID.fromString(run.getRunId()); - } catch (Exception e) 
{ - runUuid = UUID.nameUUIDFromBytes(run.getRunId().getBytes(StandardCharsets.UTF_8)); - } - return runUuid; - } - private void onComplete(Void result, Throwable err, AsyncResponse asyncResponse) { if (err != null) { log.error("Unexpected error while processing request", err); From a8cee81162febe761ce94bfab8740ebb5717521e Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 15:48:28 -0700 Subject: [PATCH 43/87] Spotless --- api/src/main/java/marquez/api/OpenLineageResource.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index 591217e984..5932ce88a8 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -15,11 +15,9 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import io.dropwizard.jersey.jsr310.ZonedDateTimeParam; -import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.Collections; import java.util.List; -import java.util.UUID; import java.util.concurrent.CompletionException; import javax.validation.Valid; import javax.validation.constraints.Min; From 986cd22191b20173705afbd31cdef05a559c55fe Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 16:13:29 -0700 Subject: [PATCH 44/87] Putting back removed code. 
--- api/src/main/java/marquez/api/OpenLineageResource.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index 5932ce88a8..764e01df5b 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -40,6 +40,8 @@ import marquez.db.OpenLineageDao; import marquez.service.ServiceFactory; import marquez.service.models.BaseEvent; +import marquez.service.models.DatasetEvent; +import marquez.service.models.JobEvent; import marquez.service.models.LineageEvent; import marquez.service.models.NodeId; @@ -70,6 +72,14 @@ public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncRespon openLineageService .createAsync((LineageEvent) event) .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); + } else if (event instanceof DatasetEvent) { + openLineageService + .createAsync((DatasetEvent) event) + .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); + } else if (event instanceof JobEvent) { + openLineageService + .createAsync((JobEvent) event) + .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); } else { log.warn("Unsupported event type {}. Skipping without error", event.getClass().getName()); From 3db2bdab28d188f93d75d81944da4160d563422d Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 17:40:06 -0700 Subject: [PATCH 45/87] Merge spotless resolution. 
--- api/src/main/java/marquez/MarquezApp.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 4d3bc49077..09e003000b 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -40,8 +40,8 @@ import marquez.logging.DelegatingSqlLogger; import marquez.logging.LabelledSqlLogger; import marquez.logging.LoggingMdcFilter; -import marquez.service.DatabaseMetrics; import marquez.search.SearchConfig; +import marquez.service.DatabaseMetrics; import marquez.tracing.SentryConfig; import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; From 22c0be4eacc1e7c7a3ce3e20964c6f328a12bf41 Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 2 Aug 2024 19:54:39 -0700 Subject: [PATCH 46/87] Skipping over search for db migration tests. --- .circleci/db-migration.sh | 2 ++ docker-compose.search.yml | 68 +++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 67 -------------------------------------- docker/up.sh | 8 +++++ 4 files changed, 78 insertions(+), 67 deletions(-) create mode 100644 docker-compose.search.yml diff --git a/.circleci/db-migration.sh b/.circleci/db-migration.sh index e622a2a299..171f47d7f0 100755 --- a/.circleci/db-migration.sh +++ b/.circleci/db-migration.sh @@ -64,6 +64,7 @@ if ! ./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --tag "${MARQUEZ_VERSION}" \ --no-web \ + --no-search \ --seed > /dev/null; then error "failed to start db using backup!" exit_with_cause @@ -77,6 +78,7 @@ log "start db using backup (marquez=${MARQUEZ_BUILD_VERSION}):" if ! 
./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --no-web \ + --no-search \ --no-volumes \ --build \ --seed > /dev/null; then diff --git a/docker-compose.search.yml b/docker-compose.search.yml new file mode 100644 index 0000000000..cdeaf46f23 --- /dev/null +++ b/docker-compose.search.yml @@ -0,0 +1,68 @@ +version: "3.7" +services: + opensearch: + image: opensearchproject/opensearch:2.5.0 + container_name: opensearch + hostname: opensearch + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch + - discovery.seed_hosts=opensearch,opensearch-2 + - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - bootstrap.memory_lock=true + - plugins.security.ssl.http.enabled=false + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - OPENSEARCH_PASSWORD=admin + volumes: + - opensearch-data:/usr/share/opensearch/data + cap_add: + - IPC_LOCK + ports: + - "9200:9200" + - "9300:9300" + networks: + - opensearch-net + + opensearch-2: + image: opensearchproject/opensearch:2.5.0 + container_name: opensearch-2 + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch-2 + - discovery.seed_hosts=opensearch,opensearch-2 + - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - bootstrap.memory_lock=true + - plugins.security.ssl.http.enabled=false + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - OPENSEARCH_PASSWORD=admin + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-2-data:/usr/share/opensearch/data + networks: + - opensearch-net + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.5.0 + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch:9200","http://opensearch-2:9200"]' 
+ - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards + networks: + - opensearch-net diff --git a/docker-compose.yml b/docker-compose.yml index 874390b115..fe88b37e8b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,73 +49,6 @@ services: networks: - opensearch-net - opensearch: - image: opensearchproject/opensearch:2.5.0 - container_name: opensearch - hostname: opensearch - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 - - bootstrap.memory_lock=true - - plugins.security.ssl.http.enabled=false - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_PASSWORD=admin - volumes: - - opensearch-data:/usr/share/opensearch/data - cap_add: - - IPC_LOCK - ports: - - "9200:9200" - - "9300:9300" - networks: - - opensearch-net - - opensearch-2: - image: opensearchproject/opensearch:2.5.0 - container_name: opensearch-2 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-2 - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 - - bootstrap.memory_lock=true - - plugins.security.ssl.http.enabled=false - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_PASSWORD=admin - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - opensearch-2-data:/usr/share/opensearch/data - networks: - - opensearch-net - - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:2.5.0 - container_name: opensearch-dashboards - ports: - - 5601:5601 # Map host port 5601 to container port 5601 - expose: - - "5601" # Expose port 5601 for web access to OpenSearch Dashboards - environment: - - 'OPENSEARCH_HOSTS=["http://opensearch:9200","http://opensearch-2:9200"]' - - 
"DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards - networks: - - opensearch-net - volumes: data: opensearch-data: diff --git a/docker/up.sh b/docker/up.sh index d6e1d69e62..43f4418a8c 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -49,6 +49,7 @@ usage() { echo " -s, --seed seed HTTP API server with metadata" echo " -d, --detach run in the background" echo " --no-web don't start the web UI" + echo " --no-search don't start search" echo " --no-volumes don't create volumes" echo " -h, --help show help for script" echo @@ -68,6 +69,7 @@ WEB_PORT=3000 POSTGRES_PORT=5432 SEARCH_PORT=9200 NO_WEB="false" +NO_SEARCH="false" NO_VOLUMES="false" TAG="${VERSION}" BUILD="false" @@ -112,6 +114,7 @@ while [ $# -gt 0 ]; do ;; -d|'--detach') DETACH='true' ;; --no-web) NO_WEB='true' ;; + --no-search) NO_SEARCH='true' ;; --no-volumes) NO_VOLUMES='true' ;; -h|'--help') usage @@ -147,6 +150,11 @@ if [[ "${NO_WEB}" = "false" ]]; then [[ "${BUILD}" = "true" ]] && compose_files+=" -f docker-compose.web-dev.yml" fi +# Enable search UI +if [[ "${NO_SEARCH}" = "false" ]]; then + compose_files+=" -f docker-compose.search.yml" +fi + # Create docker volumes for Marquez if [[ "${NO_VOLUMES}" = "false" ]]; then ./docker/volumes.sh marquez From cd3fa50e57ee6f5b1a92196a830ff8c257fab711 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sat, 3 Aug 2024 08:57:19 -0700 Subject: [PATCH 47/87] Adding search back to migration --- .circleci/db-migration.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.circleci/db-migration.sh b/.circleci/db-migration.sh index 171f47d7f0..e622a2a299 100755 --- a/.circleci/db-migration.sh +++ b/.circleci/db-migration.sh @@ -64,7 +64,6 @@ if ! ./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --tag "${MARQUEZ_VERSION}" \ --no-web \ - --no-search \ --seed > /dev/null; then error "failed to start db using backup!" exit_with_cause @@ -78,7 +77,6 @@ log "start db using backup (marquez=${MARQUEZ_BUILD_VERSION}):" if ! 
./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --no-web \ - --no-search \ --no-volumes \ --build \ --seed > /dev/null; then From 06dadb3ea96f347995b0c3422f1b6998edda4242 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sat, 3 Aug 2024 09:26:30 -0700 Subject: [PATCH 48/87] Trying out ci config setting. --- docker-compose.search.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker-compose.search.yml b/docker-compose.search.yml index cdeaf46f23..3fa24bbcc4 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -4,6 +4,9 @@ services: image: opensearchproject/opensearch:2.5.0 container_name: opensearch hostname: opensearch + sysctls: + - net.core.somaxconn=65535 + - vm.max_map_count=262144 ulimits: memlock: soft: -1 @@ -33,6 +36,9 @@ services: opensearch-2: image: opensearchproject/opensearch:2.5.0 container_name: opensearch-2 + sysctls: + - net.core.somaxconn=65535 + - vm.max_map_count=262144 environment: - cluster.name=opensearch-cluster - node.name=opensearch-2 From 04d954685031d5d2e5bd39c07b6ffb6e930c22ce Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 4 Aug 2024 09:13:57 -0700 Subject: [PATCH 49/87] Removing search from base config as a whole. --- .circleci/db-migration.sh | 2 ++ docker-compose.search.yml | 66 +++++++++++++++++++++++++++++++++++---- docker-compose.yml | 14 --------- 3 files changed, 62 insertions(+), 20 deletions(-) diff --git a/.circleci/db-migration.sh b/.circleci/db-migration.sh index e622a2a299..171f47d7f0 100755 --- a/.circleci/db-migration.sh +++ b/.circleci/db-migration.sh @@ -64,6 +64,7 @@ if ! ./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --tag "${MARQUEZ_VERSION}" \ --no-web \ + --no-search \ --seed > /dev/null; then error "failed to start db using backup!" exit_with_cause @@ -77,6 +78,7 @@ log "start db using backup (marquez=${MARQUEZ_BUILD_VERSION}):" if ! 
./docker/up.sh \ --args "--exit-code-from seed_marquez" \ --no-web \ + --no-search \ --no-volumes \ --build \ --seed > /dev/null; then diff --git a/docker-compose.search.yml b/docker-compose.search.yml index 3fa24bbcc4..a1625698b4 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -1,12 +1,58 @@ version: "3.7" services: + api: + image: "marquezproject/marquez:${TAG}" + container_name: marquez-api + environment: + - MARQUEZ_PORT=${API_PORT} + - MARQUEZ_ADMIN_PORT=${API_ADMIN_PORT} + ports: + - "${API_PORT}:${API_PORT}" + - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" + volumes: + - data:/opt/marquez + links: + - "db:postgres" + - opensearch + depends_on: + - opensearch + - db + entrypoint: + - /opt/marquez/wait-for-it.sh + - db:${POSTGRES_PORT} + - -- + - /opt/marquez/wait-for-it.sh + - opensearch:${SEARCH_PORT} + - -- + - ./entrypoint.sh + networks: + - opensearch-net + + db: + image: postgres:14 + container_name: marquez-db + ports: + - "${POSTGRES_PORT}:${POSTGRES_PORT}" + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=password + - MARQUEZ_DB=marquez + - MARQUEZ_USER=marquez + - MARQUEZ_PASSWORD=marquez + volumes: + - db-conf:/etc/postgresql + - db-init:/docker-entrypoint-initdb.d + - db-backup:/var/lib/postgresql/data + command: [ "postgres", "-c", "config_file=/etc/postgresql/postgresql.conf" ] + # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) + # command: ["postgres", "-c", "log_statement=all"] + networks: + - opensearch-net + opensearch: image: opensearchproject/opensearch:2.5.0 container_name: opensearch hostname: opensearch - sysctls: - - net.core.somaxconn=65535 - - vm.max_map_count=262144 ulimits: memlock: soft: -1 @@ -36,9 +82,6 @@ services: opensearch-2: image: opensearchproject/opensearch:2.5.0 container_name: opensearch-2 - sysctls: - - net.core.somaxconn=65535 - - vm.max_map_count=262144 environment: - cluster.name=opensearch-cluster - 
node.name=opensearch-2 @@ -72,3 +115,14 @@ services: - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards networks: - opensearch-net + +volumes: + data: + opensearch-data: + opensearch-2-data: + db-conf: + db-init: + db-backup: + +networks: + opensearch-net: \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index fe88b37e8b..b8b4403d4a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,20 +13,13 @@ services: - data:/opt/marquez links: - "db:postgres" - - opensearch depends_on: - - opensearch - db entrypoint: - /opt/marquez/wait-for-it.sh - db:${POSTGRES_PORT} - -- - - /opt/marquez/wait-for-it.sh - - opensearch:${SEARCH_PORT} - - -- - ./entrypoint.sh - networks: - - opensearch-net db: image: postgres:14 @@ -46,16 +39,9 @@ services: command: ["postgres", "-c", "config_file=/etc/postgresql/postgresql.conf"] # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) # command: ["postgres", "-c", "log_statement=all"] - networks: - - opensearch-net volumes: data: - opensearch-data: - opensearch-2-data: db-conf: db-init: db-backup: - -networks: - opensearch-net: From 038cfc9da9695315a934d8844d557a58f100eaaf Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 4 Aug 2024 09:20:14 -0700 Subject: [PATCH 50/87] Pushing out header updates. 
--- api/src/main/java/marquez/search/SearchConfig.java | 5 +++++ api/src/main/java/marquez/service/SearchService.java | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/api/src/main/java/marquez/search/SearchConfig.java b/api/src/main/java/marquez/search/SearchConfig.java index 427a76701d..a1a7a2ed48 100644 --- a/api/src/main/java/marquez/search/SearchConfig.java +++ b/api/src/main/java/marquez/search/SearchConfig.java @@ -1,3 +1,8 @@ +/* + * Copyright 2018-2024 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + package marquez.search; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index 4a88c2dee7..c5d648769f 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -1,3 +1,8 @@ +/* + * Copyright 2018-2024 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + package marquez.service; import com.fasterxml.jackson.databind.node.ObjectNode; From 2767eda94e759736a4e2b6c8fe0c0bbe0c8d6399 Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 4 Aug 2024 20:47:34 -0700 Subject: [PATCH 51/87] Review comment on search service init. 
--- api/src/main/java/marquez/MarquezApp.java | 44 +----------------- api/src/main/java/marquez/MarquezContext.java | 20 ++++---- .../main/java/marquez/api/SearchResource.java | 5 +- .../java/marquez/service/SearchService.java | 46 +++++++++++++++++-- docker-compose.search.yml | 4 ++ 5 files changed, 58 insertions(+), 61 deletions(-) diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 09e003000b..5ca4999dae 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -7,7 +7,6 @@ import com.codahale.metrics.jdbi3.InstrumentedSqlLogger; import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import io.dropwizard.Application; import io.dropwizard.assets.AssetsBundle; import io.dropwizard.configuration.EnvironmentVariableSubstitutor; @@ -22,7 +21,6 @@ import io.prometheus.client.exporter.MetricsServlet; import io.prometheus.client.hotspot.DefaultExports; import io.sentry.Sentry; -import java.io.IOException; import java.util.EnumSet; import javax.servlet.DispatcherType; import lombok.NonNull; @@ -40,16 +38,11 @@ import marquez.logging.DelegatingSqlLogger; import marquez.logging.LabelledSqlLogger; import marquez.logging.LoggingMdcFilter; -import marquez.search.SearchConfig; import marquez.service.DatabaseMetrics; import marquez.tracing.SentryConfig; import marquez.tracing.TracingContainerResponseFilter; import marquez.tracing.TracingSQLLogger; import marquez.tracing.TracingServletFilter; -import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.impl.client.BasicCredentialsProvider; import org.flywaydb.core.api.FlywayException; import org.jdbi.v3.core.Jdbi; import org.jdbi.v3.core.statement.SqlLogger; @@ -57,12 +50,6 @@ import org.jdbi.v3.jackson2.Jackson2Plugin; import org.jdbi.v3.postgres.PostgresPlugin; import 
org.jdbi.v3.sqlobject.SqlObjectPlugin; -import org.opensearch.client.RestClient; -import org.opensearch.client.json.jackson.JacksonJsonpMapper; -import org.opensearch.client.opensearch.OpenSearchClient; -import org.opensearch.client.transport.OpenSearchTransport; -import org.opensearch.client.transport.endpoints.BooleanResponse; -import org.opensearch.client.transport.rest_client.RestClientTransport; @Slf4j public final class MarquezApp extends Application { @@ -152,7 +139,7 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { final MarquezContext marquezContext = MarquezContext.builder() .jdbi(jdbi) - .openSearchClient(newOpenSearchClient(config.getSearchConfig())) + .searchConfig(config.getSearchConfig()) .tags(config.getTags()) .build(); @@ -171,35 +158,6 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { Exclusions.use(exclusions); } - private OpenSearchClient newOpenSearchClient(SearchConfig searchConfig) { - final HttpHost host = - new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); - final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - new AuthScope(host), - new UsernamePasswordCredentials(searchConfig.getUsername(), searchConfig.getPassword())); - final RestClient restClient = - RestClient.builder(host) - .setHttpClientConfigCallback( - httpClientBuilder -> - httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) - .build(); - - JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); - // register JavaTimeModule to handle ZonedDateTime - jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); - final OpenSearchTransport transport = new RestClientTransport(restClient, jsonpMapper); - OpenSearchClient openSearchClient = new OpenSearchClient(transport); - BooleanResponse booleanResponse; - try { - booleanResponse = openSearchClient.ping(); - log.info("OpenSearch Active: {}", 
booleanResponse.value()); - } catch (IOException e) { - log.warn("Search not configured"); - } - return openSearchClient; - } - private boolean isSentryEnabled(MarquezConfig config) { return config.getSentry() != null && !config.getSentry().getDsn().equals(SentryConfig.DEFAULT_DSN); diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index bef6eddb85..42688a8319 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -43,6 +43,7 @@ import marquez.db.TagDao; import marquez.graphql.GraphqlSchemaBuilder; import marquez.graphql.MarquezGraphqlServletBuilder; +import marquez.search.SearchConfig; import marquez.service.ColumnLineageService; import marquez.service.DatasetFieldService; import marquez.service.DatasetService; @@ -59,7 +60,6 @@ import marquez.service.TagService; import marquez.service.models.Tag; import org.jdbi.v3.core.Jdbi; -import org.opensearch.client.opensearch.OpenSearchClient; @Getter public final class MarquezContext { @@ -104,17 +104,17 @@ public final class MarquezContext { @Getter private final JdbiExceptionExceptionMapper jdbiException; @Getter private final JsonProcessingExceptionMapper jsonException; @Getter private final GraphQLHttpServlet graphqlServlet; - @Getter private final OpenSearchClient openSearchClient; + @Getter private final SearchConfig searchConfig; private MarquezContext( @NonNull final Jdbi jdbi, - @NonNull final OpenSearchClient openSearchClient, + @NonNull final SearchConfig searchConfig, @NonNull final ImmutableSet tags, List runTransitionListeners) { if (runTransitionListeners == null) { runTransitionListeners = new ArrayList<>(); } - this.openSearchClient = openSearchClient; + this.searchConfig = searchConfig; final BaseDao baseDao = jdbi.onDemand(NamespaceDao.class); this.namespaceDao = jdbi.onDemand(NamespaceDao.class); @@ -147,7 +147,7 @@ private MarquezContext( this.openLineageService = new 
OpenLineageService(baseDao, runService); this.lineageService = new LineageService(lineageDao, jobDao); this.columnLineageService = new ColumnLineageService(columnLineageDao, datasetFieldDao); - this.searchService = new SearchService(openSearchClient); + this.searchService = new SearchService(searchConfig); this.jdbiException = new JdbiExceptionExceptionMapper(); this.jsonException = new JsonProcessingExceptionMapper(); final ServiceFactory serviceFactory = @@ -172,7 +172,7 @@ private MarquezContext( this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); this.openLineageResource = new OpenLineageResource(serviceFactory, openLineageDao); - this.searchResource = new SearchResource(serviceFactory, searchDao, openSearchClient); + this.searchResource = new SearchResource(serviceFactory, searchDao); this.resources = ImmutableList.of( @@ -198,7 +198,7 @@ public static Builder builder() { public static class Builder { private Jdbi jdbi; - private OpenSearchClient openSearchClient; + private SearchConfig searchConfig; private ImmutableSet tags; private List runTransitionListeners; @@ -212,8 +212,8 @@ public Builder jdbi(@NonNull Jdbi jdbi) { return this; } - public Builder openSearchClient(@NonNull OpenSearchClient openSearchClient) { - this.openSearchClient = openSearchClient; + public Builder searchConfig(@NonNull SearchConfig searchConfig) { + this.searchConfig = searchConfig; return this; } @@ -233,7 +233,7 @@ public Builder runTransitionListeners( } public MarquezContext build() { - return new MarquezContext(jdbi, openSearchClient, tags, runTransitionListeners); + return new MarquezContext(jdbi, searchConfig, tags, runTransitionListeners); } } } diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 1eda85fa8c..aec19356d8 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ 
b/api/src/main/java/marquez/api/SearchResource.java @@ -38,7 +38,6 @@ import marquez.db.SearchDao; import marquez.service.SearchService; import marquez.service.ServiceFactory; -import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch.core.SearchResponse; import org.opensearch.client.opensearch.core.search.Hit; @@ -54,9 +53,7 @@ public class SearchResource { private final SearchDao searchDao; public SearchResource( - @NonNull final ServiceFactory serviceFactory, - @NonNull final SearchDao searchDao, - @Nullable final OpenSearchClient openSearchClient) { + @NonNull final ServiceFactory serviceFactory, @NonNull final SearchDao searchDao) { this.searchService = serviceFactory.getSearchService(); this.searchDao = searchDao; } diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index c5d648769f..8d3426f9c5 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -6,6 +6,7 @@ package marquez.service; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -15,9 +16,15 @@ import java.util.UUID; import javax.validation.Valid; import javax.validation.constraints.NotNull; -import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import marquez.search.SearchConfig; import marquez.service.models.LineageEvent; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.opensearch.client.RestClient; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch._types.query_dsl.Operator; import 
org.opensearch.client.opensearch._types.query_dsl.TextQueryType; @@ -25,14 +32,45 @@ import org.opensearch.client.opensearch.core.SearchResponse; import org.opensearch.client.opensearch.core.search.BuiltinHighlighterType; import org.opensearch.client.opensearch.core.search.HighlighterType; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.endpoints.BooleanResponse; +import org.opensearch.client.transport.rest_client.RestClientTransport; @Slf4j public class SearchService { - private final OpenSearchClient openSearchClient; + private OpenSearchClient openSearchClient; + + public SearchService(SearchConfig searchConfig) { + final HttpHost host = + new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); + final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + new AuthScope(host), + new UsernamePasswordCredentials(searchConfig.getUsername(), searchConfig.getPassword())); + final RestClient restClient = + RestClient.builder(host) + .setHttpClientConfigCallback( + httpClientBuilder -> + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) + .build(); + + JacksonJsonpMapper jsonpMapper = new JacksonJsonpMapper(); + // register JavaTimeModule to handle ZonedDateTime + jsonpMapper.objectMapper().registerModule(new JavaTimeModule()); + final OpenSearchTransport transport = new RestClientTransport(restClient, jsonpMapper); + this.openSearchClient = new OpenSearchClient(transport); + BooleanResponse booleanResponse; + try { + booleanResponse = openSearchClient.ping(); + log.info("OpenSearch Active: {}", booleanResponse.value()); + } catch (IOException e) { + log.warn("Search not configured"); + } + } - public SearchService(@NonNull final OpenSearchClient openSearchClient) { - this.openSearchClient = openSearchClient; + public OpenSearchClient getClient() { + return this.openSearchClient; } public SearchResponse 
searchDatasets(String query) throws IOException { diff --git a/docker-compose.search.yml b/docker-compose.search.yml index a1625698b4..9b71ae59a3 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -116,6 +116,10 @@ services: networks: - opensearch-net + web: + networks: + - opensearch-net + volumes: data: opensearch-data: From ec4ab4180c2eaa8cb71dd1b411c09f37996b1a6a Mon Sep 17 00:00:00 2001 From: phixMe Date: Mon, 5 Aug 2024 11:26:39 -0700 Subject: [PATCH 52/87] Fixing up dependencies in docker to apply migrations. --- api/src/main/java/marquez/service/SearchService.java | 2 +- docker-compose.seed.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index 8d3426f9c5..73dd9ba45e 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -39,7 +39,7 @@ @Slf4j public class SearchService { - private OpenSearchClient openSearchClient; + private final OpenSearchClient openSearchClient; public SearchService(SearchConfig searchConfig) { final HttpHost host = diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index f88ed7ca11..466e998cd1 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -13,5 +13,4 @@ services: - "db:postgres" depends_on: - api - - opensearch entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "opensearch:${SEARCH_PORT}", "--", "./seed.sh"] From 593d7b8538ef87b94e18c38f0573c2b0db042345 Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 12:59:37 -0700 Subject: [PATCH 53/87] Code review updates and naming changes. 
--- .../main/java/marquez/api/SearchResource.java | 8 +-- .../java/marquez/service/SearchService.java | 58 +++++++++--------- docker-compose.search.yml | 4 ++ web/jest.config.js | 1 + web/src/components/search/Search.tsx | 6 +- .../OpenSearch.tsx} | 60 +++++++++---------- .../airlfow-logo.svg | 0 .../{es-search => open-search}/spark-logo.svg | 0 web/src/globals.ts | 2 + web/src/store/actionCreators/actionTypes.ts | 10 ++-- web/src/store/actionCreators/index.ts | 20 +++---- web/src/store/reducers/esSearch.ts | 37 ------------ web/src/store/reducers/esSearchDatasets.ts | 40 ------------- web/src/store/reducers/index.ts | 12 ++-- web/src/store/reducers/openSearch.ts | 37 ++++++++++++ web/src/store/reducers/openSearchDatasets.ts | 40 +++++++++++++ web/src/store/requests/search.ts | 14 ++--- web/src/store/sagas/index.ts | 36 +++++------ web/src/types/api.ts | 18 +++--- web/webpack.dev.js | 1 + web/webpack.prod.js | 1 + 21 files changed, 208 insertions(+), 197 deletions(-) rename web/src/components/search/{es-search/EsSearch.tsx => open-search/OpenSearch.tsx} (88%) rename web/src/components/search/{es-search => open-search}/airlfow-logo.svg (100%) rename web/src/components/search/{es-search => open-search}/spark-logo.svg (100%) delete mode 100644 web/src/store/reducers/esSearch.ts delete mode 100644 web/src/store/reducers/esSearchDatasets.ts create mode 100644 web/src/store/reducers/openSearch.ts create mode 100644 web/src/store/reducers/openSearchDatasets.ts diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index aec19356d8..2f316289bd 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -42,7 +42,7 @@ import org.opensearch.client.opensearch.core.search.Hit; @Slf4j -@Path("/api/v1/search") +@Path("/api/v2beta/search") public class SearchResource { private static final String YYYY_MM_DD = "^\\d{4}-\\d{2}-\\d{2}$"; private static final 
String DEFAULT_SORT = "name"; @@ -109,16 +109,16 @@ private Response formatEsResponse(SearchResponse response) { List>> highlights = response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); - return Response.ok(new EsResult(hits, highlights)).build(); + return Response.ok(new OpenSearchResult(hits, highlights)).build(); } @ToString - public static final class EsResult { + public static final class OpenSearchResult { @Getter private final List hits; @Getter private final List>> highlights; @JsonCreator - public EsResult( + public OpenSearchResult( @NonNull List hits, @NonNull List>> highlights) { this.hits = hits; this.highlights = highlights; diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index 73dd9ba45e..b1d1405149 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -39,6 +39,30 @@ @Slf4j public class SearchService { + String[] DATASET_FIELDS = { + "run_id", + "name", + "namespace", + "facets.schema.fields.name", + "facets.schema.fields.type", + "facets.columnLineage.fields.*.inputFields.name", + "facets.columnLineage.fields.*.inputFields.namespace", + "facets.columnLineage.fields.*.inputFields.field", + "facets.columnLineage.fields.*.transformationDescription", + "facets.columnLineage.fields.*.transformationType" + }; + + String[] JOB_FIELDS = { + "facets.sql.query", + "facets.sourceCode.sourceCode", + "facets.sourceCode.language", + "runFacets.processing_engine.name", + "run_id", + "name", + "namespace", + "type" + }; + private final OpenSearchClient openSearchClient; public SearchService(SearchConfig searchConfig) { @@ -74,18 +98,6 @@ public OpenSearchClient getClient() { } public SearchResponse searchDatasets(String query) throws IOException { - String[] fields = { - "run_id", - "name", - "namespace", - "facets.schema.fields.name", - "facets.schema.fields.type", - 
"facets.columnLineage.fields.*.inputFields.name", - "facets.columnLineage.fields.*.inputFields.namespace", - "facets.columnLineage.fields.*.inputFields.field", - "facets.columnLineage.fields.*.transformationDescription", - "facets.columnLineage.fields.*.transformationType" - }; return this.openSearchClient.search( s -> s.index("datasets") @@ -95,11 +107,11 @@ public SearchResponse searchDatasets(String query) throws IOExceptio m -> m.query(query) .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) + .fields(Arrays.stream(DATASET_FIELDS).toList()) .operator(Operator.Or))) .highlight( hl -> { - for (String field : fields) { + for (String field : DATASET_FIELDS) { hl.fields( field, f -> @@ -113,16 +125,6 @@ public SearchResponse searchDatasets(String query) throws IOExceptio } public SearchResponse searchJobs(String query) throws IOException { - String[] fields = { - "facets.sql.query", - "facets.sourceCode.sourceCode", - "facets.sourceCode.language", - "runFacets.processing_engine.name", - "run_id", - "name", - "namespace", - "type" - }; return this.openSearchClient.search( s -> { s.index("jobs") @@ -132,11 +134,11 @@ public SearchResponse searchJobs(String query) throws IOException { m -> m.query(query) .type(TextQueryType.PhrasePrefix) - .fields(Arrays.stream(fields).toList()) + .fields(Arrays.stream(JOB_FIELDS).toList()) .operator(Operator.Or))); s.highlight( hl -> { - for (String field : fields) { + for (String field : JOB_FIELDS) { hl.fields( field, f -> @@ -152,7 +154,7 @@ public SearchResponse searchJobs(String query) throws IOException { public void indexEvent(@Valid @NotNull LineageEvent event) { UUID runUuid = runUuidFromEvent(event.getRun()); - log.info("Indexing event {}", event); + log.debug("Indexing event {}", event); if (event.getInputs() != null) { indexDatasets(event.getInputs(), runUuid, event); @@ -231,7 +233,7 @@ private void index(IndexRequest> request) { try { this.openSearchClient.index(request); } catch (IOException e) 
{ - log.info("Failed to index event OpenSearch not available.", e); + log.error("Failed to index event OpenSearch not available.", e); } } } diff --git a/docker-compose.search.yml b/docker-compose.search.yml index 9b71ae59a3..72c7ee2d67 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -120,6 +120,10 @@ services: networks: - opensearch-net + seed_marquez: + depends_on: + - opensearch + volumes: data: opensearch-data: diff --git a/web/jest.config.js b/web/jest.config.js index 67e1e828ee..317f4b510e 100644 --- a/web/jest.config.js +++ b/web/jest.config.js @@ -16,6 +16,7 @@ module.exports = { testEnvironment: 'jsdom', globals: { __API_URL__: '/api/v1', + __API_BETA_URL__: '/api/v2beta', __FEEDBACK_FORM_URL__: 'https://forms.gle/f3tTSrZ8wPj3sHTA7', __REACT_APP_ADVANCED_SEARCH__: true, __API_DOCS_URL__: 'https://marquezproject.github.io/marquez/openapi.html', diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index fe1ec58d55..59036df390 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -11,7 +11,7 @@ import { useLocation } from 'react-router' import BaseSearch from './base-search/BaseSearch' import CircularProgress from '@mui/material/CircularProgress/CircularProgress' import ClickAwayListener from '@mui/material/ClickAwayListener' -import EsSearch from './es-search/EsSearch' +import OpenSearch from './open-search/OpenSearch' import IconButton from '@mui/material/IconButton' import React, { useEffect, useRef, useState } from 'react' import SearchPlaceholder from './SearchPlaceholder' @@ -188,7 +188,7 @@ const Search: React.FC = ({ isLoading }: StateProps) => { maxHeight={`calc(100vh - ${HEADER_HEIGHT}px - 24px)`} > {REACT_APP_ADVANCED_SEARCH ? 
( - + ) : ( )} @@ -203,7 +203,7 @@ const Search: React.FC = ({ isLoading }: StateProps) => { } const mapStateToProps = (state: IState) => ({ - isLoading: state.esSearchJobs.isLoading || state.esSearchDatasets.isLoading, + isLoading: state.openSearchJobs.isLoading || state.openSearchDatasets.isLoading, }) export default connect(mapStateToProps)(Search) diff --git a/web/src/components/search/es-search/EsSearch.tsx b/web/src/components/search/open-search/OpenSearch.tsx similarity index 88% rename from web/src/components/search/es-search/EsSearch.tsx rename to web/src/components/search/open-search/OpenSearch.tsx index bfce77db94..ced6a54c59 100644 --- a/web/src/components/search/es-search/EsSearch.tsx +++ b/web/src/components/search/open-search/OpenSearch.tsx @@ -4,8 +4,8 @@ import * as Redux from 'redux' import { Chip, Divider } from '@mui/material' import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' -import { IEsSearchDatasetsState } from '../../../store/reducers/esSearchDatasets' -import { IEsSearchJobsState } from '../../../store/reducers/esSearch' +import { IOpenSearchDatasetsState } from '../../../store/reducers/openSearchDatasets' +import { IOpenSearchJobsState } from '../../../store/reducers/openSearch' import { IState } from '../../../store/reducers' import { Nullable } from '../../../types/util/Nullable' import { bindActionCreators } from 'redux' @@ -14,7 +14,7 @@ import { debounce } from 'lodash' import { encodeNode, eventTypeColor } from '../../../helpers/nodes' import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { faDatabase } from '@fortawesome/free-solid-svg-icons' -import { fetchEsSearchDatasets, fetchEsSearchJobs } from '../../../store/actionCreators' +import { fetchOpenSearchDatasets, fetchOpenSearchJobs } from '../../../store/actionCreators' import { theme } from '../../../helpers/theme' import { truncateText } from '../../../helpers/text' import { useNavigate } from 'react-router-dom' @@ -28,13 +28,13 @@ import 
airflow_logo from './airlfow-logo.svg' import spark_logo from './spark-logo.svg' interface StateProps { - esSearchJobs: IEsSearchJobsState - esSearchDatasets: IEsSearchDatasetsState + openSearchJobs: IOpenSearchJobsState + openSearchDatasets: IOpenSearchDatasetsState } interface DispatchProps { - fetchEsSearchJobs: typeof fetchEsSearchJobs - fetchEsSearchDatasets: typeof fetchEsSearchDatasets + fetchOpenSearchJobs: typeof fetchOpenSearchJobs + fetchOpenSearchDatasets: typeof fetchOpenSearchDatasets } interface Props { @@ -86,12 +86,12 @@ const useArrowKeys = (callback: (key: 'up' | 'down' | 'enter') => void) => { const FIELDS_TO_PRINT = 5 const DEBOUNCE_TIME_MS = 200 -const EsSearch: React.FC = ({ +const OpenSearch: React.FC = ({ search, - fetchEsSearchJobs, - fetchEsSearchDatasets, - esSearchJobs, - esSearchDatasets, + fetchOpenSearchJobs, + fetchOpenSearchDatasets, + openSearchJobs, + openSearchDatasets, }) => { const [selectedIndex, setSelectedIndex] = React.useState>(null) const [isDebouncing, setIsDebouncing] = React.useState(true) @@ -106,15 +106,15 @@ const EsSearch: React.FC = ({ ? 
0 : Math.min( selectedIndex + 1, - esSearchJobs.data.hits.length + esSearchDatasets.data.hits.length - 1 + openSearchJobs.data.hits.length + openSearchDatasets.data.hits.length - 1 ) ) } else if (selectedIndex !== null) { - if (selectedIndex < esSearchJobs.data.hits.length) { - const jobHit = esSearchJobs.data.hits[selectedIndex] + if (selectedIndex < openSearchJobs.data.hits.length) { + const jobHit = openSearchJobs.data.hits[selectedIndex] navigate(`/lineage/${encodeNode('JOB', jobHit.namespace, jobHit.name)}`) } else { - const datasetHit = esSearchDatasets.data.hits[selectedIndex - esSearchJobs.data.hits.length] + const datasetHit = openSearchDatasets.data.hits[selectedIndex - openSearchJobs.data.hits.length] navigate(`/lineage/${encodeNode('DATASET', datasetHit.namespace, datasetHit.name)}`) } } @@ -122,7 +122,7 @@ const EsSearch: React.FC = ({ const debouncedFetchJobs = useCallback( debounce(async (searchTerm) => { - fetchEsSearchJobs(searchTerm); + fetchOpenSearchJobs(searchTerm); setIsDebouncing(false); // Set loading to false after the fetch completes }, DEBOUNCE_TIME_MS), [] @@ -131,7 +131,7 @@ const EsSearch: React.FC = ({ const debouncedFetchDatasets = useCallback( debounce(async (searchTerm) => { - fetchEsSearchDatasets(searchTerm); + fetchOpenSearchDatasets(searchTerm); setIsDebouncing(false); // Set loading to false after the fetch completes }, DEBOUNCE_TIME_MS), [] @@ -145,9 +145,9 @@ const EsSearch: React.FC = ({ useEffect(() => { setSelectedIndex(null) - }, [esSearchJobs.data.hits, esSearchDatasets.data.hits]) + }, [openSearchJobs.data.hits, openSearchDatasets.data.hits]) - if (esSearchJobs.data.hits.length === 0 && esSearchDatasets.data.hits.length === 0 && !isDebouncing) { + if (openSearchJobs.data.hits.length === 0 && openSearchDatasets.data.hits.length === 0 && !isDebouncing) { return ( @@ -157,7 +157,7 @@ const EsSearch: React.FC = ({ return ( - {esSearchJobs.data.hits.map((hit, index) => { + {openSearchJobs.data.hits.map((hit, index) => { 
return ( = ({ Match - {Object.entries(esSearchJobs.data.highlights[index]).map(([key, value]) => { + {Object.entries(openSearchJobs.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { return ( = ({ ) })} - {esSearchDatasets.data.hits.map((hit, index) => { + {openSearchDatasets.data.hits.map((hit, index) => { return ( = ({ backgroundColor: theme.palette.action.hover, }, backgroundColor: - selectedIndex === index + esSearchJobs.data.hits.length + selectedIndex === index + openSearchJobs.data.hits.length ? theme.palette.action.hover : undefined, }} @@ -326,7 +326,7 @@ const EsSearch: React.FC = ({ Match - {Object.entries(esSearchDatasets.data.highlights[index]).map(([key, value]) => { + {Object.entries(openSearchDatasets.data.highlights[index]).map(([key, value]) => { return value.map((highlightedString: any, idx: number) => { return ( = ({ const mapStateToProps = (state: IState) => { return { - esSearchJobs: state.esSearchJobs, - esSearchDatasets: state.esSearchDatasets, + openSearchJobs: state.openSearchJobs, + openSearchDatasets: state.openSearchDatasets, } } const mapDispatchToProps = (dispatch: Redux.Dispatch) => bindActionCreators( { - fetchEsSearchJobs: fetchEsSearchJobs, - fetchEsSearchDatasets: fetchEsSearchDatasets, + fetchOpenSearchJobs: fetchOpenSearchJobs, + fetchOpenSearchDatasets: fetchOpenSearchDatasets, }, dispatch ) -export default connect(mapStateToProps, mapDispatchToProps)(EsSearch) +export default connect(mapStateToProps, mapDispatchToProps)(OpenSearch) diff --git a/web/src/components/search/es-search/airlfow-logo.svg b/web/src/components/search/open-search/airlfow-logo.svg similarity index 100% rename from web/src/components/search/es-search/airlfow-logo.svg rename to web/src/components/search/open-search/airlfow-logo.svg diff --git a/web/src/components/search/es-search/spark-logo.svg b/web/src/components/search/open-search/spark-logo.svg similarity index 100% rename from 
web/src/components/search/es-search/spark-logo.svg rename to web/src/components/search/open-search/spark-logo.svg diff --git a/web/src/globals.ts b/web/src/globals.ts index 2c4098b0fb..7a1510b3fc 100644 --- a/web/src/globals.ts +++ b/web/src/globals.ts @@ -6,12 +6,14 @@ declare const __NODE_ENV__: string declare const __DEVELOPMENT__: boolean declare const __API_URL__: string +declare const __API_BETA_URL__: string declare const __REACT_APP_ADVANCED_SEARCH__: boolean declare const __FEEDBACK_FORM_URL__: string declare const __API_DOCS_URL__: string export const API_URL = __API_URL__ +export const API_BETA_URL = __API_BETA_URL__ export const FEEDBACK_FORM_URL = __FEEDBACK_FORM_URL__ export const API_DOCS_URL = __API_DOCS_URL__ export const REACT_APP_ADVANCED_SEARCH = __REACT_APP_ADVANCED_SEARCH__; diff --git a/web/src/store/actionCreators/actionTypes.ts b/web/src/store/actionCreators/actionTypes.ts index a5ca8ff71a..1b6b25867e 100644 --- a/web/src/store/actionCreators/actionTypes.ts +++ b/web/src/store/actionCreators/actionTypes.ts @@ -72,11 +72,11 @@ export const SET_SHOW_FULL_GRAPH = 'SET_SHOW_FULL_GRAPH' export const FETCH_SEARCH = 'FETCH_SEARCH' export const FETCH_SEARCH_SUCCESS = 'FETCH_SEARCH _SUCCESS' -// esSearch -export const FETCH_ES_SEARCH_JOBS = 'FETCH_ES_SEARCH_JOBS' -export const FETCH_ES_SEARCH_JOBS_SUCCESS = 'FETCH_ES_SEARCH_JOBS_SUCCESS' -export const FETCH_ES_SEARCH_DATASETS = 'FETCH_ES_SEARCH_DATASETS' -export const FETCH_ES_SEARCH_DATASETS_SUCCESS = 'FETCH_ES_SEARCH_DATASETS_SUCCESS' +// OpenSearch +export const FETCH_OPEN_SEARCH_JOBS = 'FETCH_OPEN_SEARCH_JOBS' +export const FETCH_OPEN_SEARCH_JOBS_SUCCESS = 'FETCH_OPEN_SEARCH_JOBS_SUCCESS' +export const FETCH_OPEN_SEARCH_DATASETS = 'FETCH_OPEN_SEARCH_DATASETS' +export const FETCH_OPEN_SEARCH_DATASETS_SUCCESS = 'FETCH_OPEN_SEARCH_DATASETS_SUCCESS' // facets export const FETCH_RUN_FACETS = 'FETCH_RUN_FACETS' diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts 
index 71d764db24..ceaf9e69ba 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -7,8 +7,8 @@ import { ColumnLineageGraph, Dataset, DatasetVersion, - EsSearchResultDatasets, - EsSearchResultJobs, + OpenSearchResultDatasets, + OpenSearchResultJobs, Events, Facets, Job, @@ -519,26 +519,26 @@ export const setColumnLineageGraphDepth = (depth: number) => ({ payload: depth, }) -export const fetchEsSearchJobs = (q: string) => ({ - type: actionTypes.FETCH_ES_SEARCH_JOBS, +export const fetchOpenSearchJobs = (q: string) => ({ + type: actionTypes.FETCH_OPEN_SEARCH_JOBS, payload: { q, }, }) -export const fetchEsSearchJobsSuccess = (search: EsSearchResultJobs) => ({ - type: actionTypes.FETCH_ES_SEARCH_JOBS_SUCCESS, +export const fetchOpenSearchJobsSuccess = (search: OpenSearchResultJobs) => ({ + type: actionTypes.FETCH_OPEN_SEARCH_JOBS_SUCCESS, payload: search, }) -export const fetchEsSearchDatasets = (q: string) => ({ - type: actionTypes.FETCH_ES_SEARCH_DATASETS, +export const fetchOpenSearchDatasets = (q: string) => ({ + type: actionTypes.FETCH_OPEN_SEARCH_DATASETS, payload: { q, }, }) -export const fetchEsSearchDatasetsSuccess = (search: EsSearchResultDatasets) => ({ - type: actionTypes.FETCH_ES_SEARCH_DATASETS_SUCCESS, +export const fetchOpenSearchDatasetsSuccess = (search: OpenSearchResultDatasets) => ({ + type: actionTypes.FETCH_OPEN_SEARCH_DATASETS_SUCCESS, payload: search, }) diff --git a/web/src/store/reducers/esSearch.ts b/web/src/store/reducers/esSearch.ts deleted file mode 100644 index d6626ae8d5..0000000000 --- a/web/src/store/reducers/esSearch.ts +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2018-2023 contributors to the Marquez project -// SPDX-License-Identifier: Apache-2.0 - -import { FETCH_ES_SEARCH_JOBS, FETCH_ES_SEARCH_JOBS_SUCCESS } from '../actionCreators/actionTypes' - -import { EsSearchResultJobs } from '../../types/api' -import { fetchEsSearchJobs, fetchEsSearchJobsSuccess } from '../actionCreators' - -export 
type IEsSearchJobsState = { isLoading: boolean; data: EsSearchResultJobs; init: boolean } - -export const initialState: IEsSearchJobsState = { - isLoading: false, - data: { hits: [], highlights: [] }, - init: false, -} - -type IJobsAction = ReturnType & - ReturnType - -export default (state = initialState, action: IJobsAction): IEsSearchJobsState => { - const { type, payload } = action - - switch (type) { - case FETCH_ES_SEARCH_JOBS: - return { ...state, isLoading: true } - case FETCH_ES_SEARCH_JOBS_SUCCESS: { - return { - ...state, - isLoading: false, - init: true, - data: payload, - } - } - default: - return state - } -} diff --git a/web/src/store/reducers/esSearchDatasets.ts b/web/src/store/reducers/esSearchDatasets.ts deleted file mode 100644 index ff83607f6f..0000000000 --- a/web/src/store/reducers/esSearchDatasets.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { EsSearchResultDatasets } from '../../types/api' -import { - FETCH_ES_SEARCH_DATASETS, - FETCH_ES_SEARCH_DATASETS_SUCCESS, -} from '../actionCreators/actionTypes' -import { fetchEsSearchDatasets, fetchEsSearchDatasetsSuccess } from '../actionCreators' - -export type IEsSearchDatasetsState = { - isLoading: boolean - data: EsSearchResultDatasets - init: boolean -} - -export const initialState: IEsSearchDatasetsState = { - isLoading: false, - data: { hits: [], highlights: [] }, - init: false, -} - -type IDatasetsAction = ReturnType & - ReturnType - -export default (state = initialState, action: IDatasetsAction): IEsSearchDatasetsState => { - const { type, payload } = action - - switch (type) { - case FETCH_ES_SEARCH_DATASETS: - return { ...state, isLoading: true } - case FETCH_ES_SEARCH_DATASETS_SUCCESS: { - return { - ...state, - isLoading: false, - init: true, - data: payload, - } - } - default: - return state - } -} diff --git a/web/src/store/reducers/index.ts b/web/src/store/reducers/index.ts index aee8660353..3a6a66f8e4 100644 --- a/web/src/store/reducers/index.ts +++ b/web/src/store/reducers/index.ts @@ 
-10,8 +10,8 @@ import dataset, { IDatasetState } from './dataset' import datasetVersions, { IDatasetVersionsState } from './datasetVersions' import datasets, { IDatasetsState } from './datasets' import display, { IDisplayState } from './display' -import esSearchDatasets, { IEsSearchDatasetsState } from './esSearchDatasets' -import esSearchJobs, { IEsSearchJobsState } from './esSearch' +import openSearchDatasets, { IOpenSearchDatasetsState } from './openSearchDatasets' +import openSearchJobs, { IOpenSearchJobsState } from './openSearch' import events, { IEventsState } from './events' import facets, { IFacetsState } from './facets' import jobs, { IJobsState } from './jobs' @@ -26,8 +26,8 @@ export interface IState { datasets: IDatasetsState dataset: IDatasetState datasetVersions: IDatasetVersionsState - esSearchJobs: IEsSearchJobsState - esSearchDatasets: IEsSearchDatasetsState + openSearchJobs: IOpenSearchJobsState + openSearchDatasets: IOpenSearchDatasetsState events: IEventsState jobs: IJobsState runs: IRunsState @@ -54,8 +54,8 @@ export default (history: History): Reducer => display, lineage, search, - esSearchJobs, - esSearchDatasets, + openSearchJobs, + openSearchDatasets, facets, tags, }) diff --git a/web/src/store/reducers/openSearch.ts b/web/src/store/reducers/openSearch.ts new file mode 100644 index 0000000000..35d7007753 --- /dev/null +++ b/web/src/store/reducers/openSearch.ts @@ -0,0 +1,37 @@ +// Copyright 2018-2023 contributors to the Marquez project +// SPDX-License-Identifier: Apache-2.0 + +import { FETCH_OPEN_SEARCH_JOBS, FETCH_OPEN_SEARCH_JOBS_SUCCESS } from '../actionCreators/actionTypes' + +import { OpenSearchResultJobs } from '../../types/api' +import { fetchOpenSearchJobs, fetchOpenSearchJobsSuccess } from '../actionCreators' + +export type IOpenSearchJobsState = { isLoading: boolean; data: OpenSearchResultJobs; init: boolean } + +export const initialState: IOpenSearchJobsState = { + isLoading: false, + data: { hits: [], highlights: [] }, + init: 
false, +} + +type IJobsAction = ReturnType & + ReturnType + +export default (state = initialState, action: IJobsAction): IOpenSearchJobsState => { + const { type, payload } = action + + switch (type) { + case FETCH_OPEN_SEARCH_JOBS: + return { ...state, isLoading: true } + case FETCH_OPEN_SEARCH_JOBS_SUCCESS: { + return { + ...state, + isLoading: false, + init: true, + data: payload, + } + } + default: + return state + } +} diff --git a/web/src/store/reducers/openSearchDatasets.ts b/web/src/store/reducers/openSearchDatasets.ts new file mode 100644 index 0000000000..00ff606220 --- /dev/null +++ b/web/src/store/reducers/openSearchDatasets.ts @@ -0,0 +1,40 @@ +import { OpenSearchResultDatasets } from '../../types/api' +import { + FETCH_OPEN_SEARCH_DATASETS, + FETCH_OPEN_SEARCH_DATASETS_SUCCESS, +} from '../actionCreators/actionTypes' +import { fetchOpenSearchDatasets, fetchOpenSearchDatasetsSuccess } from '../actionCreators' + +export type IOpenSearchDatasetsState = { + isLoading: boolean + data: OpenSearchResultDatasets + init: boolean +} + +export const initialState: IOpenSearchDatasetsState = { + isLoading: false, + data: { hits: [], highlights: [] }, + init: false, +} + +type IDatasetsAction = ReturnType & + ReturnType + +export default (state = initialState, action: IDatasetsAction): IOpenSearchDatasetsState => { + const { type, payload } = action + + switch (type) { + case FETCH_OPEN_SEARCH_DATASETS: + return { ...state, isLoading: true } + case FETCH_OPEN_SEARCH_DATASETS_SUCCESS: { + return { + ...state, + isLoading: false, + init: true, + data: payload, + } + } + default: + return state + } +} diff --git a/web/src/store/requests/search.ts b/web/src/store/requests/search.ts index 99dd6de72f..9fbb3caef7 100644 --- a/web/src/store/requests/search.ts +++ b/web/src/store/requests/search.ts @@ -1,7 +1,7 @@ // Copyright 2018-2023 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 -import { API_URL } from '../../globals' +import {API_BETA_URL, 
API_URL} from '../../globals' import { genericFetchWrapper } from './index' export const getSearch = async (q: string, filter = 'ALL', sort = 'NAME', limit = 100) => { @@ -12,12 +12,12 @@ export const getSearch = async (q: string, filter = 'ALL', sort = 'NAME', limit return genericFetchWrapper(url, { method: 'GET' }, 'fetchSearch') } -export const getEsSearchJobs = async (q: string) => { - const url = `${API_URL}/search/jobs?q=${q}` - return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearchJobs') +export const getOpenSearchJobs = async (q: string) => { + const url = `${API_BETA_URL}/search/jobs?q=${q}` + return genericFetchWrapper(url, { method: 'GET' }, 'fetchOpenSearchJobs') } -export const getEsSearchDatasets = async (q: string) => { - const url = `${API_URL}/search/datasets?q=${q}` - return genericFetchWrapper(url, { method: 'GET' }, 'fetchEsSearchDatasets') +export const getOpenSearchDatasets = async (q: string) => { + const url = `${API_BETA_URL}/search/datasets?q=${q}` + return genericFetchWrapper(url, { method: 'GET' }, 'fetchOpenSearchDatasets') } diff --git a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index 341ec92701..d37f798562 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -16,8 +16,8 @@ import { FETCH_DATASET, FETCH_DATASETS, FETCH_DATASET_VERSIONS, - FETCH_ES_SEARCH_DATASETS, - FETCH_ES_SEARCH_JOBS, + FETCH_OPEN_SEARCH_DATASETS, + FETCH_OPEN_SEARCH_JOBS, FETCH_EVENTS, FETCH_INITIAL_DATASET_VERSIONS, FETCH_JOBS, @@ -34,8 +34,8 @@ import { Dataset, DatasetVersions, Datasets, - EsSearchResultDatasets, - EsSearchResultJobs, + OpenSearchResultDatasets, + OpenSearchResultJobs, Events, Facets, Jobs, @@ -88,8 +88,8 @@ import { fetchDatasetSuccess, fetchDatasetVersionsSuccess, fetchDatasetsSuccess, - fetchEsSearchDatasetsSuccess, - fetchEsSearchJobsSuccess, + fetchOpenSearchDatasetsSuccess, + fetchOpenSearchJobsSuccess, fetchEventsSuccess, fetchFacetsSuccess, fetchInitialDatasetVersionsSuccess, @@ 
-103,7 +103,7 @@ import { fetchTagsSuccess, } from '../actionCreators' import { getColumnLineage } from '../requests/columnlineage' -import { getEsSearchDatasets, getEsSearchJobs, getSearch } from '../requests/search' +import { getOpenSearchDatasets, getOpenSearchJobs, getSearch } from '../requests/search' import { getLineage } from '../requests/lineage' export function* fetchTags() { @@ -470,27 +470,27 @@ export function* fetchRunFacetsSaga() { } } -export function* fetchEsSearchJobsSaga() { +export function* fetchOpenSearchJobsSaga() { while (true) { try { - const { payload } = yield take(FETCH_ES_SEARCH_JOBS) - const EsSearchResultJobs: EsSearchResultJobs = yield call(getEsSearchJobs, payload.q) - yield put(fetchEsSearchJobsSuccess(EsSearchResultJobs)) + const { payload } = yield take(FETCH_OPEN_SEARCH_JOBS) + const OpenSearchResultJobs: OpenSearchResultJobs = yield call(getOpenSearchJobs, payload.q) + yield put(fetchOpenSearchJobsSuccess(OpenSearchResultJobs)) } catch (e) { yield put(applicationError('Something went wrong while searching')) } } } -export function* fetchEsSearchDatasetsSaga() { +export function* fetchOpenSearchDatasetsSaga() { while (true) { try { - const { payload } = yield take(FETCH_ES_SEARCH_DATASETS) - const EsSearchResultDatasets: EsSearchResultDatasets = yield call( - getEsSearchDatasets, + const { payload } = yield take(FETCH_OPEN_SEARCH_DATASETS) + const OpenSearchResultDatasets: OpenSearchResultDatasets = yield call( + getOpenSearchDatasets, payload.q ) - yield put(fetchEsSearchDatasetsSuccess(EsSearchResultDatasets)) + yield put(fetchOpenSearchDatasetsSuccess(OpenSearchResultDatasets)) } catch (e) { yield put(applicationError('Something went wrong while searching')) } @@ -514,8 +514,8 @@ export default function* rootSaga(): Generator { fetchColumnLineage(), fetchSearch(), deleteJobSaga(), - fetchEsSearchJobsSaga(), - fetchEsSearchDatasetsSaga(), + fetchOpenSearchJobsSaga(), + fetchOpenSearchDatasetsSaga(), deleteDatasetSaga(), 
deleteDatasetTagSaga(), deleteJobTagSaga(), diff --git a/web/src/types/api.ts b/web/src/types/api.ts index a464e10eed..4835d540bd 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -283,7 +283,7 @@ export interface ColumnLineageOutEdge { destination: string } -// esSearch +// OpenSearch // jobs interface SourceCodeFacet { @@ -293,7 +293,7 @@ interface SourceCodeFacet { sourceCode: string } -interface EsSearchFacet { +interface OpenSearchFacet { sourceCode?: SourceCodeFacet } @@ -303,8 +303,8 @@ interface JobHit { namespace: string eventType: EventType type: string - facets?: EsSearchFacet - runFacets: EsSearchRunFacet + facets?: OpenSearchFacet + runFacets: OpenSearchRunFacet } interface SparkLogicalPlan { @@ -362,7 +362,7 @@ interface EnvironmentProperties { environmentProperties: Record } -interface EsSearchRunFacet { +interface OpenSearchRunFacet { 'spark.logicalPlan'?: SparkLogicalPlan spark_version?: SparkVersion processing_engine?: ProcessingEngine @@ -373,7 +373,7 @@ interface JobHighlight { 'facets.sourceCode.sourceCode'?: string[] } -export interface EsSearchResultJobs { +export interface OpenSearchResultJobs { hits: JobHit[] highlights: JobHighlight[] } @@ -415,7 +415,7 @@ type ColumnLineageFacet = { } } -type EsSearchDatasetFacets = { +type OpenSearchDatasetFacets = { schema?: SchemaFacet columnLineage?: ColumnLineageFacet } @@ -425,10 +425,10 @@ type DatasetHit = { name: string namespace: string eventType: string - facets?: EsSearchDatasetFacets + facets?: OpenSearchDatasetFacets } -export type EsSearchResultDatasets = { +export type OpenSearchResultDatasets = { hits: DatasetHit[] highlights: DatasetHighlight[] } diff --git a/web/webpack.dev.js b/web/webpack.dev.js index 8f73c6bf1a..34bf6ffe16 100644 --- a/web/webpack.dev.js +++ b/web/webpack.dev.js @@ -43,6 +43,7 @@ const webpackDev = { __DEVELOPMENT__: JSON.stringify(true), __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), __API_URL__: 
JSON.stringify('/api/v1'), + __API_BETA_URL__: JSON.stringify('/api/v2beta'), __NODE_ENV__: JSON.stringify('development'), __TEMP_ACTOR_STR__: JSON.stringify('me'), __FEEDBACK_FORM_URL__: JSON.stringify('https://forms.gle/f3tTSrZ8wPj3sHTA7'), diff --git a/web/webpack.prod.js b/web/webpack.prod.js index 0a37d03bc3..113e537537 100644 --- a/web/webpack.prod.js +++ b/web/webpack.prod.js @@ -27,6 +27,7 @@ const webpackProd = { __NODE_ENV__: JSON.stringify('production'), __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), __API_URL__: JSON.stringify('/api/v1'), + __API_BETA_URL__: JSON.stringify('/api/v2beta'), __TEMP_ACTOR_STR__: JSON.stringify('me'), __ROLLBAR__: JSON.stringify(true), __FEEDBACK_FORM_URL__: JSON.stringify('https://forms.gle/f3tTSrZ8wPj3sHTA7'), From d13bb06162ae44eeaf1c009eba71e3874e5a8bb1 Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 13:03:13 -0700 Subject: [PATCH 54/87] newline --- docker-compose.search.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.search.yml b/docker-compose.search.yml index 72c7ee2d67..f9443fe3f1 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -133,4 +133,4 @@ volumes: db-backup: networks: - opensearch-net: \ No newline at end of file + opensearch-net: From d099b777a3e812ff4b2a90c052ed3b0338752601 Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 13:51:25 -0700 Subject: [PATCH 55/87] Updating for beta vs. non beta endpoints in search resource. 
--- api/src/main/java/marquez/api/SearchResource.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 2f316289bd..e255bba61b 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -42,7 +42,7 @@ import org.opensearch.client.opensearch.core.search.Hit; @Slf4j -@Path("/api/v2beta/search") +@Path("/api/") public class SearchResource { private static final String YYYY_MM_DD = "^\\d{4}-\\d{2}-\\d{2}$"; private static final String DEFAULT_SORT = "name"; @@ -63,6 +63,7 @@ public SearchResource( @ExceptionMetered @GET @Produces(APPLICATION_JSON) + @Path("v1/search") public Response search( @QueryParam("q") @NotBlank String query, @QueryParam("filter") @Nullable SearchFilter filter, @@ -88,9 +89,9 @@ public Response search( @ExceptionMetered @GET @Produces(APPLICATION_JSON) - @Path("/jobs") + @Path("v2beta/search/jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - return formatEsResponse(this.searchService.searchJobs(query)); + return formatOpenSearchResponse(this.searchService.searchJobs(query)); } @Timed @@ -98,12 +99,12 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @ExceptionMetered @GET @Produces(APPLICATION_JSON) - @Path("/datasets") + @Path("v2beta/search/datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { - return formatEsResponse(this.searchService.searchDatasets(query)); + return formatOpenSearchResponse(this.searchService.searchDatasets(query)); } - private Response formatEsResponse(SearchResponse response) { + private Response formatOpenSearchResponse(SearchResponse response) { List hits = response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); List>> highlights = From 
e8bfb3bc490fa8e53e0c60e406bb13fae0a5434f Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 15:54:03 -0700 Subject: [PATCH 56/87] Moving search resource to its own place. --- api/src/main/java/marquez/MarquezContext.java | 8 +- .../java/marquez/api/OpenSearchResource.java | 80 +++++++++++++++++++ .../main/java/marquez/api/SearchResource.java | 58 +------------- .../main/java/marquez/client/MarquezUrl.java | 2 + 4 files changed, 90 insertions(+), 58 deletions(-) create mode 100644 api/src/main/java/marquez/api/OpenSearchResource.java diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index 42688a8319..e2e9f8cdcf 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -18,6 +18,7 @@ import marquez.api.JobResource; import marquez.api.NamespaceResource; import marquez.api.OpenLineageResource; +import marquez.api.OpenSearchResource; import marquez.api.SearchResource; import marquez.api.SourceResource; import marquez.api.TagResource; @@ -100,6 +101,7 @@ public final class MarquezContext { @Getter private final TagResource tagResource; @Getter private final OpenLineageResource openLineageResource; @Getter private final SearchResource searchResource; + @Getter private final OpenSearchResource openSearchResource; @Getter private final ImmutableList resources; @Getter private final JdbiExceptionExceptionMapper jdbiException; @Getter private final JsonProcessingExceptionMapper jsonException; @@ -172,7 +174,8 @@ private MarquezContext( this.jobResource = new JobResource(serviceFactory, jobVersionDao, jobFacetsDao, runFacetsDao); this.tagResource = new TagResource(serviceFactory); this.openLineageResource = new OpenLineageResource(serviceFactory, openLineageDao); - this.searchResource = new SearchResource(serviceFactory, searchDao); + this.searchResource = new SearchResource(searchDao); + this.openSearchResource = new OpenSearchResource(serviceFactory); 
this.resources = ImmutableList.of( @@ -185,7 +188,8 @@ private MarquezContext( jdbiException, jsonException, openLineageResource, - searchResource); + searchResource, + openSearchResource); final MarquezGraphqlServletBuilder servlet = new MarquezGraphqlServletBuilder(); this.graphqlServlet = servlet.getServlet(new GraphqlSchemaBuilder(jdbi)); diff --git a/api/src/main/java/marquez/api/OpenSearchResource.java b/api/src/main/java/marquez/api/OpenSearchResource.java new file mode 100644 index 0000000000..a4a0999bfd --- /dev/null +++ b/api/src/main/java/marquez/api/OpenSearchResource.java @@ -0,0 +1,80 @@ +package marquez.api; + +import static javax.ws.rs.core.MediaType.APPLICATION_JSON; + +import com.codahale.metrics.annotation.ExceptionMetered; +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.validation.constraints.NotBlank; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Response; +import lombok.Getter; +import lombok.NonNull; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; +import marquez.service.SearchService; +import marquez.service.ServiceFactory; +import org.opensearch.client.opensearch.core.SearchResponse; +import org.opensearch.client.opensearch.core.search.Hit; + +@Slf4j +@Path("api/v2beta/search") +public class OpenSearchResource { + + private final SearchService searchService; + + public OpenSearchResource(@NonNull final ServiceFactory serviceFactory) { + this.searchService = serviceFactory.getSearchService(); + } + + @Timed + @ResponseMetered + @ExceptionMetered + @GET + @Produces(APPLICATION_JSON) + @Path("jobs") + public Response searchJobs(@QueryParam("q") @NotBlank 
String query) throws IOException { + return formatOpenSearchResponse(this.searchService.searchJobs(query)); + } + + @Timed + @ResponseMetered + @ExceptionMetered + @GET + @Produces(APPLICATION_JSON) + @Path("datasets") + public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { + return formatOpenSearchResponse(this.searchService.searchDatasets(query)); + } + + private Response formatOpenSearchResponse(SearchResponse response) { + List hits = + response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); + List>> highlights = + response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); + + return Response.ok(new OpenSearchResult(hits, highlights)).build(); + } + + @ToString + public static final class OpenSearchResult { + @Getter private final List hits; + @Getter private final List>> highlights; + + @JsonCreator + public OpenSearchResult( + @NonNull List hits, @NonNull List>> highlights) { + this.hits = hits; + this.highlights = highlights; + } + } +} diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index e255bba61b..9fbe4ff243 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -12,11 +12,7 @@ import com.codahale.metrics.annotation.ResponseMetered; import com.codahale.metrics.annotation.Timed; import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.databind.node.ObjectNode; -import java.io.IOException; import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; import javax.annotation.Nullable; import javax.validation.Valid; import javax.validation.constraints.Min; @@ -36,25 +32,18 @@ import marquez.api.models.SearchResult; import marquez.api.models.SearchSort; import marquez.db.SearchDao; -import marquez.service.SearchService; -import marquez.service.ServiceFactory; -import 
org.opensearch.client.opensearch.core.SearchResponse; -import org.opensearch.client.opensearch.core.search.Hit; @Slf4j -@Path("/api/") +@Path("api/v1/search") public class SearchResource { private static final String YYYY_MM_DD = "^\\d{4}-\\d{2}-\\d{2}$"; private static final String DEFAULT_SORT = "name"; private static final String DEFAULT_LIMIT = "10"; private static final int MIN_LIMIT = 0; - private final SearchService searchService; private final SearchDao searchDao; - public SearchResource( - @NonNull final ServiceFactory serviceFactory, @NonNull final SearchDao searchDao) { - this.searchService = serviceFactory.getSearchService(); + public SearchResource(@NonNull final SearchDao searchDao) { this.searchDao = searchDao; } @@ -63,7 +52,6 @@ public SearchResource( @ExceptionMetered @GET @Produces(APPLICATION_JSON) - @Path("v1/search") public Response search( @QueryParam("q") @NotBlank String query, @QueryParam("filter") @Nullable SearchFilter filter, @@ -84,48 +72,6 @@ public Response search( return Response.ok(new SearchResults(searchResults)).build(); } - @Timed - @ResponseMetered - @ExceptionMetered - @GET - @Produces(APPLICATION_JSON) - @Path("v2beta/search/jobs") - public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - return formatOpenSearchResponse(this.searchService.searchJobs(query)); - } - - @Timed - @ResponseMetered - @ExceptionMetered - @GET - @Produces(APPLICATION_JSON) - @Path("v2beta/search/datasets") - public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { - return formatOpenSearchResponse(this.searchService.searchDatasets(query)); - } - - private Response formatOpenSearchResponse(SearchResponse response) { - List hits = - response.hits().hits().stream().map(Hit::source).collect(Collectors.toList()); - List>> highlights = - response.hits().hits().stream().map(Hit::highlight).collect(Collectors.toList()); - - return Response.ok(new OpenSearchResult(hits, 
highlights)).build(); - } - - @ToString - public static final class OpenSearchResult { - @Getter private final List hits; - @Getter private final List>> highlights; - - @JsonCreator - public OpenSearchResult( - @NonNull List hits, @NonNull List>> highlights) { - this.hits = hits; - this.highlights = highlights; - } - } - /** Wrapper for {@link SearchResult}s which also contains a {@code total count}. */ @ToString public static final class SearchResults { diff --git a/clients/java/src/main/java/marquez/client/MarquezUrl.java b/clients/java/src/main/java/marquez/client/MarquezUrl.java index 46740312bb..a38b9f12e8 100644 --- a/clients/java/src/main/java/marquez/client/MarquezUrl.java +++ b/clients/java/src/main/java/marquez/client/MarquezUrl.java @@ -211,6 +211,8 @@ URL toSearchUrl( queryParams.put("sort", sort); } queryParams.put("limit", limit); + System.out.println("peter"); + System.out.println(from(searchPath(), queryParams.build())); return from(searchPath(), queryParams.build()); } From 7608b584c7b75448bb7456646f97d8bd3e29d29c Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 15:54:50 -0700 Subject: [PATCH 57/87] Removing prints. --- clients/java/src/main/java/marquez/client/MarquezUrl.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/clients/java/src/main/java/marquez/client/MarquezUrl.java b/clients/java/src/main/java/marquez/client/MarquezUrl.java index a38b9f12e8..46740312bb 100644 --- a/clients/java/src/main/java/marquez/client/MarquezUrl.java +++ b/clients/java/src/main/java/marquez/client/MarquezUrl.java @@ -211,8 +211,6 @@ URL toSearchUrl( queryParams.put("sort", sort); } queryParams.put("limit", limit); - System.out.println("peter"); - System.out.println(from(searchPath(), queryParams.build())); return from(searchPath(), queryParams.build()); } From 2c9cd6c2b691a123aafc31c09b10417f60d1855b Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 16:24:50 -0700 Subject: [PATCH 58/87] Removing all helm changes for this work stream. 
--- chart/Chart.lock | 6 +----- chart/Chart.yaml | 4 ---- chart/README.md | 22 ++++++++++------------ chart/templates/marquez/deployment.yaml | 2 -- chart/values.yaml | 7 ------- 5 files changed, 11 insertions(+), 30 deletions(-) diff --git a/chart/Chart.lock b/chart/Chart.lock index 0b5dcb679d..169a67fb36 100644 --- a/chart/Chart.lock +++ b/chart/Chart.lock @@ -5,8 +5,4 @@ dependencies: - name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.8.1 -- name: opensearch - repository: https://opensearch-project.github.io/helm-charts - version: 2.21.0 -digest: sha256:b7f71608c49ded8cd4ea7658d253f6b36f120e3a934281b9c70657115d7cdf9d -generated: "2024-07-15T13:21:32.634905-07:00" + diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 3736da4ec0..5623d2e246 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -10,10 +10,6 @@ dependencies: name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.8.1 - - condition: opensearch.enabled - name: opensearch - repository: https://opensearch-project.github.io/helm-charts - version: 2.21.0 description: Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata. home: https://github.com/MarquezProject/marquez/tree/main/chart icon: https://raw.githubusercontent.com/MarquezProject/marquez/main/web/src/img/marquez-logo.png diff --git a/chart/README.md b/chart/README.md index d7127e7291..d36f273371 100644 --- a/chart/README.md +++ b/chart/README.md @@ -19,7 +19,7 @@ helm install marquez . --dependency-update To install the chart with the release name `marquez` using a fresh Postgres instance. ```bash -helm install marquez . --dependency-update --set postgresql.enabled=true --set opensearch.enabled=true +helm install marquez . --dependency-update --set postgresql.enabled=true ``` > **Note:** For a list of parameters that can be overridden during installation, see the [configuration](#configuration) section. 
@@ -84,16 +84,14 @@ helm delete marquez ### [Postgres](https://github.com/bitnami/charts/blob/master/bitnami/postgresql/values.yaml) (sub-chart) **parameters** -| Parameter | Description | Default | -|----------------------------------|--------------------------------|-----------| -| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | -| `opensearch.enabled` | Deploy Opensearch container(s) | `false` | -| `postgresql.image.tag` | PostgreSQL image version | `12.1.0` | -| `postgresql.auth.username` | PostgreSQL username | `buendia` | -| `postgresql.auth.password` | PostgreSQL password | `macondo` | -| `postgresql.auth.database` | PostgreSQL database | `marquez` | -| `opensearch.auth.database` | OpenSearch password | `marquez` | -| `postgresql.auth.existingSecret` | Name of existing secret object | `nil` | +| Parameter | Description | Default | +|----------------------------------|---------------------------------|-----------| +| `postgresql.enabled` | Deploy PostgreSQL container(s) | `false` | +| `postgresql.image.tag` | PostgreSQL image version | `12.1.0` | +| `postgresql.auth.username` | PostgreSQL username | `buendia` | +| `postgresql.auth.password` | PostgreSQL password | `macondo` | +| `postgresql.auth.database` | PostgreSQL database | `marquez` | +| `postgresql.auth.existingSecret` | Name of existing secret object | `nil` | ### Common **parameters** @@ -130,7 +128,7 @@ helm delete marquez The quickest way to install Marquez via Kubernetes is to create a local Postgres instance. ```bash -helm install marquez . --dependency-update --set postgresql.enabled=true --set opensearch.enabled=true +helm install marquez . 
--dependency-update --set postgresql.enabled=true ``` ### Docker Postgres diff --git a/chart/templates/marquez/deployment.yaml b/chart/templates/marquez/deployment.yaml index a327c879ed..9fc5bbc8f1 100644 --- a/chart/templates/marquez/deployment.yaml +++ b/chart/templates/marquez/deployment.yaml @@ -91,8 +91,6 @@ spec: key: {{ include "marquez.database.existingsecret.key" . }} - name: MIGRATE_ON_STARTUP value: {{ .Values.marquez.migrateOnStartup | quote }} - - name: SEARCH_PASSWORD - value: = {{ "Marquez4Ever!" }} {{- if .Values.marquez.resources }} resources: {{- toYaml .Values.marquez.resources | nindent 12 }} {{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 918bae1980..4374e8d863 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -139,13 +139,6 @@ postgresql: ## existingSecret: "" -opensearch: - enabled: false - ## Opensearch password - extraEnvs: - - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD - value: Marquez4Ever! - ## Additional labels to all the deployed resources; note that ## the following standard labels will automatically be applied. ## app.kubernetes.io/name, helm.sh/chart, From 3cb90b2d872c0d1fe12e1e8e7f10cdc874603c1d Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 16:26:39 -0700 Subject: [PATCH 59/87] Adding back lock file contents. 
--- chart/Chart.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chart/Chart.lock b/chart/Chart.lock index 169a67fb36..58554a821c 100644 --- a/chart/Chart.lock +++ b/chart/Chart.lock @@ -5,4 +5,5 @@ dependencies: - name: postgresql repository: https://charts.bitnami.com/bitnami version: 11.8.1 - +digest: sha256:5d4b20341df7c1d2a1e1e16a9e3248a5e4eabf765b307bb05acf13447ff51ae5 +generated: "2022-11-10T20:03:21.425592157Z" From 4158d54d8c7dc5d33e11e56be9e8854abc7e5045 Mon Sep 17 00:00:00 2001 From: phixMe Date: Tue, 6 Aug 2024 16:28:01 -0700 Subject: [PATCH 60/87] Adding header --- api/src/main/java/marquez/api/OpenSearchResource.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/src/main/java/marquez/api/OpenSearchResource.java b/api/src/main/java/marquez/api/OpenSearchResource.java index a4a0999bfd..05ec62c76e 100644 --- a/api/src/main/java/marquez/api/OpenSearchResource.java +++ b/api/src/main/java/marquez/api/OpenSearchResource.java @@ -1,3 +1,8 @@ +/* + * Copyright 2018-2024 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + package marquez.api; import static javax.ws.rs.core.MediaType.APPLICATION_JSON; From f05bd15a3caf5baa8f36429ba8fd9f658251c451 Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 9 Aug 2024 15:13:15 -0700 Subject: [PATCH 61/87] Adding middleware proxy. 
--- web/setupProxy.js | 1 + 1 file changed, 1 insertion(+) diff --git a/web/setupProxy.js b/web/setupProxy.js index b83f8ead23..61786ed6b1 100644 --- a/web/setupProxy.js +++ b/web/setupProxy.js @@ -15,6 +15,7 @@ app.use('/events', express.static(path)) app.use('/lineage/:type/:namespace/:name', express.static(path)) app.use('/datasets/column-level/:namespace/:name', express.static(path)) app.use(createProxyMiddleware('/api/v1', apiOptions)) +app.use(createProxyMiddleware('/api/v2beta', apiOptions)) router.get('/healthcheck', function (req, res) { res.send('OK') From 61672cd9a78f64e19bb52838995d96bbcdcbbcfc Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 9 Aug 2024 15:38:05 -0700 Subject: [PATCH 62/87] Code review updates. --- api/src/main/java/marquez/MarquezContext.java | 7 +++---- api/src/main/java/marquez/api/SearchResource.java | 2 +- .../SearchResource.java} | 8 ++++---- api/src/main/java/marquez/service/SearchService.java | 6 ++++++ build.gradle | 2 -- 5 files changed, 14 insertions(+), 11 deletions(-) rename api/src/main/java/marquez/api/{OpenSearchResource.java => v2beta/SearchResource.java} (94%) diff --git a/api/src/main/java/marquez/MarquezContext.java b/api/src/main/java/marquez/MarquezContext.java index e2e9f8cdcf..910a1b6a73 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -18,7 +18,6 @@ import marquez.api.JobResource; import marquez.api.NamespaceResource; import marquez.api.OpenLineageResource; -import marquez.api.OpenSearchResource; import marquez.api.SearchResource; import marquez.api.SourceResource; import marquez.api.TagResource; @@ -100,8 +99,8 @@ public final class MarquezContext { @Getter private final JobResource jobResource; @Getter private final TagResource tagResource; @Getter private final OpenLineageResource openLineageResource; + @Getter private final marquez.api.v2beta.SearchResource v2BetasearchResource; @Getter private final SearchResource searchResource; - @Getter 
private final OpenSearchResource openSearchResource; @Getter private final ImmutableList resources; @Getter private final JdbiExceptionExceptionMapper jdbiException; @Getter private final JsonProcessingExceptionMapper jsonException; @@ -175,7 +174,7 @@ private MarquezContext( this.tagResource = new TagResource(serviceFactory); this.openLineageResource = new OpenLineageResource(serviceFactory, openLineageDao); this.searchResource = new SearchResource(searchDao); - this.openSearchResource = new OpenSearchResource(serviceFactory); + this.v2BetasearchResource = new marquez.api.v2beta.SearchResource(serviceFactory); this.resources = ImmutableList.of( @@ -189,7 +188,7 @@ private MarquezContext( jsonException, openLineageResource, searchResource, - openSearchResource); + v2BetasearchResource); final MarquezGraphqlServletBuilder servlet = new MarquezGraphqlServletBuilder(); this.graphqlServlet = servlet.getServlet(new GraphqlSchemaBuilder(jdbi)); diff --git a/api/src/main/java/marquez/api/SearchResource.java b/api/src/main/java/marquez/api/SearchResource.java index 9fbe4ff243..dcc8d3206c 100644 --- a/api/src/main/java/marquez/api/SearchResource.java +++ b/api/src/main/java/marquez/api/SearchResource.java @@ -34,7 +34,7 @@ import marquez.db.SearchDao; @Slf4j -@Path("api/v1/search") +@Path("/api/v1/search") public class SearchResource { private static final String YYYY_MM_DD = "^\\d{4}-\\d{2}-\\d{2}$"; private static final String DEFAULT_SORT = "name"; diff --git a/api/src/main/java/marquez/api/OpenSearchResource.java b/api/src/main/java/marquez/api/v2beta/SearchResource.java similarity index 94% rename from api/src/main/java/marquez/api/OpenSearchResource.java rename to api/src/main/java/marquez/api/v2beta/SearchResource.java index 05ec62c76e..a6bb6521af 100644 --- a/api/src/main/java/marquez/api/OpenSearchResource.java +++ b/api/src/main/java/marquez/api/v2beta/SearchResource.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package marquez.api; +package 
marquez.api.v2beta; import static javax.ws.rs.core.MediaType.APPLICATION_JSON; @@ -32,12 +32,12 @@ import org.opensearch.client.opensearch.core.search.Hit; @Slf4j -@Path("api/v2beta/search") -public class OpenSearchResource { +@Path("/api/v2beta/search") +public class SearchResource { private final SearchService searchService; - public OpenSearchResource(@NonNull final ServiceFactory serviceFactory) { + public SearchResource(@NonNull final ServiceFactory serviceFactory) { this.searchService = serviceFactory.getSearchService(); } diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index b1d1405149..d57f5b35c1 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -64,8 +64,10 @@ public class SearchService { }; private final OpenSearchClient openSearchClient; + private final SearchConfig searchConfig; public SearchService(SearchConfig searchConfig) { + this.searchConfig = searchConfig; final HttpHost host = new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); @@ -153,6 +155,10 @@ public SearchResponse searchJobs(String query) throws IOException { } public void indexEvent(@Valid @NotNull LineageEvent event) { + if (!searchConfig.isEnabled()) { + log.debug("Search is disabled, skipping indexing"); + return; + } UUID runUuid = runUuidFromEvent(event.getRun()); log.debug("Indexing event {}", event); diff --git a/build.gradle b/build.gradle index e6f0237ffb..45baf0edac 100644 --- a/build.gradle +++ b/build.gradle @@ -53,7 +53,6 @@ subprojects { ext { assertjVersion = '3.25.3' dropwizardVersion = '2.1.12' - jacksonDatabindVersion = '2.12.3' jacocoVersion = '0.8.12' junit5Version = '5.10.2' lombokVersion = '1.18.34' @@ -65,7 +64,6 @@ subprojects { dependencies { implementation 
"org.projectlombok:lombok:${lombokVersion}" - implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonDatabindVersion}" implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' implementation 'org.opensearch.client:opensearch-java:2.6.0' annotationProcessor "org.projectlombok:lombok:${lombokVersion}" From fdeaaa198853585d85b51d0363e2bbfe2e02291c Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 9 Aug 2024 15:49:29 -0700 Subject: [PATCH 63/87] Moving from outer gradle to api gradle. --- api/build.gradle | 3 +++ build.gradle | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/api/build.gradle b/api/build.gradle index b5a1495511..2d90f41958 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -51,6 +51,9 @@ dependencies { implementation 'com.graphql-java:graphql-java:20.9' implementation 'com.graphql-java-kickstart:graphql-java-servlet:12.0.0' + implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' + implementation 'org.opensearch.client:opensearch-java:2.6.0' + testImplementation "io.dropwizard:dropwizard-testing:${dropwizardVersion}" testImplementation "org.jdbi:jdbi3-testing:${jdbi3Version}" testImplementation "org.jdbi:jdbi3-testcontainers:${jdbi3Version}" diff --git a/build.gradle b/build.gradle index 45baf0edac..2d9cea24fb 100644 --- a/build.gradle +++ b/build.gradle @@ -64,8 +64,6 @@ subprojects { dependencies { implementation "org.projectlombok:lombok:${lombokVersion}" - implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' - implementation 'org.opensearch.client:opensearch-java:2.6.0' annotationProcessor "org.projectlombok:lombok:${lombokVersion}" testImplementation "org.assertj:assertj-core:${assertjVersion}" From dfd60e63ffbeeba4922a966527ecf5e4deacb5ed Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 9 Aug 2024 18:02:15 -0700 Subject: [PATCH 64/87] Removing extra containers. 
--- docker-compose.search.yml | 42 ++------------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/docker-compose.search.yml b/docker-compose.search.yml index f9443fe3f1..f37fb384c8 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -63,8 +63,8 @@ services: environment: - cluster.name=opensearch-cluster - node.name=opensearch - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - discovery.seed_hosts=opensearch + - cluster.initial_cluster_manager_nodes=opensearch - bootstrap.memory_lock=true - plugins.security.ssl.http.enabled=false - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" @@ -79,43 +79,6 @@ services: networks: - opensearch-net - opensearch-2: - image: opensearchproject/opensearch:2.5.0 - container_name: opensearch-2 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-2 - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 - - bootstrap.memory_lock=true - - plugins.security.ssl.http.enabled=false - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_PASSWORD=admin - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - opensearch-2-data:/usr/share/opensearch/data - networks: - - opensearch-net - - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:2.5.0 - container_name: opensearch-dashboards - ports: - - 5601:5601 # Map host port 5601 to container port 5601 - expose: - - "5601" # Expose port 5601 for web access to OpenSearch Dashboards - environment: - - 'OPENSEARCH_HOSTS=["http://opensearch:9200","http://opensearch-2:9200"]' - - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards - networks: - - opensearch-net - web: networks: - opensearch-net @@ -127,7 +90,6 @@ services: volumes: data: opensearch-data: - opensearch-2-data: db-conf: 
db-init: db-backup: From d4649ff431db48f838f22717e3c6ab752750972c Mon Sep 17 00:00:00 2001 From: phixMe Date: Fri, 9 Aug 2024 18:02:15 -0700 Subject: [PATCH 65/87] Removing extra containers. --- docker-compose.search.yml | 106 ++------------------------------------ docker-compose.seed.yml | 3 +- docker-compose.yml | 8 +-- docker/volumes.sh | 4 +- marquez.dev.yml | 8 +-- 5 files changed, 13 insertions(+), 116 deletions(-) diff --git a/docker-compose.search.yml b/docker-compose.search.yml index f9443fe3f1..28ee1623fd 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -1,53 +1,10 @@ version: "3.7" services: api: - image: "marquezproject/marquez:${TAG}" - container_name: marquez-api - environment: - - MARQUEZ_PORT=${API_PORT} - - MARQUEZ_ADMIN_PORT=${API_ADMIN_PORT} - ports: - - "${API_PORT}:${API_PORT}" - - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" - volumes: - - data:/opt/marquez - links: - - "db:postgres" - - opensearch depends_on: - - opensearch - db - entrypoint: - - /opt/marquez/wait-for-it.sh - - db:${POSTGRES_PORT} - - -- - - /opt/marquez/wait-for-it.sh - - opensearch:${SEARCH_PORT} - - -- - - ./entrypoint.sh - networks: - - opensearch-net - - db: - image: postgres:14 - container_name: marquez-db - ports: - - "${POSTGRES_PORT}:${POSTGRES_PORT}" - environment: - - POSTGRES_USER=postgres - - POSTGRES_PASSWORD=password - - MARQUEZ_DB=marquez - - MARQUEZ_USER=marquez - - MARQUEZ_PASSWORD=marquez - volumes: - - db-conf:/etc/postgresql - - db-init:/docker-entrypoint-initdb.d - - db-backup:/var/lib/postgresql/data - command: [ "postgres", "-c", "config_file=/etc/postgresql/postgresql.conf" ] - # Enables SQL statement logging (see: https://www.postgresql.org/docs/12/runtime-config-logging.html#GUC-LOG-STATEMENT) - # command: ["postgres", "-c", "log_statement=all"] - networks: - - opensearch-net + - opensearch + entrypoint: ["/opt/marquez/wait-for-it.sh", "db:${POSTGRES_PORT}", "--", "/opt/marquez/wait-for-it.sh", "opensearch:${SEARCH_PORT}", "--", 
"./entrypoint.sh"] opensearch: image: opensearchproject/opensearch:2.5.0 @@ -63,8 +20,8 @@ services: environment: - cluster.name=opensearch-cluster - node.name=opensearch - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 + - discovery.seed_hosts=opensearch + - cluster.initial_cluster_manager_nodes=opensearch - bootstrap.memory_lock=true - plugins.security.ssl.http.enabled=false - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" @@ -76,61 +33,6 @@ services: ports: - "9200:9200" - "9300:9300" - networks: - - opensearch-net - - opensearch-2: - image: opensearchproject/opensearch:2.5.0 - container_name: opensearch-2 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-2 - - discovery.seed_hosts=opensearch,opensearch-2 - - cluster.initial_cluster_manager_nodes=opensearch,opensearch-2 - - bootstrap.memory_lock=true - - plugins.security.ssl.http.enabled=false - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_PASSWORD=admin - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - volumes: - - opensearch-2-data:/usr/share/opensearch/data - networks: - - opensearch-net - - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:2.5.0 - container_name: opensearch-dashboards - ports: - - 5601:5601 # Map host port 5601 to container port 5601 - expose: - - "5601" # Expose port 5601 for web access to OpenSearch Dashboards - environment: - - 'OPENSEARCH_HOSTS=["http://opensearch:9200","http://opensearch-2:9200"]' - - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards - networks: - - opensearch-net - - web: - networks: - - opensearch-net - - seed_marquez: - depends_on: - - opensearch volumes: - data: opensearch-data: - opensearch-2-data: - db-conf: - db-init: - db-backup: - -networks: - opensearch-net: diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index 466e998cd1..c2e6d7d6a6 
100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -9,8 +9,7 @@ services: - ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh - ./docker/seed.sh:/usr/src/app/seed.sh - ./docker/metadata.json:/usr/src/app/metadata.json - links: - - "db:postgres" depends_on: - api + - opensearch entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "opensearch:${SEARCH_PORT}", "--", "./seed.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index b8b4403d4a..c2148aef0d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,15 +11,9 @@ services: - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" volumes: - data:/opt/marquez - links: - - "db:postgres" depends_on: - db - entrypoint: - - /opt/marquez/wait-for-it.sh - - db:${POSTGRES_PORT} - - -- - - ./entrypoint.sh + entrypoint: ["/opt/marquez/wait-for-it.sh", "db:${POSTGRES_PORT}", "--", "./entrypoint.sh"] db: image: postgres:14 diff --git a/docker/volumes.sh b/docker/volumes.sh index 7eb8662d73..7f134e5cdc 100755 --- a/docker/volumes.sh +++ b/docker/volumes.sh @@ -41,14 +41,16 @@ data_volume="${volume_prefix}_data" db_conf_volume="${volume_prefix}_db-conf" db_init_volume="${volume_prefix}_db-init" db_backup_volume="${volume_prefix}_db-backup" +opensearch_volume="${volume_prefix}_opensearch-data" -echo "...creating volumes: ${data_volume}, ${db_conf_volume}, ${db_init_volume}, ${db_backup_volume}" +echo "...creating volumes: ${data_volume}, ${db_conf_volume}, ${db_init_volume}, ${db_backup_volume} ${opensearch_volume}" # Create persistent volumes for Marquez docker volume create "${data_volume}" > /dev/null docker volume create "${db_conf_volume}" > /dev/null docker volume create "${db_init_volume}" > /dev/null docker volume create "${db_backup_volume}" > /dev/null +docker volume create "${opensearch_volume}" > /dev/null # Provision persistent volumes for Marquez docker create --name volumes-provisioner \ diff --git a/marquez.dev.yml b/marquez.dev.yml index 77a787216f..b3b2bcf581 100644 
--- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -9,7 +9,7 @@ server: db: driverClass: org.postgresql.Driver - url: jdbc:postgresql://postgres:${POSTGRES_PORT:-5432}/marquez + url: jdbc:postgresql://${POSTGRES_HOST:-db}:${POSTGRES_PORT:-5432}/marquez user: marquez password: marquez @@ -26,10 +26,10 @@ logging: search: enabled: true scheme: http - host: opensearch - port: 9200 + host: ${SEARCH_HOST:-opensearch} + port: ${SEARCH_PORT:-9200} username: admin - password: ${SEARCH_PASSWORD:-admin} + password: admin tags: - name: PII From b83254cb40a23f24c693ad8793768d4d05be568b Mon Sep 17 00:00:00 2001 From: wslulciuc Date: Sat, 10 Aug 2024 11:17:56 -0700 Subject: [PATCH 66/87] Set timeout for seed container to 60s Signed-off-by: wslulciuc --- docker-compose.search.yml | 2 +- docker-compose.seed.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docker-compose.search.yml b/docker-compose.search.yml index 28ee1623fd..1ddbeb6499 100644 --- a/docker-compose.search.yml +++ b/docker-compose.search.yml @@ -8,7 +8,7 @@ services: opensearch: image: opensearchproject/opensearch:2.5.0 - container_name: opensearch + container_name: marquez-search hostname: opensearch ulimits: memlock: diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index c2e6d7d6a6..c3b5e1e468 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -11,5 +11,4 @@ services: - ./docker/metadata.json:/usr/src/app/metadata.json depends_on: - api - - opensearch - entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--", "./wait-for-it.sh", "opensearch:${SEARCH_PORT}", "--", "./seed.sh"] + entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--timeout=60", "--", "./seed.sh"] From 73726a7d1806a58e90a30a2c55e3386d4559cd0f Mon Sep 17 00:00:00 2001 From: phixMe Date: Sun, 11 Aug 2024 19:01:32 -0700 Subject: [PATCH 67/87] Fixing `--no-search` and frontend config. 
--- .../main/java/marquez/api/v2beta/SearchResource.java | 6 ++++++ api/src/main/java/marquez/service/SearchService.java | 10 ++++++++++ docker-compose.seed.yml | 1 + docker-compose.web.yml | 6 +++++- docker-compose.yml | 1 + docker/up.sh | 8 +++++++- marquez.dev.yml | 2 +- web/Dockerfile | 8 +++++++- web/src/components/search/Search.tsx | 2 +- web/webpack.dev.js | 2 +- web/webpack.prod.js | 2 +- 11 files changed, 41 insertions(+), 7 deletions(-) diff --git a/api/src/main/java/marquez/api/v2beta/SearchResource.java b/api/src/main/java/marquez/api/v2beta/SearchResource.java index a6bb6521af..527a50f7e5 100644 --- a/api/src/main/java/marquez/api/v2beta/SearchResource.java +++ b/api/src/main/java/marquez/api/v2beta/SearchResource.java @@ -48,6 +48,9 @@ public SearchResource(@NonNull final ServiceFactory serviceFactory) { @Produces(APPLICATION_JSON) @Path("jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { + if (searchService.isEnabled()) { + return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); + } return formatOpenSearchResponse(this.searchService.searchJobs(query)); } @@ -58,6 +61,9 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @Produces(APPLICATION_JSON) @Path("datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { + if (searchService.isEnabled()) { + return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); + } return formatOpenSearchResponse(this.searchService.searchDatasets(query)); } diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index d57f5b35c1..1ca6145e55 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -68,6 +68,11 @@ public class SearchService { public SearchService(SearchConfig searchConfig) { this.searchConfig = searchConfig; + if 
(!searchConfig.isEnabled()) { + log.info("Search is disabled, skipping initialization"); + this.openSearchClient = null; + return; + } final HttpHost host = new HttpHost(searchConfig.getHost(), searchConfig.getPort(), searchConfig.getScheme()); final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); @@ -242,4 +247,9 @@ private void index(IndexRequest> request) { log.error("Failed to index event OpenSearch not available.", e); } } + + public boolean isEnabled() { + System.out.println("SearchConfig: " + searchConfig.isEnabled()); + return !searchConfig.isEnabled(); + } } diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index c2e6d7d6a6..0be7f73a5d 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -5,6 +5,7 @@ services: container_name: seed-marquez-with-metadata environment: - MARQUEZ_URL=http://api:${API_PORT} + - SEARCH_ENABLED=false volumes: - ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh - ./docker/seed.sh:/usr/src/app/seed.sh diff --git a/docker-compose.web.yml b/docker-compose.web.yml index f1c615f062..9b46541a87 100644 --- a/docker-compose.web.yml +++ b/docker-compose.web.yml @@ -3,11 +3,15 @@ services: web: image: "marquezproject/marquez-web:${TAG}" container_name: marquez-web + build: + context: . 
+ args: + REACT_APP_ADVANCED_SEARCH: ${SEARCH_ENABLED} environment: - MARQUEZ_HOST=api - MARQUEZ_PORT=${API_PORT} - WEB_PORT=${WEB_PORT} - - REACT_APP_ADVANCED_SEARCH=false + - REACT_APP_ADVANCED_SEARCH=${SEARCH_ENABLED} ports: - "${WEB_PORT}:${WEB_PORT}" depends_on: diff --git a/docker-compose.yml b/docker-compose.yml index c2148aef0d..1e03ea49ba 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,7 @@ services: environment: - MARQUEZ_PORT=${API_PORT} - MARQUEZ_ADMIN_PORT=${API_ADMIN_PORT} + - SEARCH_ENABLED=${SEARCH_ENABLED} ports: - "${API_PORT}:${API_PORT}" - "${API_ADMIN_PORT}:${API_ADMIN_PORT}" diff --git a/docker/up.sh b/docker/up.sh index eaecd8cbab..590a4e3a4c 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -160,6 +160,12 @@ if [[ "${NO_VOLUMES}" = "false" ]]; then ./docker/volumes.sh marquez fi +# Enable search in UI an API if search container is enabled +SEARCH_ENABLED="true" +if [[ "${NO_SEARCH}" = "true" ]]; then + SEARCH_ENABLED="false" +fi + # Run docker compose cmd with overrides -DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} SEARCH_PORT=${SEARCH_PORT} TAG=${TAG} \ +DOCKER_SCAN_SUGGEST="false" API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} POSTGRES_PORT=${POSTGRES_PORT} SEARCH_ENABLED=${SEARCH_ENABLED} SEARCH_PORT=${SEARCH_PORT} TAG=${TAG} \ docker --log-level ERROR compose $compose_files up $compose_args diff --git a/marquez.dev.yml b/marquez.dev.yml index b3b2bcf581..2bc9bcd7ae 100644 --- a/marquez.dev.yml +++ b/marquez.dev.yml @@ -24,7 +24,7 @@ logging: - type: console search: - enabled: true + enabled: ${SEARCH_ENABLED:-true} scheme: http host: ${SEARCH_HOST:-opensearch} port: ${SEARCH_PORT:-9200} diff --git a/web/Dockerfile b/web/Dockerfile index 28a9841563..4c09ddb01b 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -1,11 +1,17 @@ FROM node:18-alpine WORKDIR /usr/src/app + +ARG REACT_APP_ADVANCED_SEARCH + RUN 
apk update && apk add --virtual bash coreutils RUN apk add --no-cache git COPY package*.json ./ + +ENV REACT_APP_ADVANCED_SEARCH=$REACT_APP_ADVANCED_SEARCH + RUN npm install COPY . . -RUN npm run build +RUN REACT_APP_ADVANCED_SEARCH=$REACT_APP_ADVANCED_SEARCH npm run build COPY docker/entrypoint.sh entrypoint.sh EXPOSE 3000 ENTRYPOINT ["/usr/src/app/entrypoint.sh"] diff --git a/web/src/components/search/Search.tsx b/web/src/components/search/Search.tsx index 59036df390..1f8b3831b4 100644 --- a/web/src/components/search/Search.tsx +++ b/web/src/components/search/Search.tsx @@ -15,7 +15,7 @@ import OpenSearch from './open-search/OpenSearch' import IconButton from '@mui/material/IconButton' import React, { useEffect, useRef, useState } from 'react' import SearchPlaceholder from './SearchPlaceholder' -import {REACT_APP_ADVANCED_SEARCH} from "../../globals"; +import { REACT_APP_ADVANCED_SEARCH } from "../../globals"; const useCmdKShortcut = (callback: () => void) => { useEffect(() => { diff --git a/web/webpack.dev.js b/web/webpack.dev.js index 34bf6ffe16..84449f2c9f 100644 --- a/web/webpack.dev.js +++ b/web/webpack.dev.js @@ -41,7 +41,7 @@ const webpackDev = { plugins: [ new webpack.DefinePlugin({ __DEVELOPMENT__: JSON.stringify(true), - __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), + __REACT_APP_ADVANCED_SEARCH__: true, __API_URL__: JSON.stringify('/api/v1'), __API_BETA_URL__: JSON.stringify('/api/v2beta'), __NODE_ENV__: JSON.stringify('development'), diff --git a/web/webpack.prod.js b/web/webpack.prod.js index 113e537537..a20dfef90e 100644 --- a/web/webpack.prod.js +++ b/web/webpack.prod.js @@ -25,7 +25,7 @@ const webpackProd = { new webpack.DefinePlugin({ __DEVELOPMENT__: JSON.stringify(false), __NODE_ENV__: JSON.stringify('production'), - __REACT_APP_ADVANCED_SEARCH__: JSON.stringify(process.env.REACT_APP_ADVANCED_SEARCH || true), + __REACT_APP_ADVANCED_SEARCH__: process.env.REACT_APP_ADVANCED_SEARCH === 'true', 
__API_URL__: JSON.stringify('/api/v1'), __API_BETA_URL__: JSON.stringify('/api/v2beta'), __TEMP_ACTOR_STR__: JSON.stringify('me'), From 15c6697ac2af85c0cbd27aa8e7adf44b32997986 Mon Sep 17 00:00:00 2001 From: wslulciuc Date: Mon, 12 Aug 2024 10:36:18 -0700 Subject: [PATCH 68/87] Add check before indexing ol event Signed-off-by: wslulciuc --- api/src/main/java/marquez/api/OpenLineageResource.java | 4 +++- api/src/main/java/marquez/service/SearchService.java | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/marquez/api/OpenLineageResource.java b/api/src/main/java/marquez/api/OpenLineageResource.java index 764e01df5b..bb813b8fc8 100644 --- a/api/src/main/java/marquez/api/OpenLineageResource.java +++ b/api/src/main/java/marquez/api/OpenLineageResource.java @@ -68,7 +68,9 @@ public OpenLineageResource( public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncResponse asyncResponse) throws JsonProcessingException, SQLException { if (event instanceof LineageEvent) { - serviceFactory.getSearchService().indexEvent((LineageEvent) event); + if (serviceFactory.getSearchService().isEnabled()) { + serviceFactory.getSearchService().indexEvent((LineageEvent) event); + } openLineageService .createAsync((LineageEvent) event) .whenComplete((result, err) -> onComplete(result, err, asyncResponse)); diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index 1ca6145e55..ab7173c570 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -249,7 +249,6 @@ private void index(IndexRequest> request) { } public boolean isEnabled() { - System.out.println("SearchConfig: " + searchConfig.isEnabled()); return !searchConfig.isEnabled(); } } From d47a6c49e2d2f58f1d7daa4071f1a9a61204f9af Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Tue, 13 Aug 2024 11:58:19 +0200 Subject: [PATCH 69/87] feat: add search 
subproject Signed-off-by: Yannick Libert --- build.gradle | 5 +++++ search/build.gradle | 15 +++++++++++++++ .../searchengine/SearchServiceApplication.java | 3 +++ settings.gradle | 1 + 4 files changed, 24 insertions(+) create mode 100644 search/build.gradle create mode 100644 search/src/main/java/marquez/searchengine/SearchServiceApplication.java diff --git a/build.gradle b/build.gradle index 2d9cea24fb..2bb58b94f1 100644 --- a/build.gradle +++ b/build.gradle @@ -45,6 +45,11 @@ subprojects { archivesBaseName = 'marquez-api' } + project(':search') { + apply plugin: 'application' + archivesBaseName = 'marquez-search' + } + project(':clients:java') { apply plugin: 'java-library' archivesBaseName = 'marquez-java' diff --git a/search/build.gradle b/search/build.gradle new file mode 100644 index 0000000000..ea51513dc5 --- /dev/null +++ b/search/build.gradle @@ -0,0 +1,15 @@ +ext { + luceneVersion = '9.11.1' +} + +dependencies { + implementation "org.apache.lucene:lucene-core:${luceneVersion}" + implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" + implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" + + implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}" +} + +application { + mainClassName = 'marquez.searchengine.SearchServiceApplication' +} \ No newline at end of file diff --git a/search/src/main/java/marquez/searchengine/SearchServiceApplication.java b/search/src/main/java/marquez/searchengine/SearchServiceApplication.java new file mode 100644 index 0000000000..76c73ad03e --- /dev/null +++ b/search/src/main/java/marquez/searchengine/SearchServiceApplication.java @@ -0,0 +1,3 @@ +package marquez.searchengine; + +public class SearchServiceApplication {} diff --git a/settings.gradle b/settings.gradle index 0dbff23054..6531556fe5 100644 --- a/settings.gradle +++ b/settings.gradle @@ -15,4 +15,5 @@ rootProject.name = 'marquez' include 'api' +include 'search' include 'clients:java' From 
2c2040cb594e9031f0e32bc1a5d63cd22d4ebd77 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Tue, 13 Aug 2024 12:40:40 +0200 Subject: [PATCH 70/87] feat: init search engine Signed-off-by: Yannick Libert --- search/build.gradle | 4 +- .../searchengine/SearchApplication.java | 31 +++++ .../marquez/searchengine/SearchConfig.java | 19 +++ .../SearchServiceApplication.java | 3 - .../resources/SearchResource.java | 57 +++++++++ .../searchengine/services/SearchService.java | 119 ++++++++++++++++++ 6 files changed, 229 insertions(+), 4 deletions(-) create mode 100644 search/src/main/java/marquez/searchengine/SearchApplication.java create mode 100644 search/src/main/java/marquez/searchengine/SearchConfig.java delete mode 100644 search/src/main/java/marquez/searchengine/SearchServiceApplication.java create mode 100644 search/src/main/java/marquez/searchengine/resources/SearchResource.java create mode 100644 search/src/main/java/marquez/searchengine/services/SearchService.java diff --git a/search/build.gradle b/search/build.gradle index ea51513dc5..98c9ce1161 100644 --- a/search/build.gradle +++ b/search/build.gradle @@ -3,6 +3,8 @@ ext { } dependencies { + implementation project(':api') + implementation "org.apache.lucene:lucene-core:${luceneVersion}" implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" @@ -11,5 +13,5 @@ dependencies { } application { - mainClassName = 'marquez.searchengine.SearchServiceApplication' + mainClassName = 'marquez.searchengine.SearchApplication' } \ No newline at end of file diff --git a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java new file mode 100644 index 0000000000..9e372307c5 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -0,0 +1,31 @@ +package marquez.searchengine; + +import io.dropwizard.Application; +import 
io.dropwizard.setup.Bootstrap; +import io.dropwizard.setup.Environment; +import java.io.IOException; +import marquez.searchengine.resources.SearchResource; + +public class SearchApplication extends Application { + + public static void main(String[] args) throws Exception { + new SearchApplication().run(args); + } + + @Override + public String getName() { + return "search-service"; + } + + @Override + public void initialize(Bootstrap bootstrap) { + // Any bootstrap initialization goes here + } + + @Override + public void run(SearchConfig configuration, Environment environment) throws IOException { + // Register resources + final SearchResource searchResource = new SearchResource(configuration); + environment.jersey().register(searchResource); + } +} diff --git a/search/src/main/java/marquez/searchengine/SearchConfig.java b/search/src/main/java/marquez/searchengine/SearchConfig.java new file mode 100644 index 0000000000..643e148d26 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/SearchConfig.java @@ -0,0 +1,19 @@ +package marquez.searchengine; + +import com.fasterxml.jackson.annotation.JsonProperty; +import io.dropwizard.Configuration; + +public class SearchConfig extends Configuration { + + @JsonProperty private boolean enabled = true; + + @JsonProperty private String indexDirectory = "./index"; + + public boolean isEnabled() { + return enabled; + } + + public String getIndexDirectory() { + return indexDirectory; + } +} diff --git a/search/src/main/java/marquez/searchengine/SearchServiceApplication.java b/search/src/main/java/marquez/searchengine/SearchServiceApplication.java deleted file mode 100644 index 76c73ad03e..0000000000 --- a/search/src/main/java/marquez/searchengine/SearchServiceApplication.java +++ /dev/null @@ -1,3 +0,0 @@ -package marquez.searchengine; - -public class SearchServiceApplication {} diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java 
b/search/src/main/java/marquez/searchengine/resources/SearchResource.java new file mode 100644 index 0000000000..84dc2bc1cf --- /dev/null +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -0,0 +1,57 @@ +package marquez.searchengine.resources; + +import java.io.IOException; +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import marquez.searchengine.SearchConfig; +import marquez.searchengine.services.SearchService; +import marquez.service.models.LineageEvent; + +@Path("/search") +@Produces(MediaType.APPLICATION_JSON) +public class SearchResource { + + private final SearchService searchService; + + public SearchResource(SearchConfig config) throws IOException { + this.searchService = new SearchService(config); + } + + @GET + @Path("/datasets") + public Response searchDatasets(@QueryParam("query") String query) { + try { + return Response.ok(searchService.searchDatasets(query)).build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } + } + + @GET + @Path("/jobs") + public Response searchJobs(@QueryParam("query") String query) { + try { + return Response.ok(searchService.searchJobs(query)).build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } + } + + @POST + @Path("/index") + @Consumes(MediaType.APPLICATION_JSON) + public Response indexEvent(LineageEvent event) { + try { + searchService.indexEvent(event); + return Response.ok().build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } + } +} diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java 
b/search/src/main/java/marquez/searchengine/services/SearchService.java new file mode 100644 index 0000000000..520b4870d6 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -0,0 +1,119 @@ +package marquez.searchengine.services; + +import java.io.IOException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import marquez.searchengine.SearchConfig; +import marquez.service.models.LineageEvent; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; + +public class SearchService { + + private final SearchConfig config; + private final Directory indexDirectory; + private final StandardAnalyzer analyzer; + + public SearchService(SearchConfig config) throws IOException { + this.config = config; + this.analyzer = new StandardAnalyzer(); + this.indexDirectory = FSDirectory.open(Paths.get(config.getIndexDirectory())); + } + + public void indexEvent(LineageEvent event) throws IOException { + try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { + UUID runUuid = UUID.fromString(event.getRun().getRunId()); + if (event.getInputs() != null) { + for (LineageEvent.Dataset dataset : 
event.getInputs()) { + writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); + } + } + if (event.getOutputs() != null) { + for (LineageEvent.Dataset dataset : event.getOutputs()) { + writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); + } + } + writer.addDocument(buildJobDocument(runUuid, event)); + } + } + + private Document buildJobDocument(UUID runUuid, LineageEvent event) { + Document doc = new Document(); + doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); + doc.add(new TextField("name", event.getJob().getName(), Field.Store.YES)); + doc.add( + new TextField( + "type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH", Field.Store.YES)); + doc.add(new TextField("namespace", event.getJob().getNamespace(), Field.Store.YES)); + doc.add(new TextField("facets", event.getJob().getFacets().toString(), Field.Store.YES)); + return doc; + } + + private Document buildDatasetDocument( + UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { + Document doc = new Document(); + doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); + doc.add(new TextField("name", dataset.getName(), Field.Store.YES)); + doc.add(new TextField("namespace", dataset.getNamespace(), Field.Store.YES)); + doc.add(new TextField("facets", dataset.getFacets().toString(), Field.Store.YES)); + return doc; + } + + public SearchResult searchDatasets(String query) throws Exception { + return search(query, new String[] {"name", "namespace", "facets"}); + } + + public SearchResult searchJobs(String query) throws Exception { + return search(query, new String[] {"name", "namespace", "facets"}); + } + + private SearchResult search(String query, String[] fields) throws Exception { + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + IndexSearcher searcher = new IndexSearcher(reader); + MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); + Query q = parser.parse(query); + TopDocs topDocs 
= searcher.search(q, 10); + + SearchResult result = new SearchResult(); + for (ScoreDoc sd : topDocs.scoreDocs) { + Document doc = searcher.doc(sd.doc); + result.addDocument(doc); + } + return result; + } + } + + public static class SearchResult { + private final List> results = new ArrayList<>(); + + public void addDocument(Document doc) { + Map map = new HashMap<>(); + for (IndexableField field : doc.getFields()) { + map.put(field.name(), doc.get(field.name())); + } + results.add(map); + } + + public List> getResults() { + return results; + } + } +} From f6dd50b82f465b5df0f055c8b1864ff4825eb203 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Tue, 13 Aug 2024 15:05:02 +0200 Subject: [PATCH 71/87] feat: add healthcheck Signed-off-by: Yannick Libert --- search/build.gradle | 32 +++++++++++++++++-- .../searchengine/SearchApplication.java | 9 +++--- .../marquez/searchengine/SearchConfig.java | 6 ---- .../health/SearchHealthCheck.java | 11 +++++++ .../resources/SearchResource.java | 6 ++-- .../searchengine/services/SearchService.java | 16 +++++----- 6 files changed, 55 insertions(+), 25 deletions(-) create mode 100644 search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java diff --git a/search/build.gradle b/search/build.gradle index 98c9ce1161..57a39676b4 100644 --- a/search/build.gradle +++ b/search/build.gradle @@ -1,10 +1,12 @@ +import com.github.jengelman.gradle.plugins.shadow.transformers.ServiceFileTransformer + ext { luceneVersion = '9.11.1' } dependencies { implementation project(':api') - + implementation "org.apache.lucene:lucene-core:${luceneVersion}" implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" @@ -14,4 +16,30 @@ dependencies { application { mainClassName = 'marquez.searchengine.SearchApplication' -} \ No newline at end of file +} + +runShadow { + args = ['server'] +} + +shadowJar { + archiveClassifier.set('') + 
transform(ServiceFileTransformer) + from(projectDir) { + include 'LICENSE' + } + manifest { + attributes( + 'Created-By': "Gradle ${gradle.gradleVersion}", + 'Built-By': System.getProperty('user.name'), + 'Build-Jdk': System.getProperty('java.version'), + 'Implementation-Title': project.name, + 'Implementation-Version': project.version, + 'Main-Class': application.mainClass) + } +} + +tasks.distZip.dependsOn tasks.shadowJar +tasks.distTar.dependsOn tasks.shadowJar +tasks.startScripts.dependsOn tasks.shadowJar +tasks.shadowJar.dependsOn tasks.jar \ No newline at end of file diff --git a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java index 9e372307c5..44fd220a34 100644 --- a/search/src/main/java/marquez/searchengine/SearchApplication.java +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -5,6 +5,7 @@ import io.dropwizard.setup.Environment; import java.io.IOException; import marquez.searchengine.resources.SearchResource; +import marquez.searchengine.health.SearchHealthCheck;; public class SearchApplication extends Application { @@ -18,14 +19,12 @@ public String getName() { } @Override - public void initialize(Bootstrap bootstrap) { - // Any bootstrap initialization goes here - } + public void initialize(Bootstrap bootstrap) {} @Override public void run(SearchConfig configuration, Environment environment) throws IOException { - // Register resources - final SearchResource searchResource = new SearchResource(configuration); + final SearchResource searchResource = new SearchResource(); environment.jersey().register(searchResource); + environment.healthChecks().register("search-health-check", new SearchHealthCheck()); } } diff --git a/search/src/main/java/marquez/searchengine/SearchConfig.java b/search/src/main/java/marquez/searchengine/SearchConfig.java index 643e148d26..6c90406c53 100644 --- a/search/src/main/java/marquez/searchengine/SearchConfig.java +++ 
b/search/src/main/java/marquez/searchengine/SearchConfig.java @@ -7,13 +7,7 @@ public class SearchConfig extends Configuration { @JsonProperty private boolean enabled = true; - @JsonProperty private String indexDirectory = "./index"; - public boolean isEnabled() { return enabled; } - - public String getIndexDirectory() { - return indexDirectory; - } } diff --git a/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java b/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java new file mode 100644 index 0000000000..20d6d969bb --- /dev/null +++ b/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java @@ -0,0 +1,11 @@ +package marquez.searchengine.health; + +import com.codahale.metrics.health.HealthCheck; + +public class SearchHealthCheck extends HealthCheck { + + @Override + protected Result check() throws Exception { + return Result.healthy(); + } +} \ No newline at end of file diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index 84dc2bc1cf..5840fc9dc7 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -1,6 +1,5 @@ package marquez.searchengine.resources; -import java.io.IOException; import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -9,7 +8,6 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import marquez.searchengine.SearchConfig; import marquez.searchengine.services.SearchService; import marquez.service.models.LineageEvent; @@ -19,8 +17,8 @@ public class SearchResource { private final SearchService searchService; - public SearchResource(SearchConfig config) throws IOException { - this.searchService = new SearchService(config); + public SearchResource() { + this.searchService = new SearchService(); } @GET diff 
--git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 520b4870d6..7ffa4f632c 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -1,13 +1,11 @@ package marquez.searchengine.services; import java.io.IOException; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; -import marquez.searchengine.SearchConfig; import marquez.service.models.LineageEvent; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -18,24 +16,24 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.StoredFields; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; public class SearchService { - private final SearchConfig config; private final Directory indexDirectory; private final StandardAnalyzer analyzer; - public SearchService(SearchConfig config) throws IOException { - this.config = config; + public SearchService() { + // Use ByteBuffersDirectory for in-memory indexing + this.indexDirectory = new ByteBuffersDirectory(); this.analyzer = new StandardAnalyzer(); - this.indexDirectory = FSDirectory.open(Paths.get(config.getIndexDirectory())); } public void indexEvent(LineageEvent event) throws IOException { @@ -93,8 +91,10 @@ private SearchResult search(String query, String[] fields) throws 
Exception { TopDocs topDocs = searcher.search(q, 10); SearchResult result = new SearchResult(); + StoredFields storedFields = searcher.storedFields(); + for (ScoreDoc sd : topDocs.scoreDocs) { - Document doc = searcher.doc(sd.doc); + Document doc = storedFields.document(sd.doc); result.addDocument(doc); } return result; From b4f6930964705afe692d9cc163235b4420fa1f0c Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Wed, 14 Aug 2024 00:18:28 +0200 Subject: [PATCH 72/87] feat: replicate Opensearch API behavior Signed-off-by: Yannick Libert --- search/build.gradle | 3 +- search/search.yml | 7 + .../searchengine/SearchApplication.java | 2 +- .../health/SearchHealthCheck.java | 10 +- .../searchengine/resources/SearchRequest.java | 92 ++++ .../resources/SearchResource.java | 114 +++-- .../searchengine/services/SearchService.java | 433 ++++++++++++++---- 7 files changed, 538 insertions(+), 123 deletions(-) create mode 100644 search/search.yml create mode 100644 search/src/main/java/marquez/searchengine/resources/SearchRequest.java diff --git a/search/build.gradle b/search/build.gradle index 57a39676b4..f0b65d5c2a 100644 --- a/search/build.gradle +++ b/search/build.gradle @@ -10,6 +10,7 @@ dependencies { implementation "org.apache.lucene:lucene-core:${luceneVersion}" implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" + implementation "org.apache.lucene:lucene-highlighter:${luceneVersion}" implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}" } @@ -19,7 +20,7 @@ application { } runShadow { - args = ['server'] + args = ['server', 'search.yml'] } shadowJar { diff --git a/search/search.yml b/search/search.yml new file mode 100644 index 0000000000..5eb6ed52d9 --- /dev/null +++ b/search/search.yml @@ -0,0 +1,7 @@ +server: + applicationConnectors: + - type: http + port: 9000 + adminConnectors: + - type: http + port: 9001 \ No newline at end of file diff --git 
a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java index 44fd220a34..055132cdc5 100644 --- a/search/src/main/java/marquez/searchengine/SearchApplication.java +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -4,8 +4,8 @@ import io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Environment; import java.io.IOException; +import marquez.searchengine.health.SearchHealthCheck; import marquez.searchengine.resources.SearchResource; -import marquez.searchengine.health.SearchHealthCheck;; public class SearchApplication extends Application { diff --git a/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java b/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java index 20d6d969bb..02b70f91f0 100644 --- a/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java +++ b/search/src/main/java/marquez/searchengine/health/SearchHealthCheck.java @@ -4,8 +4,8 @@ public class SearchHealthCheck extends HealthCheck { - @Override - protected Result check() throws Exception { - return Result.healthy(); - } -} \ No newline at end of file + @Override + protected Result check() throws Exception { + return Result.healthy(); + } +} diff --git a/search/src/main/java/marquez/searchengine/resources/SearchRequest.java b/search/src/main/java/marquez/searchengine/resources/SearchRequest.java new file mode 100644 index 0000000000..4c5c8eaa08 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/resources/SearchRequest.java @@ -0,0 +1,92 @@ +package marquez.searchengine.resources; + +import java.util.Map; +import java.util.List; + +public class SearchRequest { + private Highlight highlight; + private Query query; + + public static class Highlight { + private Map> fields; + + // Getters and setters + public Map> getFields() { + return fields; + } + + public void setFields(Map> fields) { + this.fields = fields; + } + } + + public 
static class Query { + private MultiMatch multi_match; + + public static class MultiMatch { + private List fields; + private String operator; + private String query; + private String type; + + // Getters and setters + public List getFields() { + return fields; + } + + public void setFields(List fields) { + this.fields = fields; + } + + public String getOperator() { + return operator; + } + + public void setOperator(String operator) { + this.operator = operator; + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = query; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + } + + // Getters and setters + public MultiMatch getMulti_match() { + return multi_match; + } + + public void setMulti_match(MultiMatch multi_match) { + this.multi_match = multi_match; + } + } + + // Getters and setters for SearchRequest + public Highlight getHighlight() { + return highlight; + } + + public void setHighlight(Highlight highlight) { + this.highlight = highlight; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query query) { + this.query = query; + } +} diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index 5840fc9dc7..9215c276b3 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -1,55 +1,103 @@ package marquez.searchengine.resources; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.List; +import java.util.stream.Collectors; + +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; import 
javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import com.fasterxml.jackson.databind.ObjectMapper; + import marquez.searchengine.services.SearchService; +import marquez.searchengine.resources.SearchRequest; import marquez.service.models.LineageEvent; -@Path("/search") +@Path("/") @Produces(MediaType.APPLICATION_JSON) public class SearchResource { - private final SearchService searchService; + private final SearchService searchService; + private final ObjectMapper objectMapper = new ObjectMapper(); - public SearchResource() { - this.searchService = new SearchService(); - } + public SearchResource() { + this.searchService = new SearchService(); + } - @GET - @Path("/datasets") - public Response searchDatasets(@QueryParam("query") String query) { - try { - return Response.ok(searchService.searchDatasets(query)).build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + @POST + @Path("/jobs/_search") + @Consumes(MediaType.APPLICATION_JSON) + public Response searchJobs(SearchRequest request) { + try { + // Extract the actual query and other details from the request object + String query = request.getQuery().getMulti_match().getQuery(); + List fields = request.getQuery().getMulti_match().getFields(); + String type = request.getQuery().getMulti_match().getType(); + String operator = request.getQuery().getMulti_match().getOperator(); + + // Log the extracted details for debugging + System.out.println("Received query: " + query); + System.out.println("Fields: " + fields); + System.out.println("Type: " + type); + System.out.println("Operator: " + operator); + + // Perform the search using the extracted query and fields + SearchService.SearchResult result = searchService.searchJobs(query, fields); + String jsonResponse = new ObjectMapper().writeValueAsString(result); + System.out.println("Serialized Response: " + jsonResponse); + + return Response.ok(result).build(); + } catch (Exception 
e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } } - } - - @GET - @Path("/jobs") - public Response searchJobs(@QueryParam("query") String query) { - try { - return Response.ok(searchService.searchJobs(query)).build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + + @POST + @Path("/datasets/_search") + @Consumes(MediaType.APPLICATION_JSON) + public Response searchDatasets(SearchRequest request) { + try { + // Extract the actual query and other details from the request object + String query = request.getQuery().getMulti_match().getQuery(); + List fields = request.getQuery().getMulti_match().getFields(); + String type = request.getQuery().getMulti_match().getType(); + String operator = request.getQuery().getMulti_match().getOperator(); + + // Log the extracted details for debugging + System.out.println("Received query: " + query); + System.out.println("Fields: " + fields); + System.out.println("Type: " + type); + System.out.println("Operator: " + operator); + + // Perform the search using the extracted query and fields + SearchService.SearchResult result = searchService.searchDatasets(query, fields); + String jsonResponse = new ObjectMapper().writeValueAsString(result); + System.out.println("Serialized Response: " + jsonResponse); + + return Response.ok(result).build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } } - } - - @POST - @Path("/index") - @Consumes(MediaType.APPLICATION_JSON) - public Response indexEvent(LineageEvent event) { - try { - searchService.indexEvent(event); - return Response.ok().build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + + // Indexing endpoint remains the same + @POST + @Path("/index") + @Consumes(MediaType.APPLICATION_JSON) + public Response 
indexEvent(LineageEvent event) { + try { + searchService.indexEvent(event); + return Response.ok().build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + } } - } } diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 7ffa4f632c..9c811e6359 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -13,107 +13,374 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexNotFoundException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; +import com.fasterxml.jackson.annotation.JsonProperty; + public class SearchService { - private final Directory indexDirectory; - private final StandardAnalyzer analyzer; - - public SearchService() { - // Use ByteBuffersDirectory for in-memory indexing - this.indexDirectory = new ByteBuffersDirectory(); - this.analyzer = new StandardAnalyzer(); - } - - public void indexEvent(LineageEvent event) throws IOException { - try (IndexWriter writer = new IndexWriter(indexDirectory, new 
IndexWriterConfig(analyzer))) { - UUID runUuid = UUID.fromString(event.getRun().getRunId()); - if (event.getInputs() != null) { - for (LineageEvent.Dataset dataset : event.getInputs()) { - writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); - } - } - if (event.getOutputs() != null) { - for (LineageEvent.Dataset dataset : event.getOutputs()) { - writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); - } - } - writer.addDocument(buildJobDocument(runUuid, event)); + private final Directory indexDirectory; + private final StandardAnalyzer analyzer; + private static final int MAX_RESULTS = 10; + + public SearchService() { + this.indexDirectory = new ByteBuffersDirectory(); + this.analyzer = new StandardAnalyzer(); } - } - - private Document buildJobDocument(UUID runUuid, LineageEvent event) { - Document doc = new Document(); - doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); - doc.add(new TextField("name", event.getJob().getName(), Field.Store.YES)); - doc.add( - new TextField( - "type", event.getJob().isStreamingJob() ? 
"STREAM" : "BATCH", Field.Store.YES)); - doc.add(new TextField("namespace", event.getJob().getNamespace(), Field.Store.YES)); - doc.add(new TextField("facets", event.getJob().getFacets().toString(), Field.Store.YES)); - return doc; - } - - private Document buildDatasetDocument( - UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { - Document doc = new Document(); - doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); - doc.add(new TextField("name", dataset.getName(), Field.Store.YES)); - doc.add(new TextField("namespace", dataset.getNamespace(), Field.Store.YES)); - doc.add(new TextField("facets", dataset.getFacets().toString(), Field.Store.YES)); - return doc; - } - - public SearchResult searchDatasets(String query) throws Exception { - return search(query, new String[] {"name", "namespace", "facets"}); - } - - public SearchResult searchJobs(String query) throws Exception { - return search(query, new String[] {"name", "namespace", "facets"}); - } - - private SearchResult search(String query, String[] fields) throws Exception { - try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { - IndexSearcher searcher = new IndexSearcher(reader); - MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); - Query q = parser.parse(query); - TopDocs topDocs = searcher.search(q, 10); - - SearchResult result = new SearchResult(); - StoredFields storedFields = searcher.storedFields(); - - for (ScoreDoc sd : topDocs.scoreDocs) { - Document doc = storedFields.document(sd.doc); - result.addDocument(doc); - } - return result; + + public void indexEvent(LineageEvent event) throws IOException { + try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { + UUID runUuid = UUID.fromString(event.getRun().getRunId()); + if (event.getInputs() != null) { + for (LineageEvent.Dataset dataset : event.getInputs()) { + writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); + } + } + 
if (event.getOutputs() != null) { + for (LineageEvent.Dataset dataset : event.getOutputs()) { + writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); + } + } + writer.addDocument(buildJobDocument(runUuid, event)); + writer.commit(); + } } - } - public static class SearchResult { - private final List> results = new ArrayList<>(); + private Document buildJobDocument(UUID runUuid, LineageEvent event) { + Document doc = new Document(); + doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); + doc.add(new TextField("name", event.getJob().getName(), Field.Store.YES)); + doc.add(new TextField("type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH", Field.Store.YES)); + doc.add(new TextField("namespace", event.getJob().getNamespace(), Field.Store.YES)); + doc.add(new TextField("facets", event.getJob().getFacets().toString(), Field.Store.YES)); + return doc; + } + + private Document buildDatasetDocument(UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { + Document doc = new Document(); + doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); + doc.add(new TextField("name", dataset.getName(), Field.Store.YES)); + doc.add(new TextField("namespace", dataset.getNamespace(), Field.Store.YES)); + doc.add(new TextField("facets", dataset.getFacets().toString(), Field.Store.YES)); + return doc; + } - public void addDocument(Document doc) { - Map map = new HashMap<>(); - for (IndexableField field : doc.getFields()) { - map.put(field.name(), doc.get(field.name())); - } - results.add(map); + private boolean isIndexEmpty() throws IOException { + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + return reader.numDocs() == 0; + } catch (IndexNotFoundException e) { + return true; + } + } + + public SearchResult searchDatasets(String query, List fields) throws Exception { + return search(query, fields); + } + + public SearchResult searchJobs(String query, List fields) throws Exception { + return 
search(query, fields); + } + + private SearchResult search(String query, List fields) throws Exception { + long startTime = System.currentTimeMillis(); + + if (isIndexEmpty()) { + return createEmptySearchResult(startTime); + } + + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + IndexSearcher searcher = new IndexSearcher(reader); + MultiFieldQueryParser parser = new MultiFieldQueryParser(fields.toArray(new String[0]), analyzer); + Query q = parser.parse(query); + + TopDocs topDocs = searcher.search(q, MAX_RESULTS); + long took = System.currentTimeMillis() - startTime; + + SearchResult result = new SearchResult(); + result.setTook(took); + result.getHitsMetadata().getTotalHits().setValue(topDocs.totalHits.value); + //result.setMaxScore(topDocs.getMaxScore()); + + StoredFields storedFields = searcher.storedFields(); + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); + Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(q)); + + for (ScoreDoc sd : topDocs.scoreDocs) { + Document doc = storedFields.document(sd.doc); + Map highlightedDoc = new HashMap<>(); + + for (String field : fields) { + String text = doc.get(field); + if (text != null) { + String highlightedText = highlighter.getBestFragment(analyzer, field, text); + highlightedDoc.put(field, highlightedText != null ? 
highlightedText : text); + } else { + highlightedDoc.put(field, doc.get(field)); + } + } + + result.addDocument(highlightedDoc); + } + + return result; + } } - public List> getResults() { - return results; + private SearchResult createEmptySearchResult(long startTime) { + long took = System.currentTimeMillis() - startTime; + + SearchResult result = new SearchResult(); + result.setTook(took); + result.getHitsMetadata().getTotalHits().setValue(0); + //result.setMaxScore(0.0f); + result.setTimedOut(false); + + return result; + } + + + public static class SearchResult { + @JsonProperty("took") + private long took; + + @JsonProperty("timed_out") + private boolean timedOut = false; + + @JsonProperty("_shards") + private ShardStatistics shards; + + @JsonProperty("hits") + private HitsMetadata hitsMetadata; + + @JsonProperty("num_reduce_phases") + private long numberOfReducePhases; + + @JsonProperty("terminated_early") + private boolean terminatedEarly; + + @JsonProperty("suggest") + private Map suggest = new HashMap<>(); // Initialize as empty map + + // Constructor + public SearchResult() { + this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures + this.hitsMetadata = new HitsMetadata(); + this.numberOfReducePhases = 0; // Default value + this.terminatedEarly = false; // Default value + this.suggest = new HashMap<>(); // Empty suggestion map + } + + // Add document to hits + public void addDocument(Map doc) { + Map hit = new HashMap<>(); + hit.put("_source", doc); + hitsMetadata.addHit(hit); + } + + // Getters and Setters for all fields + public long getTook() { + return took; + } + + public void setTook(long took) { + this.took = took; + } + + public boolean isTimedOut() { + return timedOut; + } + + public void setTimedOut(boolean timedOut) { + this.timedOut = timedOut; + } + + public ShardStatistics getShards() { + return shards; + } + + public void setShards(ShardStatistics shards) { + this.shards = shards; + } + + public 
HitsMetadata getHitsMetadata() { + return hitsMetadata; + } + + public void setHitsMetadata(HitsMetadata hitsMetadata) { + this.hitsMetadata = hitsMetadata; + } + + public long getNumberOfReducePhases() { + return numberOfReducePhases; + } + + public void setNumberOfReducePhases(long numberOfReducePhases) { + this.numberOfReducePhases = numberOfReducePhases; + } + + public boolean isTerminatedEarly() { + return terminatedEarly; + } + + public void setTerminatedEarly(boolean terminatedEarly) { + this.terminatedEarly = terminatedEarly; + } + + public Map getSuggest() { + return suggest; + } + + public void setSuggest(Map suggest) { + this.suggest = suggest; + } + + // ShardStatistics inner class + public static class ShardStatistics { + @JsonProperty("total") + private int total; + + @JsonProperty("successful") + private int successful; + + @JsonProperty("skipped") + private int skipped; + + @JsonProperty("failed") + private int failed; + + // Constructor + public ShardStatistics(int total, int successful, int skipped, int failed) { + this.total = total; + this.successful = successful; + this.skipped = skipped; + this.failed = failed; + } + + // Getters and Setters + public int getTotal() { + return total; + } + + public void setTotal(int total) { + this.total = total; + } + + public int getSuccessful() { + return successful; + } + + public void setSuccessful(int successful) { + this.successful = successful; + } + + public int getSkipped() { + return skipped; + } + + public void setSkipped(int skipped) { + this.skipped = skipped; + } + + public int getFailed() { + return failed; + } + + public void setFailed(int failed) { + this.failed = failed; + } + } + + // HitsMetadata inner class + public static class HitsMetadata { + @JsonProperty("total") + private TotalHits totalHits; + + @JsonProperty("max_score") + private Float maxScore; + + @JsonProperty("hits") + private List> hits; + + public HitsMetadata() { + this.totalHits = new TotalHits(0, "eq"); + this.maxScore = 
null; + this.hits = new ArrayList<>(); + } + + // Getters and Setters + public TotalHits getTotalHits() { + return totalHits; + } + + public void setTotalHits(TotalHits totalHits) { + this.totalHits = totalHits; + } + + public Float getMaxScore() { + return maxScore; + } + + public void setMaxScore(Float maxScore) { + this.maxScore = maxScore; + } + + public List> getHits() { + return hits; + } + + public void setHits(List> hits) { + this.hits = hits; + } + + // Add a hit to the hits list + public void addHit(Map hit) { + this.hits.add(hit); + } + } + + // TotalHits inner class + public static class TotalHits { + @JsonProperty("value") + private long value; + + @JsonProperty("relation") + private String relation; + + public TotalHits(long value, String relation) { + this.value = value; + this.relation = relation; + } + + // Getters and Setters + public long getValue() { + return value; + } + + public void setValue(long value) { + this.value = value; + } + + public String getRelation() { + return relation; + } + + public void setRelation(String relation) { + this.relation = relation; + } + } } - } + } From ecf172ae3bd8cbc3163cf50d542deffff64c44d0 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Thu, 22 Aug 2024 08:31:20 +0200 Subject: [PATCH 73/87] feat: properly index and search datasets and jobs Signed-off-by: Yannick Libert --- .../marquez/api/v2beta/SearchResource.java | 4 +- .../java/marquez/service/SearchService.java | 2 +- search/build.gradle | 1 + .../searchengine/models/IndexResponse.java | 98 ++++++ .../searchengine/models/SearchResult.java | 244 +++++++++++++ .../resources/SearchResource.java | 96 ++--- .../searchengine/services/SearchService.java | 329 ++++-------------- 7 files changed, 456 insertions(+), 318 deletions(-) create mode 100644 search/src/main/java/marquez/searchengine/models/IndexResponse.java create mode 100644 search/src/main/java/marquez/searchengine/models/SearchResult.java diff --git 
a/api/src/main/java/marquez/api/v2beta/SearchResource.java b/api/src/main/java/marquez/api/v2beta/SearchResource.java index 527a50f7e5..48e7e6d40f 100644 --- a/api/src/main/java/marquez/api/v2beta/SearchResource.java +++ b/api/src/main/java/marquez/api/v2beta/SearchResource.java @@ -48,7 +48,7 @@ public SearchResource(@NonNull final ServiceFactory serviceFactory) { @Produces(APPLICATION_JSON) @Path("jobs") public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOException { - if (searchService.isEnabled()) { + if (!searchService.isEnabled()) { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } return formatOpenSearchResponse(this.searchService.searchJobs(query)); @@ -61,7 +61,7 @@ public Response searchJobs(@QueryParam("q") @NotBlank String query) throws IOExc @Produces(APPLICATION_JSON) @Path("datasets") public Response searchDatasets(@QueryParam("q") @NotBlank String query) throws IOException { - if (searchService.isEnabled()) { + if (!searchService.isEnabled()) { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } return formatOpenSearchResponse(this.searchService.searchDatasets(query)); diff --git a/api/src/main/java/marquez/service/SearchService.java b/api/src/main/java/marquez/service/SearchService.java index ab7173c570..1684e54d37 100644 --- a/api/src/main/java/marquez/service/SearchService.java +++ b/api/src/main/java/marquez/service/SearchService.java @@ -249,6 +249,6 @@ private void index(IndexRequest> request) { } public boolean isEnabled() { - return !searchConfig.isEnabled(); + return searchConfig.isEnabled(); } } diff --git a/search/build.gradle b/search/build.gradle index f0b65d5c2a..4b3e08db51 100644 --- a/search/build.gradle +++ b/search/build.gradle @@ -11,6 +11,7 @@ dependencies { implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" implementation 
"org.apache.lucene:lucene-highlighter:${luceneVersion}" + implementation 'org.opensearch.client:opensearch-rest-high-level-client:2.16.0' implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}" } diff --git a/search/src/main/java/marquez/searchengine/models/IndexResponse.java b/search/src/main/java/marquez/searchengine/models/IndexResponse.java new file mode 100644 index 0000000000..56ce1fa5b0 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/models/IndexResponse.java @@ -0,0 +1,98 @@ +package marquez.searchengine.models; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class IndexResponse { + + @JsonProperty("_index") + private final String index; + + @JsonProperty("_id") + private final String id; + + @JsonProperty("_version") + private final long version; + + @JsonProperty("result") + private final String result; + + @JsonProperty("_shards") + private final ShardInfo shardInfo; + + @JsonProperty("_seq_no") + private final long seqNo; + + @JsonProperty("_primary_term") + private final long primaryTerm; + + // Constructor to initialize all final fields + public IndexResponse(String index, String id, long version, String result, ShardInfo shardInfo, long seqNo, long primaryTerm) { + this.index = index; + this.id = id; + this.version = version; + this.result = result; + this.shardInfo = shardInfo; + this.seqNo = seqNo; + this.primaryTerm = primaryTerm; + } + + // Getters + public String getIndex() { + return index; + } + + public String getId() { + return id; + } + + public long getVersion() { + return version; + } + + public String getResult() { + return result; + } + + public ShardInfo getShardInfo() { + return shardInfo; + } + + public long getSeqNo() { + return seqNo; + } + + public long getPrimaryTerm() { + return primaryTerm; + } + + // ShardInfo inner class + public static class ShardInfo { + @JsonProperty("total") + private final int total; + + @JsonProperty("successful") + private final int successful; + + 
@JsonProperty("failed") + private final int failed; + + public ShardInfo(int total, int successful, int failed) { + this.total = total; + this.successful = successful; + this.failed = failed; + } + + // Getters for ShardInfo + public int getTotal() { + return total; + } + + public int getSuccessful() { + return successful; + } + + public int getFailed() { + return failed; + } + } +} diff --git a/search/src/main/java/marquez/searchengine/models/SearchResult.java b/search/src/main/java/marquez/searchengine/models/SearchResult.java new file mode 100644 index 0000000000..2d09eb613f --- /dev/null +++ b/search/src/main/java/marquez/searchengine/models/SearchResult.java @@ -0,0 +1,244 @@ +package marquez.searchengine.models; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +public class SearchResult { + @JsonProperty("took") + private long took; + + @JsonProperty("timed_out") + private boolean timedOut = false; + + @JsonProperty("_shards") + private ShardStatistics shards; + + @JsonProperty("hits") + private HitsMetadata hitsMetadata; + + @JsonProperty("num_reduce_phases") + private long numberOfReducePhases; + + @JsonProperty("terminated_early") + private boolean terminatedEarly; + + @JsonProperty("suggest") + private Map suggest = new HashMap<>(); // Initialize as empty map + + // Constructor + public SearchResult() { + this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures + this.hitsMetadata = new HitsMetadata(); + this.numberOfReducePhases = 0; // Default value + this.terminatedEarly = false; // Default value + this.suggest = new HashMap<>(); // Empty suggestion map + } + + // Add document to hits + public void addDocument(String index, Map doc) { + Map hit = new HashMap<>(); + hit.put("_index", index); // Include the index name in the hit + hit.put("_source", doc); + hitsMetadata.addHit(index, hit); + } + + // Getters 
and Setters for all fields + public long getTook() { + return took; + } + + public void setTook(long took) { + this.took = took; + } + + public boolean isTimedOut() { + return timedOut; + } + + public void setTimedOut(boolean timedOut) { + this.timedOut = timedOut; + } + + public ShardStatistics getShards() { + return shards; + } + + public void setShards(ShardStatistics shards) { + this.shards = shards; + } + + public HitsMetadata getHitsMetadata() { + return hitsMetadata; + } + + public void setHitsMetadata(HitsMetadata hitsMetadata) { + this.hitsMetadata = hitsMetadata; + } + + public long getNumberOfReducePhases() { + return numberOfReducePhases; + } + + public void setNumberOfReducePhases(long numberOfReducePhases) { + this.numberOfReducePhases = numberOfReducePhases; + } + + public boolean isTerminatedEarly() { + return terminatedEarly; + } + + public void setTerminatedEarly(boolean terminatedEarly) { + this.terminatedEarly = terminatedEarly; + } + + public Map getSuggest() { + return suggest; + } + + public void setSuggest(Map suggest) { + this.suggest = suggest; + } + + // ShardStatistics inner class + public static class ShardStatistics { + @JsonProperty("total") + private int total; + + @JsonProperty("successful") + private int successful; + + @JsonProperty("skipped") + private int skipped; + + @JsonProperty("failed") + private int failed; + + // Constructor + public ShardStatistics(int total, int successful, int skipped, int failed) { + this.total = total; + this.successful = successful; + this.skipped = skipped; + this.failed = failed; + } + + // Getters and Setters + public int getTotal() { + return total; + } + + public void setTotal(int total) { + this.total = total; + } + + public int getSuccessful() { + return successful; + } + + public void setSuccessful(int successful) { + this.successful = successful; + } + + public int getSkipped() { + return skipped; + } + + public void setSkipped(int skipped) { + this.skipped = skipped; + } + + public int 
getFailed() { + return failed; + } + + public void setFailed(int failed) { + this.failed = failed; + } + } + + // HitsMetadata inner class + public static class HitsMetadata { + @JsonProperty("total") + private TotalHits totalHits; + + @JsonProperty("max_score") + private Float maxScore; + + @JsonProperty("hits") + private List> hits; + + public HitsMetadata() { + this.totalHits = new TotalHits(0, "eq"); + this.maxScore = null; + this.hits = new ArrayList<>(); + } + + // Getters and Setters + public TotalHits getTotalHits() { + return totalHits; + } + + public void setTotalHits(TotalHits totalHits) { + this.totalHits = totalHits; + } + + public Float getMaxScore() { + return maxScore; + } + + public void setMaxScore(Float maxScore) { + this.maxScore = maxScore; + } + + public List> getHits() { + return hits; + } + + public void setHits(List> hits) { + this.hits = hits; + } + + // Add a hit to the hits list + public void addHit(String index, Map doc) { + Map hit = new HashMap<>(); + hit.put("_index", index); + hit.putAll(doc); + hit.put("_id", "id"); + this.hits.add(hit); + } + } + + // TotalHits inner class + public static class TotalHits { + @JsonProperty("value") + private long value; + + @JsonProperty("relation") + private String relation; + + public TotalHits(long value, String relation) { + this.value = value; + this.relation = relation; + } + + // Getters and Setters + public long getValue() { + return value; + } + + public void setValue(long value) { + this.value = value; + } + + public String getRelation() { + return relation; + } + + public void setRelation(String relation) { + this.relation = relation; + } + } +} diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index 9215c276b3..b043c04d0d 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ 
b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -1,32 +1,29 @@ package marquez.searchengine.resources; -import java.io.BufferedReader; -import java.io.InputStreamReader; +import java.io.IOException; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; -import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.POST; +import javax.ws.rs.PUT; import javax.ws.rs.Path; +import javax.ws.rs.PathParam; import javax.ws.rs.Produces; -import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import com.fasterxml.jackson.databind.ObjectMapper; +//import com.fasterxml.jackson.databind.ObjectMapper; import marquez.searchengine.services.SearchService; -import marquez.searchengine.resources.SearchRequest; -import marquez.service.models.LineageEvent; +import marquez.searchengine.models.IndexResponse; +import marquez.searchengine.models.SearchResult; @Path("/") @Produces(MediaType.APPLICATION_JSON) public class SearchResource { private final SearchService searchService; - private final ObjectMapper objectMapper = new ObjectMapper(); public SearchResource() { this.searchService = new SearchService(); @@ -37,23 +34,13 @@ public SearchResource() { @Consumes(MediaType.APPLICATION_JSON) public Response searchJobs(SearchRequest request) { try { - // Extract the actual query and other details from the request object String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); - String type = request.getQuery().getMulti_match().getType(); - String operator = request.getQuery().getMulti_match().getOperator(); - // Log the extracted details for debugging - System.out.println("Received query: " + query); - System.out.println("Fields: " + fields); - System.out.println("Type: " + type); - System.out.println("Operator: " + operator); - - // 
Perform the search using the extracted query and fields - SearchService.SearchResult result = searchService.searchJobs(query, fields); - String jsonResponse = new ObjectMapper().writeValueAsString(result); - System.out.println("Serialized Response: " + jsonResponse); - + //System.out.println("Received query: " + query); + SearchResult result = searchService.searchJobs(query, fields); + //String jsonResponse = new ObjectMapper().writeValueAsString(result); + //System.out.println("Serialized Response: " + jsonResponse); return Response.ok(result).build(); } catch (Exception e) { return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); @@ -65,39 +52,54 @@ public Response searchJobs(SearchRequest request) { @Consumes(MediaType.APPLICATION_JSON) public Response searchDatasets(SearchRequest request) { try { - // Extract the actual query and other details from the request object String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); - String type = request.getQuery().getMulti_match().getType(); - String operator = request.getQuery().getMulti_match().getOperator(); - - // Log the extracted details for debugging - System.out.println("Received query: " + query); - System.out.println("Fields: " + fields); - System.out.println("Type: " + type); - System.out.println("Operator: " + operator); - - // Perform the search using the extracted query and fields - SearchService.SearchResult result = searchService.searchDatasets(query, fields); - String jsonResponse = new ObjectMapper().writeValueAsString(result); - System.out.println("Serialized Response: " + jsonResponse); - + // // Log the extracted details for debugging + //System.out.println("Received query: " + query); + SearchResult result = searchService.searchDatasets(query, fields); + //String jsonResponse = new ObjectMapper().writeValueAsString(result); + //System.out.println("Serialized Response: " + jsonResponse); 
return Response.ok(result).build(); } catch (Exception e) { return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); } } - // Indexing endpoint remains the same - @POST - @Path("/index") + @PUT + @Path("/jobs/_doc/{id}") @Consumes(MediaType.APPLICATION_JSON) - public Response indexEvent(LineageEvent event) { + public Response indexJob(@PathParam("id") String id, Map document) { try { - searchService.indexEvent(event); - return Response.ok().build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); + IndexResponse indexResponse = searchService.indexJobDocument(document); + return Response.ok(indexResponse).build(); + } catch (IOException e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR) + .entity("Failed to index job document: " + e.getMessage()) + .build(); + } + } + + @PUT + @Path("/datasets/_doc/{id}") + @Consumes(MediaType.APPLICATION_JSON) + public Response indexDataset(@PathParam("id") String id, Map document) { + try { + IndexResponse indexResponse = searchService.indexDatasetDocument(document); + return Response.ok(indexResponse).build(); + } catch (IOException e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR) + .entity("Failed to index dataset document: " + e.getMessage()) + .build(); + } + } + @GET + @Path("/ping") + public Response ping() { + boolean isHealthy = true; + if (isHealthy) { + return Response.ok().entity("{\"status\":\"true\"}").build(); + } else { + return Response.status(Response.Status.SERVICE_UNAVAILABLE).entity("{\"status\":\"false\"}").build(); } } } diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 9c811e6359..f72b8d9b8b 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ 
-1,12 +1,9 @@ package marquez.searchengine.services; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.UUID; -import marquez.service.models.LineageEvent; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,8 +24,8 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; - -import com.fasterxml.jackson.annotation.JsonProperty; +import marquez.searchengine.models.IndexResponse; +import marquez.searchengine.models.SearchResult; public class SearchService { @@ -41,41 +38,67 @@ public SearchService() { this.analyzer = new StandardAnalyzer(); } - public void indexEvent(LineageEvent event) throws IOException { + // Method to index a job document + public IndexResponse indexJobDocument(Map document) throws IOException { try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { - UUID runUuid = UUID.fromString(event.getRun().getRunId()); - if (event.getInputs() != null) { - for (LineageEvent.Dataset dataset : event.getInputs()) { - writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); - } + Document doc = new Document(); + + doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + + if (document.containsKey("facets")) { + doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); } - if (event.getOutputs() != null) { - for (LineageEvent.Dataset dataset : event.getOutputs()) { - 
writer.addDocument(buildDatasetDocument(runUuid, dataset, event)); - } + if (document.containsKey("runFacets")) { + doc.add(new TextField("runFacets", document.get("runFacets").toString(), Field.Store.YES)); } - writer.addDocument(buildJobDocument(runUuid, event)); + + writer.addDocument(doc); writer.commit(); + return createIndexResponse("jobs", document.get("name").toString(), true); } } - private Document buildJobDocument(UUID runUuid, LineageEvent event) { - Document doc = new Document(); - doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); - doc.add(new TextField("name", event.getJob().getName(), Field.Store.YES)); - doc.add(new TextField("type", event.getJob().isStreamingJob() ? "STREAM" : "BATCH", Field.Store.YES)); - doc.add(new TextField("namespace", event.getJob().getNamespace(), Field.Store.YES)); - doc.add(new TextField("facets", event.getJob().getFacets().toString(), Field.Store.YES)); - return doc; + // Method to index a dataset document + public IndexResponse indexDatasetDocument(Map document) throws IOException { + try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { + Document doc = new Document(); + + doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + + if (document.containsKey("facets")) { + doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); + } + if (document.containsKey("inputFacets")) { + doc.add(new TextField("inputFacets", document.get("inputFacets").toString(), Field.Store.YES)); + } + if (document.containsKey("outputFacets")) { + doc.add(new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); + } + + 
//System.out.println("Indexing document: " + doc); + writer.addDocument(doc); + writer.commit(); + + return createIndexResponse("datasets", document.get("name").toString(), true); + } } - private Document buildDatasetDocument(UUID runUuid, LineageEvent.Dataset dataset, LineageEvent event) { - Document doc = new Document(); - doc.add(new StringField("run_id", runUuid.toString(), Field.Store.YES)); - doc.add(new TextField("name", dataset.getName(), Field.Store.YES)); - doc.add(new TextField("namespace", dataset.getNamespace(), Field.Store.YES)); - doc.add(new TextField("facets", dataset.getFacets().toString(), Field.Store.YES)); - return doc; + private IndexResponse createIndexResponse(String index, String id, boolean created) { + long version = 1L; // Simulated version number + String result = created ? "created" : "updated"; + + IndexResponse.ShardInfo shardInfo = new IndexResponse.ShardInfo(1, 1, 0); // 1 shard, 1 successful, 0 failed + + long seqNo = 1L; // Simulated sequence number + long primaryTerm = 1L; // Simulated primary term + + return new IndexResponse(index, id, version, result, shardInfo, seqNo, primaryTerm); } private boolean isIndexEmpty() throws IOException { @@ -98,6 +121,7 @@ private SearchResult search(String query, List fields) throws Exception long startTime = System.currentTimeMillis(); if (isIndexEmpty()) { + System.out.println("Index is empty"); return createEmptySearchResult(startTime); } @@ -105,17 +129,19 @@ private SearchResult search(String query, List fields) throws Exception IndexSearcher searcher = new IndexSearcher(reader); MultiFieldQueryParser parser = new MultiFieldQueryParser(fields.toArray(new String[0]), analyzer); Query q = parser.parse(query); + //System.out.println("Executing query: " + q.toString()); TopDocs topDocs = searcher.search(q, MAX_RESULTS); + //System.out.println("Total hits: " + topDocs.totalHits); long took = System.currentTimeMillis() - startTime; SearchResult result = new SearchResult(); result.setTook(took); 
result.getHitsMetadata().getTotalHits().setValue(topDocs.totalHits.value); - //result.setMaxScore(topDocs.getMaxScore()); + // result.setMaxScore(topDocs.getMaxScore()); StoredFields storedFields = searcher.storedFields(); - SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("",""); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(q)); for (ScoreDoc sd : topDocs.scoreDocs) { @@ -132,7 +158,7 @@ private SearchResult search(String query, List fields) throws Exception } } - result.addDocument(highlightedDoc); + result.addDocument("index",highlightedDoc); } return result; @@ -145,242 +171,9 @@ private SearchResult createEmptySearchResult(long startTime) { SearchResult result = new SearchResult(); result.setTook(took); result.getHitsMetadata().getTotalHits().setValue(0); - //result.setMaxScore(0.0f); + // result.setMaxScore(0.0f); result.setTimedOut(false); return result; } - - - public static class SearchResult { - @JsonProperty("took") - private long took; - - @JsonProperty("timed_out") - private boolean timedOut = false; - - @JsonProperty("_shards") - private ShardStatistics shards; - - @JsonProperty("hits") - private HitsMetadata hitsMetadata; - - @JsonProperty("num_reduce_phases") - private long numberOfReducePhases; - - @JsonProperty("terminated_early") - private boolean terminatedEarly; - - @JsonProperty("suggest") - private Map suggest = new HashMap<>(); // Initialize as empty map - - // Constructor - public SearchResult() { - this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures - this.hitsMetadata = new HitsMetadata(); - this.numberOfReducePhases = 0; // Default value - this.terminatedEarly = false; // Default value - this.suggest = new HashMap<>(); // Empty suggestion map - } - - // Add document to hits - public void addDocument(Map doc) { - Map hit = new HashMap<>(); - hit.put("_source", doc); - hitsMetadata.addHit(hit); - } 
- - // Getters and Setters for all fields - public long getTook() { - return took; - } - - public void setTook(long took) { - this.took = took; - } - - public boolean isTimedOut() { - return timedOut; - } - - public void setTimedOut(boolean timedOut) { - this.timedOut = timedOut; - } - - public ShardStatistics getShards() { - return shards; - } - - public void setShards(ShardStatistics shards) { - this.shards = shards; - } - - public HitsMetadata getHitsMetadata() { - return hitsMetadata; - } - - public void setHitsMetadata(HitsMetadata hitsMetadata) { - this.hitsMetadata = hitsMetadata; - } - - public long getNumberOfReducePhases() { - return numberOfReducePhases; - } - - public void setNumberOfReducePhases(long numberOfReducePhases) { - this.numberOfReducePhases = numberOfReducePhases; - } - - public boolean isTerminatedEarly() { - return terminatedEarly; - } - - public void setTerminatedEarly(boolean terminatedEarly) { - this.terminatedEarly = terminatedEarly; - } - - public Map getSuggest() { - return suggest; - } - - public void setSuggest(Map suggest) { - this.suggest = suggest; - } - - // ShardStatistics inner class - public static class ShardStatistics { - @JsonProperty("total") - private int total; - - @JsonProperty("successful") - private int successful; - - @JsonProperty("skipped") - private int skipped; - - @JsonProperty("failed") - private int failed; - - // Constructor - public ShardStatistics(int total, int successful, int skipped, int failed) { - this.total = total; - this.successful = successful; - this.skipped = skipped; - this.failed = failed; - } - - // Getters and Setters - public int getTotal() { - return total; - } - - public void setTotal(int total) { - this.total = total; - } - - public int getSuccessful() { - return successful; - } - - public void setSuccessful(int successful) { - this.successful = successful; - } - - public int getSkipped() { - return skipped; - } - - public void setSkipped(int skipped) { - this.skipped = skipped; - } - - 
public int getFailed() { - return failed; - } - - public void setFailed(int failed) { - this.failed = failed; - } - } - - // HitsMetadata inner class - public static class HitsMetadata { - @JsonProperty("total") - private TotalHits totalHits; - - @JsonProperty("max_score") - private Float maxScore; - - @JsonProperty("hits") - private List> hits; - - public HitsMetadata() { - this.totalHits = new TotalHits(0, "eq"); - this.maxScore = null; - this.hits = new ArrayList<>(); - } - - // Getters and Setters - public TotalHits getTotalHits() { - return totalHits; - } - - public void setTotalHits(TotalHits totalHits) { - this.totalHits = totalHits; - } - - public Float getMaxScore() { - return maxScore; - } - - public void setMaxScore(Float maxScore) { - this.maxScore = maxScore; - } - - public List> getHits() { - return hits; - } - - public void setHits(List> hits) { - this.hits = hits; - } - - // Add a hit to the hits list - public void addHit(Map hit) { - this.hits.add(hit); - } - } - - // TotalHits inner class - public static class TotalHits { - @JsonProperty("value") - private long value; - - @JsonProperty("relation") - private String relation; - - public TotalHits(long value, String relation) { - this.value = value; - this.relation = relation; - } - - // Getters and Setters - public long getValue() { - return value; - } - - public void setValue(long value) { - this.value = value; - } - - public String getRelation() { - return relation; - } - - public void setRelation(String relation) { - this.relation = relation; - } - } - } - } From e9609a4fc11ec8abe8981f245dbfcedcbd8a85f6 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Wed, 4 Sep 2024 11:18:25 +0200 Subject: [PATCH 74/87] feat: working search response Signed-off-by: Yannick Libert --- .../{resources => models}/SearchRequest.java | 2 +- .../searchengine/models/SearchResult.java | 16 ++++-- .../resources/SearchResource.java | 7 +-- .../searchengine/services/SearchService.java | 51 +++++++++++-------- 4 files 
changed, 46 insertions(+), 30 deletions(-) rename search/src/main/java/marquez/searchengine/{resources => models}/SearchRequest.java (98%) diff --git a/search/src/main/java/marquez/searchengine/resources/SearchRequest.java b/search/src/main/java/marquez/searchengine/models/SearchRequest.java similarity index 98% rename from search/src/main/java/marquez/searchengine/resources/SearchRequest.java rename to search/src/main/java/marquez/searchengine/models/SearchRequest.java index 4c5c8eaa08..ac718ae6b6 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchRequest.java +++ b/search/src/main/java/marquez/searchengine/models/SearchRequest.java @@ -1,4 +1,4 @@ -package marquez.searchengine.resources; +package marquez.searchengine.models; import java.util.Map; import java.util.List; diff --git a/search/src/main/java/marquez/searchengine/models/SearchResult.java b/search/src/main/java/marquez/searchengine/models/SearchResult.java index 2d09eb613f..5f46ded8be 100644 --- a/search/src/main/java/marquez/searchengine/models/SearchResult.java +++ b/search/src/main/java/marquez/searchengine/models/SearchResult.java @@ -29,6 +29,10 @@ public class SearchResult { @JsonProperty("suggest") private Map suggest = new HashMap<>(); // Initialize as empty map + @JsonProperty("highlights") + private List>> highlights; // Add this field for highlights + + // Constructor public SearchResult() { this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures @@ -36,14 +40,17 @@ public SearchResult() { this.numberOfReducePhases = 0; // Default value this.terminatedEarly = false; // Default value this.suggest = new HashMap<>(); // Empty suggestion map + this.highlights = new ArrayList<>(); } // Add document to hits - public void addDocument(String index, Map doc) { + public void addDocument(String index, Map doc, Map> highlight, int indexPosition) { Map hit = new HashMap<>(); hit.put("_index", index); // Include the index name in the hit 
hit.put("_source", doc); - hitsMetadata.addHit(index, hit); + hit.putAll(doc); + hitsMetadata.addHit(index, hit, indexPosition); + highlights.add(highlight); } // Getters and Setters for all fields @@ -202,11 +209,12 @@ public void setHits(List> hits) { } // Add a hit to the hits list - public void addHit(String index, Map doc) { + public void addHit(String index, Map doc, int indexPosition) { Map hit = new HashMap<>(); hit.put("_index", index); hit.putAll(doc); - hit.put("_id", "id"); + //String uniqueId = ((Map) doc.get("_source")).get("run_id") + "-" + indexPosition; + hit.put("_id", "id"); // Ensure the `_id` is unique this.hits.add(hit); } } diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index b043c04d0d..cd09cd88b0 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -14,10 +14,11 @@ import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; //import com.fasterxml.jackson.databind.ObjectMapper; - import marquez.searchengine.services.SearchService; import marquez.searchengine.models.IndexResponse; import marquez.searchengine.models.SearchResult; +import marquez.searchengine.models.SearchRequest; + @Path("/") @Produces(MediaType.APPLICATION_JSON) @@ -37,7 +38,7 @@ public Response searchJobs(SearchRequest request) { String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); // Log the extracted details for debugging - //System.out.println("Received query: " + query); + //System.out.println("Received query: " + query + fields); SearchResult result = searchService.searchJobs(query, fields); //String jsonResponse = new ObjectMapper().writeValueAsString(result); //System.out.println("Serialized Response: " + jsonResponse); @@ -54,7 +55,7 @@ public Response 
searchDatasets(SearchRequest request) { try { String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); - // // Log the extracted details for debugging + // Log the extracted details for debugging //System.out.println("Received query: " + query); SearchResult result = searchService.searchDatasets(query, fields); //String jsonResponse = new ObjectMapper().writeValueAsString(result); diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index f72b8d9b8b..7b479cf339 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -1,6 +1,7 @@ package marquez.searchengine.services; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -13,6 +14,7 @@ import org.apache.lucene.index.IndexNotFoundException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.search.IndexSearcher; @@ -24,23 +26,26 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; + import marquez.searchengine.models.IndexResponse; import marquez.searchengine.models.SearchResult; public class SearchService { - private final Directory indexDirectory; + private final Directory jobIndexDirectory; + private final Directory datasetIndexDirectory; private final StandardAnalyzer analyzer; private static final int MAX_RESULTS = 10; public SearchService() { - this.indexDirectory = new ByteBuffersDirectory(); + this.jobIndexDirectory = new 
ByteBuffersDirectory(); + this.datasetIndexDirectory = new ByteBuffersDirectory(); this.analyzer = new StandardAnalyzer(); } // Method to index a job document public IndexResponse indexJobDocument(Map document) throws IOException { - try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { + try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { Document doc = new Document(); doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); @@ -63,7 +68,7 @@ public IndexResponse indexJobDocument(Map document) throws IOExc // Method to index a dataset document public IndexResponse indexDatasetDocument(Map document) throws IOException { - try (IndexWriter writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer))) { + try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { Document doc = new Document(); doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); @@ -81,7 +86,6 @@ public IndexResponse indexDatasetDocument(Map document) throws I doc.add(new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); } - //System.out.println("Indexing document: " + doc); writer.addDocument(doc); writer.commit(); @@ -101,7 +105,7 @@ private IndexResponse createIndexResponse(String index, String id, boolean creat return new IndexResponse(index, id, version, result, shardInfo, seqNo, primaryTerm); } - private boolean isIndexEmpty() throws IOException { + private boolean isIndexEmpty(Directory indexDirectory) throws IOException { try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { return reader.numDocs() == 0; } catch (IndexNotFoundException e) { @@ -110,18 +114,17 @@ private boolean isIndexEmpty() throws IOException { } public SearchResult searchDatasets(String query, List fields) throws Exception { - return search(query, fields); + return 
search(query, fields, datasetIndexDirectory); } public SearchResult searchJobs(String query, List fields) throws Exception { - return search(query, fields); + return search(query, fields, jobIndexDirectory); } - private SearchResult search(String query, List fields) throws Exception { + private SearchResult search(String query, List fields, Directory indexDirectory) throws Exception { long startTime = System.currentTimeMillis(); - if (isIndexEmpty()) { - System.out.println("Index is empty"); + if (isIndexEmpty(indexDirectory)) { return createEmptySearchResult(startTime); } @@ -129,36 +132,41 @@ private SearchResult search(String query, List fields) throws Exception IndexSearcher searcher = new IndexSearcher(reader); MultiFieldQueryParser parser = new MultiFieldQueryParser(fields.toArray(new String[0]), analyzer); Query q = parser.parse(query); - //System.out.println("Executing query: " + q.toString()); TopDocs topDocs = searcher.search(q, MAX_RESULTS); - //System.out.println("Total hits: " + topDocs.totalHits); long took = System.currentTimeMillis() - startTime; SearchResult result = new SearchResult(); result.setTook(took); result.getHitsMetadata().getTotalHits().setValue(topDocs.totalHits.value); - // result.setMaxScore(topDocs.getMaxScore()); StoredFields storedFields = searcher.storedFields(); - SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("",""); + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("", ""); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(q)); - for (ScoreDoc sd : topDocs.scoreDocs) { + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + ScoreDoc sd = topDocs.scoreDocs[i]; Document doc = storedFields.document(sd.doc); - Map highlightedDoc = new HashMap<>(); + Map allDoc = new HashMap<>(); + Map> highlight = new HashMap<>(); + + for (IndexableField field : doc.getFields()) { + allDoc.put(field.name(), field.stringValue()); + } for (String field : fields) { String text = doc.get(field); if (text 
!= null) { String highlightedText = highlighter.getBestFragment(analyzer, field, text); - highlightedDoc.put(field, highlightedText != null ? highlightedText : text); - } else { - highlightedDoc.put(field, doc.get(field)); + if (highlightedText != null) { + List highlightList = new ArrayList<>(); + highlightList.add(highlightedText); + highlight.put(field, highlightList); + } } } - result.addDocument("index",highlightedDoc); + result.addDocument(indexDirectory == jobIndexDirectory ? "jobs" : "datasets", allDoc, highlight, i); } return result; @@ -171,7 +179,6 @@ private SearchResult createEmptySearchResult(long startTime) { SearchResult result = new SearchResult(); result.setTook(took); result.getHitsMetadata().getTotalHits().setValue(0); - // result.setMaxScore(0.0f); result.setTimedOut(false); return result; From dc77ead300c45942ec6c5cb4dcdf3a6f8dedb4a2 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Wed, 4 Sep 2024 11:40:45 +0200 Subject: [PATCH 75/87] feat: changing the analyzer to an NGramAnalyzer Signed-off-by: Yannick Libert --- .../searchengine/services/NGramAnalyzer.java | 23 +++++++++++++++++++ .../searchengine/services/SearchService.java | 8 ++++--- 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java diff --git a/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java b/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java new file mode 100644 index 0000000000..6c8eaf8180 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java @@ -0,0 +1,23 @@ +package marquez.searchengine.services; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.ngram.NGramTokenizer; + +public class NGramAnalyzer extends Analyzer { + private final int minGram; + private final int maxGram; + + 
public NGramAnalyzer(int minGram, int maxGram) { + this.minGram = minGram; + this.maxGram = maxGram; + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + NGramTokenizer tokenizer = new NGramTokenizer(minGram, maxGram); // Define the N-grams range + TokenStream tokenStream = new LowerCaseFilter(tokenizer); // Optional: make everything lowercase + return new TokenStreamComponents(tokenizer, tokenStream); + } +} diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 7b479cf339..8f3ad2f291 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -5,7 +5,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.analysis.standard.StandardAnalyzer; +//import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; @@ -34,13 +34,15 @@ public class SearchService { private final Directory jobIndexDirectory; private final Directory datasetIndexDirectory; - private final StandardAnalyzer analyzer; + //private final StandardAnalyzer analyzer; + private final NGramAnalyzer analyzer; private static final int MAX_RESULTS = 10; public SearchService() { this.jobIndexDirectory = new ByteBuffersDirectory(); this.datasetIndexDirectory = new ByteBuffersDirectory(); - this.analyzer = new StandardAnalyzer(); + //this.analyzer = new StandardAnalyzer(); + this.analyzer = new NGramAnalyzer(3, 4); } // Method to index a job document From c506eeee2398a5906caf9fdd40eed7a1e243672d Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Wed, 4 Sep 2024 17:12:00 +0200 Subject: [PATCH 76/87] WIP: load event at startup Signed-off-by: Yannick Libert --- search/build.gradle | 3 + 
.../searchengine/SearchApplication.java | 7 +- .../searchengine/db/DatabaseConnection.java | 14 ++++ .../resources/SearchResource.java | 13 +++- .../searchengine/services/SearchService.java | 77 ++++++++++++++++++- 5 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 search/src/main/java/marquez/searchengine/db/DatabaseConnection.java diff --git a/search/build.gradle b/search/build.gradle index 4b3e08db51..6fbadb1872 100644 --- a/search/build.gradle +++ b/search/build.gradle @@ -12,6 +12,9 @@ dependencies { implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" implementation "org.apache.lucene:lucene-highlighter:${luceneVersion}" implementation 'org.opensearch.client:opensearch-rest-high-level-client:2.16.0' + implementation 'org.jdbi:jdbi3-core:3.45.4' + implementation 'org.jdbi:jdbi3-sqlobject:3.45.4' + implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}" } diff --git a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java index 055132cdc5..2502f98c45 100644 --- a/search/src/main/java/marquez/searchengine/SearchApplication.java +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -4,6 +4,10 @@ import io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Environment; import java.io.IOException; + +import org.jdbi.v3.core.Jdbi; + +import marquez.searchengine.db.DatabaseConnection; import marquez.searchengine.health.SearchHealthCheck; import marquez.searchengine.resources.SearchResource; @@ -23,7 +27,8 @@ public void initialize(Bootstrap bootstrap) {} @Override public void run(SearchConfig configuration, Environment environment) throws IOException { - final SearchResource searchResource = new SearchResource(); + Jdbi jdbi = DatabaseConnection.initializeJdbi(); + final SearchResource searchResource = new SearchResource(jdbi); environment.jersey().register(searchResource); 
environment.healthChecks().register("search-health-check", new SearchHealthCheck()); } diff --git a/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java b/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java new file mode 100644 index 0000000000..471678e039 --- /dev/null +++ b/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java @@ -0,0 +1,14 @@ +package marquez.searchengine.db; + +import org.jdbi.v3.core.Jdbi; + +public class DatabaseConnection { + + public static Jdbi initializeJdbi() { + String jdbcUrl = "jdbc:postgresql://localhost:5432/marquez"; + String username = "marquez"; + String password = "marquez"; + + return Jdbi.create(jdbcUrl, username, password); + } +} \ No newline at end of file diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index cd09cd88b0..017942fa3a 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -13,21 +13,28 @@ import javax.ws.rs.Produces; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; + +import org.jdbi.v3.core.Jdbi; +import org.jdbi.v3.sqlobject.SqlObjectPlugin; + //import com.fasterxml.jackson.databind.ObjectMapper; import marquez.searchengine.services.SearchService; import marquez.searchengine.models.IndexResponse; import marquez.searchengine.models.SearchResult; import marquez.searchengine.models.SearchRequest; - +import marquez.db.OpenLineageDao; @Path("/") @Produces(MediaType.APPLICATION_JSON) public class SearchResource { private final SearchService searchService; + private final Jdbi jdbi; - public SearchResource() { - this.searchService = new SearchService(); + public SearchResource(Jdbi jdbi) throws IOException { + this.jdbi = jdbi.installPlugin(new SqlObjectPlugin()); + OpenLineageDao openLineageDao = 
jdbi.onDemand(OpenLineageDao.class); + this.searchService = new SearchService(openLineageDao); } @POST diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 8f3ad2f291..6def7ea2fb 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -1,10 +1,16 @@ package marquez.searchengine.services; import java.io.IOException; +import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; + +import javax.validation.Valid; +import javax.validation.constraints.NotNull; + //import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -29,20 +35,89 @@ import marquez.searchengine.models.IndexResponse; import marquez.searchengine.models.SearchResult; +import marquez.db.OpenLineageDao; +import marquez.service.models.LineageEvent; +import marquez.service.models.LineageEvent.Dataset; public class SearchService { + private final OpenLineageDao openLineageDao; private final Directory jobIndexDirectory; private final Directory datasetIndexDirectory; //private final StandardAnalyzer analyzer; private final NGramAnalyzer analyzer; private static final int MAX_RESULTS = 10; - public SearchService() { + public SearchService(OpenLineageDao openLineageDao) throws IOException { + this.openLineageDao = openLineageDao; this.jobIndexDirectory = new ByteBuffersDirectory(); this.datasetIndexDirectory = new ByteBuffersDirectory(); //this.analyzer = new StandardAnalyzer(); this.analyzer = new NGramAnalyzer(3, 4); + // init index with DB lineage events + loadLineageEventsFromDatabase(); + } + + private void loadLineageEventsFromDatabase() throws IOException { + ZonedDateTime before = ZonedDateTime.now(); // 
Current time + ZonedDateTime after = before.minusYears(5); // Fetch events from the past 1 month + int limit = 1000; // Limit of events to load at a time + int offset = 0; // Offset for pagination + + List lineageEvents; + System.out.println("prout"); + do { + // Fetch a batch of lineage events + lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); + + // Index each event into Lucene + for (LineageEvent event : lineageEvents) { + System.out.println("prooooooout"); + indexLineageEvent(event); + } + + offset += limit; // Increment the offset for the next batch + } while (!lineageEvents.isEmpty()); + } + + private void indexLineageEvent(@Valid @NotNull LineageEvent event) throws IOException { + // Convert inputs and outputs to Map and index them + if (event.getInputs() != null) { + for (Dataset input : event.getInputs()) { + Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), event.getEventType()); + indexDatasetDocument(inputMap); + } + } + + if (event.getOutputs() != null) { + for (Dataset output : event.getOutputs()) { + Map outputMap = mapDatasetEvent(output, event.getRun().getRunId(), event.getEventType()); + indexDatasetDocument(outputMap); + } + } + Map jobMap = mapJobEvent(event); + indexJobDocument(jobMap); + } + + private Map mapDatasetEvent(Dataset dataset, String run_id, String eventType) { + Map datasetMap = new HashMap<>(); + datasetMap.put("run_id", run_id); + datasetMap.put("eventType", eventType); + datasetMap.put("name", dataset.getName()); + datasetMap.put("namespace", dataset.getNamespace()); + Optional.ofNullable(dataset.getFacets()).ifPresent(facets -> datasetMap.put("facets", facets)); + return datasetMap; + } + + // Helper method to map job details to Map + private Map mapJobEvent(LineageEvent event) { + Map jobMap = new HashMap<>(); + jobMap.put("run_id", event.getRun().getRunId().toString()); + jobMap.put("name", event.getJob().getName()); + jobMap.put("namespace", event.getJob().getNamespace()); 
+ jobMap.put("eventType", event.getEventType()); + Optional.ofNullable(event.getRun().getFacets()).ifPresent(facets -> jobMap.put("facets", facets)); + return jobMap; } // Method to index a job document From 61631bb56271b81fc7d851c6adc2b461dd125f81 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Thu, 5 Sep 2024 10:18:01 +0200 Subject: [PATCH 77/87] WIP: dedup event at startup Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 6def7ea2fb..967ac2f6c3 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -22,10 +22,14 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; @@ -120,8 +124,52 @@ private Map mapJobEvent(LineageEvent event) { return jobMap; } + private boolean documentAlreadyExists(Map document, Directory indexDirectory) throws IOException { + // Check if the index is empty before performing any search + if (isIndexEmpty(indexDirectory)) { + return false; // No document exists if the index is empty + } + + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + IndexSearcher searcher = 
new IndexSearcher(reader); + MultiFieldQueryParser parser = new MultiFieldQueryParser(new String[]{"name", "namespace"}, analyzer); + String name = (String) document.get("name"); + String namespace = (String) document.get("namespace"); + Query query = parser.parse("name:\"" + name + "\" AND namespace:\"" + namespace + "\""); + TopDocs topDocs = searcher.search(query, 1); + + if (topDocs.totalHits.value > 0) { + Document existingDoc = searcher.doc(topDocs.scoreDocs[0].doc); + // Compare other fields to determine if the document needs an update + for (Map.Entry entry : document.entrySet()) { + String fieldName = entry.getKey(); + String fieldValue = entry.getValue() != null ? entry.getValue().toString() : null; + // If the stored field is different from the new field value, return true (needs update) + if (fieldValue != null && !fieldValue.equals(existingDoc.get(fieldName))) { + return false; // Document exists but needs an update + } + } + System.out.println("Document exists and does not need an update"); + + return true; // Document exists and does not need an update + } + + return false; // Document does not exist + } catch (Exception e) { + e.printStackTrace(); + throw new IOException("Failed to search for document", e); + } + } + + // Method to index a job document public IndexResponse indexJobDocument(Map document) throws IOException { + // Check if the document already exists + if (documentAlreadyExists(document, jobIndexDirectory)) { + // Document exists and needs an update; first delete the old document + return createIndexResponse("jobs", document.get("name").toString(), false); + } + try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { Document doc = new Document(); @@ -145,6 +193,12 @@ public IndexResponse indexJobDocument(Map document) throws IOExc // Method to index a dataset document public IndexResponse indexDatasetDocument(Map document) throws IOException { + // Check if the document exists + if 
(documentAlreadyExists(document, datasetIndexDirectory)) { + // Document exists and needs an update; first delete the old document + return createIndexResponse("datasets", document.get("name").toString(), false); + } + try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { Document doc = new Document(); From 4fd9535a18a62e07f7328402abdc8e22d9481ee0 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Thu, 5 Sep 2024 10:27:28 +0200 Subject: [PATCH 78/87] WIP: remove log Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 967ac2f6c3..970300a431 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -22,14 +22,10 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; -import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; @@ -69,14 +65,12 @@ private void loadLineageEventsFromDatabase() throws IOException { int offset = 0; // Offset for pagination List lineageEvents; - System.out.println("prout"); do { // Fetch a batch of lineage events lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); 
// Index each event into Lucene for (LineageEvent event : lineageEvents) { - System.out.println("prooooooout"); indexLineageEvent(event); } @@ -139,7 +133,8 @@ private boolean documentAlreadyExists(Map document, Directory in TopDocs topDocs = searcher.search(query, 1); if (topDocs.totalHits.value > 0) { - Document existingDoc = searcher.doc(topDocs.scoreDocs[0].doc); + StoredFields storedFields = searcher.storedFields(); + Document existingDoc = storedFields.document(topDocs.scoreDocs[0].doc); // Compare other fields to determine if the document needs an update for (Map.Entry entry : document.entrySet()) { String fieldName = entry.getKey(); @@ -147,10 +142,9 @@ private boolean documentAlreadyExists(Map document, Directory in // If the stored field is different from the new field value, return true (needs update) if (fieldValue != null && !fieldValue.equals(existingDoc.get(fieldName))) { return false; // Document exists but needs an update + //TODO: handle that case in a better way } } - System.out.println("Document exists and does not need an update"); - return true; // Document exists and does not need an update } @@ -163,6 +157,7 @@ private boolean documentAlreadyExists(Map document, Directory in // Method to index a job document + //TODO: don't index a Map, use the Dataset object directly public IndexResponse indexJobDocument(Map document) throws IOException { // Check if the document already exists if (documentAlreadyExists(document, jobIndexDirectory)) { @@ -192,6 +187,7 @@ public IndexResponse indexJobDocument(Map document) throws IOExc } // Method to index a dataset document + //TODO: don't index a Map, use the Dataset object directly public IndexResponse indexDatasetDocument(Map document) throws IOException { // Check if the document exists if (documentAlreadyExists(document, datasetIndexDirectory)) { From dda476e960274e017acd31d4f4aa199621b47f0e Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Thu, 5 Sep 2024 15:08:37 +0200 Subject: [PATCH 79/87] WIP: 
batch load event from lineage_events Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 149 +++++++++++------- 1 file changed, 95 insertions(+), 54 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 970300a431..f5713aa89f 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -61,42 +61,55 @@ public SearchService(OpenLineageDao openLineageDao) throws IOException { private void loadLineageEventsFromDatabase() throws IOException { ZonedDateTime before = ZonedDateTime.now(); // Current time ZonedDateTime after = before.minusYears(5); // Fetch events from the past 1 month - int limit = 1000; // Limit of events to load at a time + int limit = 50000; // Limit of events to load at a time int offset = 0; // Offset for pagination List lineageEvents; do { // Fetch a batch of lineage events lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); + System.out.println("Lineage events fetched: "+lineageEvents.size()); - // Index each event into Lucene - for (LineageEvent event : lineageEvents) { - indexLineageEvent(event); - } + indexLineageEvents(lineageEvents); offset += limit; // Increment the offset for the next batch } while (!lineageEvents.isEmpty()); } - private void indexLineageEvent(@Valid @NotNull LineageEvent event) throws IOException { - // Convert inputs and outputs to Map and index them - if (event.getInputs() != null) { - for (Dataset input : event.getInputs()) { - Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), event.getEventType()); - indexDatasetDocument(inputMap); + private void indexLineageEvents(@Valid @NotNull List lineageEvents) throws IOException { + List> inputMaps = new ArrayList<>(); + List> outputMaps = new ArrayList<>(); + List> jobMaps = new 
ArrayList<>(); + for (LineageEvent event : lineageEvents) { + if (event.getInputs() != null) { + for (Dataset input : event.getInputs()) { + Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), + event.getEventType()); + inputMaps.add(inputMap); + } } - } - - if (event.getOutputs() != null) { - for (Dataset output : event.getOutputs()) { - Map outputMap = mapDatasetEvent(output, event.getRun().getRunId(), event.getEventType()); - indexDatasetDocument(outputMap); + if (event.getOutputs() != null) { + for (Dataset output : event.getOutputs()) { + Map outputMap = mapDatasetEvent(output, event.getRun().getRunId(), + event.getEventType()); + outputMaps.add(outputMap); + } } + Map jobMap = mapJobEvent(event); + jobMaps.add(jobMap); + } + if (!inputMaps.isEmpty()) { + indexDatasetDocuments(inputMaps); + } + if (!outputMaps.isEmpty()) { + indexDatasetDocuments(outputMaps); + } + if (!jobMaps.isEmpty()) { + indexJobDocuments(jobMaps); } - Map jobMap = mapJobEvent(event); - indexJobDocument(jobMap); } + // Helper method to map dataset details to Map private Map mapDatasetEvent(Dataset dataset, String run_id, String eventType) { Map datasetMap = new HashMap<>(); datasetMap.put("run_id", run_id); @@ -155,31 +168,15 @@ private boolean documentAlreadyExists(Map document, Directory in } } - // Method to index a job document //TODO: don't index a Map, use the Dataset object directly public IndexResponse indexJobDocument(Map document) throws IOException { // Check if the document already exists if (documentAlreadyExists(document, jobIndexDirectory)) { - // Document exists and needs an update; first delete the old document return createIndexResponse("jobs", document.get("name").toString(), false); } - try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { - Document doc = new Document(); - - doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); - doc.add(new TextField("name", (String) 
document.get("name"), Field.Store.YES)); - doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); - doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); - - if (document.containsKey("facets")) { - doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); - } - if (document.containsKey("runFacets")) { - doc.add(new TextField("runFacets", document.get("runFacets").toString(), Field.Store.YES)); - } - + Document doc = createJobDocument(document); writer.addDocument(doc); writer.commit(); return createIndexResponse("jobs", document.get("name").toString(), true); @@ -191,33 +188,77 @@ public IndexResponse indexJobDocument(Map document) throws IOExc public IndexResponse indexDatasetDocument(Map document) throws IOException { // Check if the document exists if (documentAlreadyExists(document, datasetIndexDirectory)) { - // Document exists and needs an update; first delete the old document return createIndexResponse("datasets", document.get("name").toString(), false); } - try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { - Document doc = new Document(); - - doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); - doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); - doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); - doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + Document doc = createDatasetDocument(document); + writer.addDocument(doc); + writer.commit(); + return createIndexResponse("datasets", document.get("name").toString(), true); + } + } - if (document.containsKey("facets")) { - doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); - } - if (document.containsKey("inputFacets")) { - doc.add(new TextField("inputFacets", 
document.get("inputFacets").toString(), Field.Store.YES)); - } - if (document.containsKey("outputFacets")) { - doc.add(new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); + public void indexJobDocuments(List> documents) throws IOException { + // Check if the document already exists + try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { + for (Map document : documents) { + if (documentAlreadyExists(document, jobIndexDirectory)) { + continue; + } + Document doc = createJobDocument(document); + writer.addDocument(doc); } + writer.commit(); + } + } - writer.addDocument(doc); + public void indexDatasetDocuments(List> documents) throws IOException { + // Check if the document exists + try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { + for (Map document : documents) { + if (documentAlreadyExists(document, datasetIndexDirectory)) { + continue; + } + Document doc = createDatasetDocument(document); + writer.addDocument(doc); + } writer.commit(); + } + } - return createIndexResponse("datasets", document.get("name").toString(), true); + private Document createJobDocument(Map document) { + Document doc = new Document(); + doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + if (document.containsKey("facets")) { + doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); + } + if (document.containsKey("runFacets")) { + doc.add(new TextField("runFacets", document.get("runFacets").toString(), Field.Store.YES)); + } + return doc; + } + + private Document createDatasetDocument(Map document) { + Document doc = new Document(); + + 
doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + + if (document.containsKey("facets")) { + doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); + } + if (document.containsKey("inputFacets")) { + doc.add(new TextField("inputFacets", document.get("inputFacets").toString(), Field.Store.YES)); + } + if (document.containsKey("outputFacets")) { + doc.add(new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); } + return doc; } private IndexResponse createIndexResponse(String index, String id, boolean created) { From c7921f503bfc36fe6f115d155c7d56b8a1a859f8 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Fri, 6 Sep 2024 16:45:17 +0200 Subject: [PATCH 80/87] WIP: reduce memory footprint by removing duplicates Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 90 ++++++++----------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index f5713aa89f..f237e81bcc 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import 
org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; @@ -59,10 +61,10 @@ public SearchService(OpenLineageDao openLineageDao) throws IOException { } private void loadLineageEventsFromDatabase() throws IOException { - ZonedDateTime before = ZonedDateTime.now(); // Current time - ZonedDateTime after = before.minusYears(5); // Fetch events from the past 1 month - int limit = 50000; // Limit of events to load at a time - int offset = 0; // Offset for pagination + ZonedDateTime before = ZonedDateTime.now(); + ZonedDateTime after = before.minusDays(1); + int limit = 10000; + int offset = 0; List lineageEvents; do { @@ -72,40 +74,42 @@ private void loadLineageEventsFromDatabase() throws IOException { indexLineageEvents(lineageEvents); - offset += limit; // Increment the offset for the next batch + offset += limit; } while (!lineageEvents.isEmpty()); } private void indexLineageEvents(@Valid @NotNull List lineageEvents) throws IOException { - List> inputMaps = new ArrayList<>(); - List> outputMaps = new ArrayList<>(); - List> jobMaps = new ArrayList<>(); + // for dedup purpose + Map> inputMaps = new HashMap<>(); + Map> outputMaps = new HashMap<>(); + Map> jobMaps = new HashMap<>(); + for (LineageEvent event : lineageEvents) { if (event.getInputs() != null) { for (Dataset input : event.getInputs()) { - Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), - event.getEventType()); - inputMaps.add(inputMap); + Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), event.getEventType()); + //deduplicate on uniqueId + inputMaps.put((String)inputMap.get("id"), inputMap); } } if (event.getOutputs() != null) { for (Dataset output : event.getOutputs()) { - Map outputMap = mapDatasetEvent(output, event.getRun().getRunId(), - event.getEventType()); - outputMaps.add(outputMap); + Map outputMap = mapDatasetEvent(output, 
event.getRun().getRunId(), event.getEventType()); + outputMaps.put((String)outputMap.get("id"), outputMap); } } Map jobMap = mapJobEvent(event); - jobMaps.add(jobMap); + jobMaps.put((String)jobMap.get("id"), jobMap); } + // At this point, inputMaps, outputMaps, and jobMaps are de-duplicated if (!inputMaps.isEmpty()) { - indexDatasetDocuments(inputMaps); + indexDatasetDocuments(new ArrayList<>(inputMaps.values())); } if (!outputMaps.isEmpty()) { - indexDatasetDocuments(outputMaps); + indexDatasetDocuments(new ArrayList<>(outputMaps.values())); } if (!jobMaps.isEmpty()) { - indexJobDocuments(jobMaps); + indexJobDocuments(new ArrayList<>(jobMaps.values())); } } @@ -116,6 +120,7 @@ private Map mapDatasetEvent(Dataset dataset, String run_id, Stri datasetMap.put("eventType", eventType); datasetMap.put("name", dataset.getName()); datasetMap.put("namespace", dataset.getNamespace()); + datasetMap.put("id",dataset.getName()+"_"+dataset.getNamespace()); Optional.ofNullable(dataset.getFacets()).ifPresent(facets -> datasetMap.put("facets", facets)); return datasetMap; } @@ -126,42 +131,24 @@ private Map mapJobEvent(LineageEvent event) { jobMap.put("run_id", event.getRun().getRunId().toString()); jobMap.put("name", event.getJob().getName()); jobMap.put("namespace", event.getJob().getNamespace()); + jobMap.put("id",event.getJob().getName()+"_"+event.getJob().getNamespace()); jobMap.put("eventType", event.getEventType()); Optional.ofNullable(event.getRun().getFacets()).ifPresent(facets -> jobMap.put("facets", facets)); return jobMap; } - private boolean documentAlreadyExists(Map document, Directory indexDirectory) throws IOException { - // Check if the index is empty before performing any search + private boolean documentAlreadyExists(String uniqueId, Directory indexDirectory) throws IOException { if (isIndexEmpty(indexDirectory)) { - return false; // No document exists if the index is empty + return false; } - try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { 
IndexSearcher searcher = new IndexSearcher(reader); - MultiFieldQueryParser parser = new MultiFieldQueryParser(new String[]{"name", "namespace"}, analyzer); - String name = (String) document.get("name"); - String namespace = (String) document.get("namespace"); - Query query = parser.parse("name:\"" + name + "\" AND namespace:\"" + namespace + "\""); - TopDocs topDocs = searcher.search(query, 1); - - if (topDocs.totalHits.value > 0) { - StoredFields storedFields = searcher.storedFields(); - Document existingDoc = storedFields.document(topDocs.scoreDocs[0].doc); - // Compare other fields to determine if the document needs an update - for (Map.Entry entry : document.entrySet()) { - String fieldName = entry.getKey(); - String fieldValue = entry.getValue() != null ? entry.getValue().toString() : null; - // If the stored field is different from the new field value, return true (needs update) - if (fieldValue != null && !fieldValue.equals(existingDoc.get(fieldName))) { - return false; // Document exists but needs an update - //TODO: handle that case in a better way - } - } - return true; // Document exists and does not need an update + Query query = new TermQuery(new Term("id", uniqueId)); + TopDocs topDocs = searcher.search(query, 1); + if (topDocs.totalHits.value > 0){ + System.out.println("found duplicate "+uniqueId); } - - return false; // Document does not exist + return topDocs.totalHits.value > 0; } catch (Exception e) { e.printStackTrace(); throw new IOException("Failed to search for document", e); @@ -171,8 +158,7 @@ private boolean documentAlreadyExists(Map document, Directory in // Method to index a job document //TODO: don't index a Map, use the Dataset object directly public IndexResponse indexJobDocument(Map document) throws IOException { - // Check if the document already exists - if (documentAlreadyExists(document, jobIndexDirectory)) { + if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { return createIndexResponse("jobs", 
document.get("name").toString(), false); } try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { @@ -186,8 +172,7 @@ public IndexResponse indexJobDocument(Map document) throws IOExc // Method to index a dataset document //TODO: don't index a Map, use the Dataset object directly public IndexResponse indexDatasetDocument(Map document) throws IOException { - // Check if the document exists - if (documentAlreadyExists(document, datasetIndexDirectory)) { + if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { return createIndexResponse("datasets", document.get("name").toString(), false); } try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { @@ -199,10 +184,9 @@ public IndexResponse indexDatasetDocument(Map document) throws I } public void indexJobDocuments(List> documents) throws IOException { - // Check if the document already exists try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { for (Map document : documents) { - if (documentAlreadyExists(document, jobIndexDirectory)) { + if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { continue; } Document doc = createJobDocument(document); @@ -213,10 +197,9 @@ public void indexJobDocuments(List> documents) throws IOExce } public void indexDatasetDocuments(List> documents) throws IOException { - // Check if the document exists try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { for (Map document : documents) { - if (documentAlreadyExists(document, datasetIndexDirectory)) { + if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { continue; } Document doc = createDatasetDocument(document); @@ -228,6 +211,7 @@ public void indexDatasetDocuments(List> documents) throws IO private Document createJobDocument(Map document) { Document doc = new Document(); + doc.add(new 
StringField("id", (String) document.get("id"), Field.Store.YES)); doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); @@ -243,7 +227,7 @@ private Document createJobDocument(Map document) { private Document createDatasetDocument(Map document) { Document doc = new Document(); - + doc.add(new StringField("id", (String) document.get("id"), Field.Store.YES)); doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); From f37d6b5858ab16f67886a06547897fe6acd9298b Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Fri, 6 Sep 2024 16:46:30 +0200 Subject: [PATCH 81/87] WIP: remove debug log Signed-off-by: Yannick Libert --- .../java/marquez/searchengine/services/SearchService.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index f237e81bcc..00b3fa8953 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -70,7 +70,6 @@ private void loadLineageEventsFromDatabase() throws IOException { do { // Fetch a batch of lineage events lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); - System.out.println("Lineage events fetched: "+lineageEvents.size()); indexLineageEvents(lineageEvents); @@ -145,9 +144,6 @@ private boolean documentAlreadyExists(String uniqueId, Directory indexDirectory) IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("id", uniqueId)); TopDocs 
topDocs = searcher.search(query, 1); - if (topDocs.totalHits.value > 0){ - System.out.println("found duplicate "+uniqueId); - } return topDocs.totalHits.value > 0; } catch (Exception e) { e.printStackTrace(); From 00455d68531e34336a0b0a3ba57fbca4f593a12f Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Fri, 6 Sep 2024 18:58:41 +0200 Subject: [PATCH 82/87] WIP: thread safe index writing Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 93 ++++++++++++++----- 1 file changed, 69 insertions(+), 24 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 00b3fa8953..4cb85aab54 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -7,6 +7,10 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import javax.validation.Valid; import javax.validation.constraints.NotNull; @@ -49,6 +53,10 @@ public class SearchService { //private final StandardAnalyzer analyzer; private final NGramAnalyzer analyzer; private static final int MAX_RESULTS = 10; + private final ExecutorService executor; + private final IndexWriter jobIndexWriter; + private final IndexWriter datasetIndexWriter; + public SearchService(OpenLineageDao openLineageDao) throws IOException { this.openLineageDao = openLineageDao; @@ -56,6 +64,9 @@ public SearchService(OpenLineageDao openLineageDao) throws IOException { this.datasetIndexDirectory = new ByteBuffersDirectory(); //this.analyzer = new StandardAnalyzer(); this.analyzer = new NGramAnalyzer(3, 4); + this.executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + this.jobIndexWriter = new 
IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer)); + this.datasetIndexWriter = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer)); // init index with DB lineage events loadLineageEventsFromDatabase(); } @@ -67,14 +78,52 @@ private void loadLineageEventsFromDatabase() throws IOException { int offset = 0; List lineageEvents; + List> futures = new ArrayList<>(); do { // Fetch a batch of lineage events lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); - - indexLineageEvents(lineageEvents); - + + // If there are events, process them in parallel + if (!lineageEvents.isEmpty()) { + // Submit the batch to the executor service + Future future = executor.submit(new LineageEventProcessor(lineageEvents)); + futures.add(future); + } + System.out.println("done"); offset += limit; } while (!lineageEvents.isEmpty()); + // Wait for all tasks to finish + for (Future future : futures) { + try { + future.get(); // Wait for each thread to finish + } catch (Exception e) { + e.printStackTrace(); + } + } + executor.shutdown(); + + // Close the IndexWriters when done + jobIndexWriter.close(); + datasetIndexWriter.close(); + } + + // The class responsible for processing a batch of lineage events in parallel + private class LineageEventProcessor implements Callable { + private final List events; + + public LineageEventProcessor(List events) { + this.events = events; + } + + @Override + public Void call() throws Exception { + try { + indexLineageEvents(events); // Index the batch of lineage events + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } } private void indexLineageEvents(@Valid @NotNull List lineageEvents) throws IOException { @@ -102,13 +151,13 @@ private void indexLineageEvents(@Valid @NotNull List lineageEvents } // At this point, inputMaps, outputMaps, and jobMaps are de-duplicated if (!inputMaps.isEmpty()) { - indexDatasetDocuments(new ArrayList<>(inputMaps.values())); + 
indexDatasetDocuments(new ArrayList<>(inputMaps.values()), datasetIndexWriter); } if (!outputMaps.isEmpty()) { - indexDatasetDocuments(new ArrayList<>(outputMaps.values())); + indexDatasetDocuments(new ArrayList<>(outputMaps.values()), datasetIndexWriter); } if (!jobMaps.isEmpty()) { - indexJobDocuments(new ArrayList<>(jobMaps.values())); + indexJobDocuments(new ArrayList<>(jobMaps.values()), jobIndexWriter); } } @@ -179,30 +228,26 @@ public IndexResponse indexDatasetDocument(Map document) throws I } } - public void indexJobDocuments(List> documents) throws IOException { - try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { - for (Map document : documents) { - if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { - continue; - } - Document doc = createJobDocument(document); - writer.addDocument(doc); + public void indexJobDocuments(List> documents, IndexWriter writer) throws IOException { + for (Map document : documents) { + if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { + continue; } - writer.commit(); + Document doc = createJobDocument(document); + writer.addDocument(doc); } + writer.commit(); } - public void indexDatasetDocuments(List> documents) throws IOException { - try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { - for (Map document : documents) { - if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { - continue; - } - Document doc = createDatasetDocument(document); - writer.addDocument(doc); + public void indexDatasetDocuments(List> documents, IndexWriter writer) throws IOException { + for (Map document : documents) { + if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { + continue; } - writer.commit(); + Document doc = createDatasetDocument(document); + writer.addDocument(doc); } + writer.commit(); } private Document createJobDocument(Map document) { 
From b8589bcdabb5232b09cfb51563904efafd4ca829 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Sat, 7 Sep 2024 00:39:55 +0200 Subject: [PATCH 83/87] WIP: load init events in background Signed-off-by: Yannick Libert --- .../searchengine/services/SearchService.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 4cb85aab54..8592d0ee1e 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -68,7 +68,18 @@ public SearchService(OpenLineageDao openLineageDao) throws IOException { this.jobIndexWriter = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer)); this.datasetIndexWriter = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer)); // init index with DB lineage events - loadLineageEventsFromDatabase(); + indexLineageEventsInBackground(); + } + + // Load lineage events from DB and index them in the background + private void indexLineageEventsInBackground() { + executor.submit(() -> { + try { + loadLineageEventsFromDatabase(); + } catch (IOException e) { + e.printStackTrace(); + } + }); } private void loadLineageEventsFromDatabase() throws IOException { From 17c0d852f105de7518d1fc300d74e1aaa657b9f4 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Sat, 7 Sep 2024 02:41:28 +0200 Subject: [PATCH 84/87] WIP: remove the ERROR StatusLogger due to unnecessary opensearch deps Signed-off-by: Yannick Libert --- search/build.gradle | 2 +- .../src/main/java/marquez/searchengine/SearchApplication.java | 4 ++++ .../java/marquez/searchengine/services/SearchService.java | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/search/build.gradle b/search/build.gradle index 6fbadb1872..c1a356aebc 100644 --- a/search/build.gradle +++ b/search/build.gradle 
@@ -11,9 +11,9 @@ dependencies { implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}" implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" implementation "org.apache.lucene:lucene-highlighter:${luceneVersion}" - implementation 'org.opensearch.client:opensearch-rest-high-level-client:2.16.0' implementation 'org.jdbi:jdbi3-core:3.45.4' implementation 'org.jdbi:jdbi3-sqlobject:3.45.4' + implementation "org.slf4j:slf4j-api:${slf4jVersion}" implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}" diff --git a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java index 2502f98c45..bb660b1229 100644 --- a/search/src/main/java/marquez/searchengine/SearchApplication.java +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -3,6 +3,8 @@ import io.dropwizard.Application; import io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Environment; +import lombok.extern.slf4j.Slf4j; + import java.io.IOException; import org.jdbi.v3.core.Jdbi; @@ -11,6 +13,7 @@ import marquez.searchengine.health.SearchHealthCheck; import marquez.searchengine.resources.SearchResource; +@Slf4j public class SearchApplication extends Application { public static void main(String[] args) throws Exception { @@ -27,6 +30,7 @@ public void initialize(Bootstrap bootstrap) {} @Override public void run(SearchConfig configuration, Environment environment) throws IOException { + log.info("Application starting..."); Jdbi jdbi = DatabaseConnection.initializeJdbi(); final SearchResource searchResource = new SearchResource(jdbi); environment.jersey().register(searchResource); diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 8592d0ee1e..1aece37e01 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ 
b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -38,6 +38,7 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; +import lombok.extern.slf4j.Slf4j; import marquez.searchengine.models.IndexResponse; import marquez.searchengine.models.SearchResult; @@ -45,6 +46,7 @@ import marquez.service.models.LineageEvent; import marquez.service.models.LineageEvent.Dataset; +@Slf4j public class SearchService { private final OpenLineageDao openLineageDao; @@ -100,7 +102,7 @@ private void loadLineageEventsFromDatabase() throws IOException { Future future = executor.submit(new LineageEventProcessor(lineageEvents)); futures.add(future); } - System.out.println("done"); + log.info("I'm done loading events");; offset += limit; } while (!lineageEvents.isEmpty()); // Wait for all tasks to finish From bf3808a7ca80254b3650f1b58f801ee114ab7ce3 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Sat, 7 Sep 2024 02:48:07 +0200 Subject: [PATCH 85/87] WIP: spotlessApply Signed-off-by: Yannick Libert --- .../searchengine/SearchApplication.java | 7 +- .../searchengine/db/DatabaseConnection.java | 16 +- .../searchengine/models/IndexResponse.java | 175 ++--- .../searchengine/models/SearchRequest.java | 148 ++--- .../searchengine/models/SearchResult.java | 401 +++++------ .../resources/SearchResource.java | 161 +++-- .../searchengine/services/NGramAnalyzer.java | 24 +- .../searchengine/services/SearchService.java | 629 +++++++++--------- 8 files changed, 789 insertions(+), 772 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/SearchApplication.java b/search/src/main/java/marquez/searchengine/SearchApplication.java index bb660b1229..78bbccc4b5 100644 --- a/search/src/main/java/marquez/searchengine/SearchApplication.java +++ b/search/src/main/java/marquez/searchengine/SearchApplication.java @@ -3,15 +3,12 @@ import io.dropwizard.Application; import 
io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Environment; -import lombok.extern.slf4j.Slf4j; - import java.io.IOException; - -import org.jdbi.v3.core.Jdbi; - +import lombok.extern.slf4j.Slf4j; import marquez.searchengine.db.DatabaseConnection; import marquez.searchengine.health.SearchHealthCheck; import marquez.searchengine.resources.SearchResource; +import org.jdbi.v3.core.Jdbi; @Slf4j public class SearchApplication extends Application { diff --git a/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java b/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java index 471678e039..4a794867a5 100644 --- a/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java +++ b/search/src/main/java/marquez/searchengine/db/DatabaseConnection.java @@ -4,11 +4,11 @@ public class DatabaseConnection { - public static Jdbi initializeJdbi() { - String jdbcUrl = "jdbc:postgresql://localhost:5432/marquez"; - String username = "marquez"; - String password = "marquez"; - - return Jdbi.create(jdbcUrl, username, password); - } -} \ No newline at end of file + public static Jdbi initializeJdbi() { + String jdbcUrl = "jdbc:postgresql://localhost:5432/marquez"; + String username = "marquez"; + String password = "marquez"; + + return Jdbi.create(jdbcUrl, username, password); + } +} diff --git a/search/src/main/java/marquez/searchengine/models/IndexResponse.java b/search/src/main/java/marquez/searchengine/models/IndexResponse.java index 56ce1fa5b0..9e9f15ef68 100644 --- a/search/src/main/java/marquez/searchengine/models/IndexResponse.java +++ b/search/src/main/java/marquez/searchengine/models/IndexResponse.java @@ -4,95 +4,102 @@ public class IndexResponse { - @JsonProperty("_index") - private final String index; - - @JsonProperty("_id") - private final String id; - - @JsonProperty("_version") - private final long version; - - @JsonProperty("result") - private final String result; - - @JsonProperty("_shards") - private final ShardInfo 
shardInfo; - - @JsonProperty("_seq_no") - private final long seqNo; - - @JsonProperty("_primary_term") - private final long primaryTerm; - - // Constructor to initialize all final fields - public IndexResponse(String index, String id, long version, String result, ShardInfo shardInfo, long seqNo, long primaryTerm) { - this.index = index; - this.id = id; - this.version = version; - this.result = result; - this.shardInfo = shardInfo; - this.seqNo = seqNo; - this.primaryTerm = primaryTerm; - } - - // Getters - public String getIndex() { - return index; + @JsonProperty("_index") + private final String index; + + @JsonProperty("_id") + private final String id; + + @JsonProperty("_version") + private final long version; + + @JsonProperty("result") + private final String result; + + @JsonProperty("_shards") + private final ShardInfo shardInfo; + + @JsonProperty("_seq_no") + private final long seqNo; + + @JsonProperty("_primary_term") + private final long primaryTerm; + + // Constructor to initialize all final fields + public IndexResponse( + String index, + String id, + long version, + String result, + ShardInfo shardInfo, + long seqNo, + long primaryTerm) { + this.index = index; + this.id = id; + this.version = version; + this.result = result; + this.shardInfo = shardInfo; + this.seqNo = seqNo; + this.primaryTerm = primaryTerm; + } + + // Getters + public String getIndex() { + return index; + } + + public String getId() { + return id; + } + + public long getVersion() { + return version; + } + + public String getResult() { + return result; + } + + public ShardInfo getShardInfo() { + return shardInfo; + } + + public long getSeqNo() { + return seqNo; + } + + public long getPrimaryTerm() { + return primaryTerm; + } + + // ShardInfo inner class + public static class ShardInfo { + @JsonProperty("total") + private final int total; + + @JsonProperty("successful") + private final int successful; + + @JsonProperty("failed") + private final int failed; + + public ShardInfo(int 
total, int successful, int failed) { + this.total = total; + this.successful = successful; + this.failed = failed; } - public String getId() { - return id; + // Getters for ShardInfo + public int getTotal() { + return total; } - public long getVersion() { - return version; + public int getSuccessful() { + return successful; } - public String getResult() { - return result; - } - - public ShardInfo getShardInfo() { - return shardInfo; - } - - public long getSeqNo() { - return seqNo; - } - - public long getPrimaryTerm() { - return primaryTerm; - } - - // ShardInfo inner class - public static class ShardInfo { - @JsonProperty("total") - private final int total; - - @JsonProperty("successful") - private final int successful; - - @JsonProperty("failed") - private final int failed; - - public ShardInfo(int total, int successful, int failed) { - this.total = total; - this.successful = successful; - this.failed = failed; - } - - // Getters for ShardInfo - public int getTotal() { - return total; - } - - public int getSuccessful() { - return successful; - } - - public int getFailed() { - return failed; - } + public int getFailed() { + return failed; } + } } diff --git a/search/src/main/java/marquez/searchengine/models/SearchRequest.java b/search/src/main/java/marquez/searchengine/models/SearchRequest.java index ac718ae6b6..4933cb1c85 100644 --- a/search/src/main/java/marquez/searchengine/models/SearchRequest.java +++ b/search/src/main/java/marquez/searchengine/models/SearchRequest.java @@ -1,92 +1,92 @@ package marquez.searchengine.models; -import java.util.Map; import java.util.List; +import java.util.Map; public class SearchRequest { - private Highlight highlight; - private Query query; - - public static class Highlight { - private Map> fields; + private Highlight highlight; + private Query query; - // Getters and setters - public Map> getFields() { - return fields; - } + public static class Highlight { + private Map> fields; - public void setFields(Map> fields) { - 
this.fields = fields; - } + // Getters and setters + public Map> getFields() { + return fields; } - public static class Query { - private MultiMatch multi_match; - - public static class MultiMatch { - private List fields; - private String operator; - private String query; - private String type; - - // Getters and setters - public List getFields() { - return fields; - } - - public void setFields(List fields) { - this.fields = fields; - } - - public String getOperator() { - return operator; - } - - public void setOperator(String operator) { - this.operator = operator; - } - - public String getQuery() { - return query; - } - - public void setQuery(String query) { - this.query = query; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - } - - // Getters and setters - public MultiMatch getMulti_match() { - return multi_match; - } - - public void setMulti_match(MultiMatch multi_match) { - this.multi_match = multi_match; - } + public void setFields(Map> fields) { + this.fields = fields; } + } - // Getters and setters for SearchRequest - public Highlight getHighlight() { - return highlight; - } + public static class Query { + private MultiMatch multi_match; - public void setHighlight(Highlight highlight) { - this.highlight = highlight; - } + public static class MultiMatch { + private List fields; + private String operator; + private String query; + private String type; - public Query getQuery() { + // Getters and setters + public List getFields() { + return fields; + } + + public void setFields(List fields) { + this.fields = fields; + } + + public String getOperator() { + return operator; + } + + public void setOperator(String operator) { + this.operator = operator; + } + + public String getQuery() { return query; - } + } - public void setQuery(Query query) { + public void setQuery(String query) { this.query = query; + } + + public String getType() { + return type; + } + + public void setType(String type) { 
+ this.type = type; + } + } + + // Getters and setters + public MultiMatch getMulti_match() { + return multi_match; + } + + public void setMulti_match(MultiMatch multi_match) { + this.multi_match = multi_match; } + } + + // Getters and setters for SearchRequest + public Highlight getHighlight() { + return highlight; + } + + public void setHighlight(Highlight highlight) { + this.highlight = highlight; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query query) { + this.query = query; + } } diff --git a/search/src/main/java/marquez/searchengine/models/SearchResult.java b/search/src/main/java/marquez/searchengine/models/SearchResult.java index 5f46ded8be..d3ba82b1ec 100644 --- a/search/src/main/java/marquez/searchengine/models/SearchResult.java +++ b/search/src/main/java/marquez/searchengine/models/SearchResult.java @@ -6,247 +6,250 @@ import java.util.List; import java.util.Map; - public class SearchResult { - @JsonProperty("took") - private long took; - - @JsonProperty("timed_out") - private boolean timedOut = false; - - @JsonProperty("_shards") - private ShardStatistics shards; - - @JsonProperty("hits") - private HitsMetadata hitsMetadata; - - @JsonProperty("num_reduce_phases") - private long numberOfReducePhases; - - @JsonProperty("terminated_early") - private boolean terminatedEarly; - - @JsonProperty("suggest") - private Map suggest = new HashMap<>(); // Initialize as empty map - - @JsonProperty("highlights") - private List>> highlights; // Add this field for highlights - + @JsonProperty("took") + private long took; + + @JsonProperty("timed_out") + private boolean timedOut = false; + + @JsonProperty("_shards") + private ShardStatistics shards; + + @JsonProperty("hits") + private HitsMetadata hitsMetadata; + + @JsonProperty("num_reduce_phases") + private long numberOfReducePhases; + + @JsonProperty("terminated_early") + private boolean terminatedEarly; + + @JsonProperty("suggest") + private Map suggest = new HashMap<>(); // 
Initialize as empty map + + @JsonProperty("highlights") + private List>> highlights; // Add this field for highlights + + // Constructor + public SearchResult() { + this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures + this.hitsMetadata = new HitsMetadata(); + this.numberOfReducePhases = 0; // Default value + this.terminatedEarly = false; // Default value + this.suggest = new HashMap<>(); // Empty suggestion map + this.highlights = new ArrayList<>(); + } + + // Add document to hits + public void addDocument( + String index, + Map doc, + Map> highlight, + int indexPosition) { + Map hit = new HashMap<>(); + hit.put("_index", index); // Include the index name in the hit + hit.put("_source", doc); + hit.putAll(doc); + hitsMetadata.addHit(index, hit, indexPosition); + highlights.add(highlight); + } + + // Getters and Setters for all fields + public long getTook() { + return took; + } + + public void setTook(long took) { + this.took = took; + } + + public boolean isTimedOut() { + return timedOut; + } + + public void setTimedOut(boolean timedOut) { + this.timedOut = timedOut; + } + + public ShardStatistics getShards() { + return shards; + } + + public void setShards(ShardStatistics shards) { + this.shards = shards; + } + + public HitsMetadata getHitsMetadata() { + return hitsMetadata; + } + + public void setHitsMetadata(HitsMetadata hitsMetadata) { + this.hitsMetadata = hitsMetadata; + } + + public long getNumberOfReducePhases() { + return numberOfReducePhases; + } + + public void setNumberOfReducePhases(long numberOfReducePhases) { + this.numberOfReducePhases = numberOfReducePhases; + } + + public boolean isTerminatedEarly() { + return terminatedEarly; + } + + public void setTerminatedEarly(boolean terminatedEarly) { + this.terminatedEarly = terminatedEarly; + } + + public Map getSuggest() { + return suggest; + } + + public void setSuggest(Map suggest) { + this.suggest = suggest; + } + + // ShardStatistics inner class + public 
static class ShardStatistics { + @JsonProperty("total") + private int total; + + @JsonProperty("successful") + private int successful; + + @JsonProperty("skipped") + private int skipped; + + @JsonProperty("failed") + private int failed; // Constructor - public SearchResult() { - this.shards = new ShardStatistics(1, 1, 0, 0); // Assuming a single shard with no failures - this.hitsMetadata = new HitsMetadata(); - this.numberOfReducePhases = 0; // Default value - this.terminatedEarly = false; // Default value - this.suggest = new HashMap<>(); // Empty suggestion map - this.highlights = new ArrayList<>(); + public ShardStatistics(int total, int successful, int skipped, int failed) { + this.total = total; + this.successful = successful; + this.skipped = skipped; + this.failed = failed; } - // Add document to hits - public void addDocument(String index, Map doc, Map> highlight, int indexPosition) { - Map hit = new HashMap<>(); - hit.put("_index", index); // Include the index name in the hit - hit.put("_source", doc); - hit.putAll(doc); - hitsMetadata.addHit(index, hit, indexPosition); - highlights.add(highlight); + // Getters and Setters + public int getTotal() { + return total; } - // Getters and Setters for all fields - public long getTook() { - return took; + public void setTotal(int total) { + this.total = total; } - public void setTook(long took) { - this.took = took; + public int getSuccessful() { + return successful; } - public boolean isTimedOut() { - return timedOut; + public void setSuccessful(int successful) { + this.successful = successful; } - public void setTimedOut(boolean timedOut) { - this.timedOut = timedOut; + public int getSkipped() { + return skipped; } - public ShardStatistics getShards() { - return shards; + public void setSkipped(int skipped) { + this.skipped = skipped; } - public void setShards(ShardStatistics shards) { - this.shards = shards; + public int getFailed() { + return failed; } - public HitsMetadata getHitsMetadata() { - return 
hitsMetadata; + public void setFailed(int failed) { + this.failed = failed; } + } - public void setHitsMetadata(HitsMetadata hitsMetadata) { - this.hitsMetadata = hitsMetadata; - } + // HitsMetadata inner class + public static class HitsMetadata { + @JsonProperty("total") + private TotalHits totalHits; - public long getNumberOfReducePhases() { - return numberOfReducePhases; - } + @JsonProperty("max_score") + private Float maxScore; - public void setNumberOfReducePhases(long numberOfReducePhases) { - this.numberOfReducePhases = numberOfReducePhases; - } + @JsonProperty("hits") + private List> hits; - public boolean isTerminatedEarly() { - return terminatedEarly; + public HitsMetadata() { + this.totalHits = new TotalHits(0, "eq"); + this.maxScore = null; + this.hits = new ArrayList<>(); } - public void setTerminatedEarly(boolean terminatedEarly) { - this.terminatedEarly = terminatedEarly; + // Getters and Setters + public TotalHits getTotalHits() { + return totalHits; } - public Map getSuggest() { - return suggest; + public void setTotalHits(TotalHits totalHits) { + this.totalHits = totalHits; } - public void setSuggest(Map suggest) { - this.suggest = suggest; + public Float getMaxScore() { + return maxScore; } - // ShardStatistics inner class - public static class ShardStatistics { - @JsonProperty("total") - private int total; - - @JsonProperty("successful") - private int successful; - - @JsonProperty("skipped") - private int skipped; - - @JsonProperty("failed") - private int failed; - - // Constructor - public ShardStatistics(int total, int successful, int skipped, int failed) { - this.total = total; - this.successful = successful; - this.skipped = skipped; - this.failed = failed; - } - - // Getters and Setters - public int getTotal() { - return total; - } - - public void setTotal(int total) { - this.total = total; - } - - public int getSuccessful() { - return successful; - } - - public void setSuccessful(int successful) { - this.successful = successful; - } - - 
public int getSkipped() { - return skipped; - } - - public void setSkipped(int skipped) { - this.skipped = skipped; - } - - public int getFailed() { - return failed; - } - - public void setFailed(int failed) { - this.failed = failed; - } + public void setMaxScore(Float maxScore) { + this.maxScore = maxScore; } - // HitsMetadata inner class - public static class HitsMetadata { - @JsonProperty("total") - private TotalHits totalHits; - - @JsonProperty("max_score") - private Float maxScore; - - @JsonProperty("hits") - private List> hits; - - public HitsMetadata() { - this.totalHits = new TotalHits(0, "eq"); - this.maxScore = null; - this.hits = new ArrayList<>(); - } - - // Getters and Setters - public TotalHits getTotalHits() { - return totalHits; - } - - public void setTotalHits(TotalHits totalHits) { - this.totalHits = totalHits; - } + public List> getHits() { + return hits; + } - public Float getMaxScore() { - return maxScore; - } + public void setHits(List> hits) { + this.hits = hits; + } - public void setMaxScore(Float maxScore) { - this.maxScore = maxScore; - } + // Add a hit to the hits list + public void addHit(String index, Map doc, int indexPosition) { + Map hit = new HashMap<>(); + hit.put("_index", index); + hit.putAll(doc); + // String uniqueId = ((Map) doc.get("_source")).get("run_id") + "-" + + // indexPosition; + hit.put("_id", "id"); // Ensure the `_id` is unique + this.hits.add(hit); + } + } - public List> getHits() { - return hits; - } + // TotalHits inner class + public static class TotalHits { + @JsonProperty("value") + private long value; - public void setHits(List> hits) { - this.hits = hits; - } + @JsonProperty("relation") + private String relation; - // Add a hit to the hits list - public void addHit(String index, Map doc, int indexPosition) { - Map hit = new HashMap<>(); - hit.put("_index", index); - hit.putAll(doc); - //String uniqueId = ((Map) doc.get("_source")).get("run_id") + "-" + indexPosition; - hit.put("_id", "id"); // Ensure the 
`_id` is unique - this.hits.add(hit); - } - } - - // TotalHits inner class - public static class TotalHits { - @JsonProperty("value") - private long value; - - @JsonProperty("relation") - private String relation; - - public TotalHits(long value, String relation) { - this.value = value; - this.relation = relation; - } + public TotalHits(long value, String relation) { + this.value = value; + this.relation = relation; + } - // Getters and Setters - public long getValue() { - return value; - } + // Getters and Setters + public long getValue() { + return value; + } - public void setValue(long value) { - this.value = value; - } + public void setValue(long value) { + this.value = value; + } - public String getRelation() { - return relation; - } + public String getRelation() { + return relation; + } - public void setRelation(String relation) { - this.relation = relation; - } + public void setRelation(String relation) { + this.relation = relation; } + } } diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index 017942fa3a..8191380f48 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.util.List; import java.util.Map; - import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -13,101 +12,101 @@ import javax.ws.rs.Produces; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; - -import org.jdbi.v3.core.Jdbi; -import org.jdbi.v3.sqlobject.SqlObjectPlugin; - -//import com.fasterxml.jackson.databind.ObjectMapper; -import marquez.searchengine.services.SearchService; +import marquez.db.OpenLineageDao; import marquez.searchengine.models.IndexResponse; -import marquez.searchengine.models.SearchResult; import marquez.searchengine.models.SearchRequest; -import 
marquez.db.OpenLineageDao; +import marquez.searchengine.models.SearchResult; +import marquez.searchengine.services.SearchService; +import org.jdbi.v3.core.Jdbi; +import org.jdbi.v3.sqlobject.SqlObjectPlugin; @Path("/") @Produces(MediaType.APPLICATION_JSON) public class SearchResource { - private final SearchService searchService; - private final Jdbi jdbi; + private final SearchService searchService; + private final Jdbi jdbi; - public SearchResource(Jdbi jdbi) throws IOException { - this.jdbi = jdbi.installPlugin(new SqlObjectPlugin()); - OpenLineageDao openLineageDao = jdbi.onDemand(OpenLineageDao.class); - this.searchService = new SearchService(openLineageDao); - } + public SearchResource(Jdbi jdbi) throws IOException { + this.jdbi = jdbi.installPlugin(new SqlObjectPlugin()); + OpenLineageDao openLineageDao = jdbi.onDemand(OpenLineageDao.class); + this.searchService = new SearchService(openLineageDao); + } - @POST - @Path("/jobs/_search") - @Consumes(MediaType.APPLICATION_JSON) - public Response searchJobs(SearchRequest request) { - try { - String query = request.getQuery().getMulti_match().getQuery(); - List fields = request.getQuery().getMulti_match().getFields(); - // Log the extracted details for debugging - //System.out.println("Received query: " + query + fields); - SearchResult result = searchService.searchJobs(query, fields); - //String jsonResponse = new ObjectMapper().writeValueAsString(result); - //System.out.println("Serialized Response: " + jsonResponse); - return Response.ok(result).build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); - } + @POST + @Path("/jobs/_search") + @Consumes(MediaType.APPLICATION_JSON) + public Response searchJobs(SearchRequest request) { + try { + String query = request.getQuery().getMulti_match().getQuery(); + List fields = request.getQuery().getMulti_match().getFields(); + // Log the extracted details for debugging + // 
System.out.println("Received query: " + query + fields); + SearchResult result = searchService.searchJobs(query, fields); + // String jsonResponse = new ObjectMapper().writeValueAsString(result); + // System.out.println("Serialized Response: " + jsonResponse); + return Response.ok(result).build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); } + } - @POST - @Path("/datasets/_search") - @Consumes(MediaType.APPLICATION_JSON) - public Response searchDatasets(SearchRequest request) { - try { - String query = request.getQuery().getMulti_match().getQuery(); - List fields = request.getQuery().getMulti_match().getFields(); - // Log the extracted details for debugging - //System.out.println("Received query: " + query); - SearchResult result = searchService.searchDatasets(query, fields); - //String jsonResponse = new ObjectMapper().writeValueAsString(result); - //System.out.println("Serialized Response: " + jsonResponse); - return Response.ok(result).build(); - } catch (Exception e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); - } + @POST + @Path("/datasets/_search") + @Consumes(MediaType.APPLICATION_JSON) + public Response searchDatasets(SearchRequest request) { + try { + String query = request.getQuery().getMulti_match().getQuery(); + List fields = request.getQuery().getMulti_match().getFields(); + // Log the extracted details for debugging + // System.out.println("Received query: " + query); + SearchResult result = searchService.searchDatasets(query, fields); + // String jsonResponse = new ObjectMapper().writeValueAsString(result); + // System.out.println("Serialized Response: " + jsonResponse); + return Response.ok(result).build(); + } catch (Exception e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); } + } - @PUT - @Path("/jobs/_doc/{id}") - @Consumes(MediaType.APPLICATION_JSON) - public 
Response indexJob(@PathParam("id") String id, Map document) { - try { - IndexResponse indexResponse = searchService.indexJobDocument(document); - return Response.ok(indexResponse).build(); - } catch (IOException e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR) - .entity("Failed to index job document: " + e.getMessage()) - .build(); - } + @PUT + @Path("/jobs/_doc/{id}") + @Consumes(MediaType.APPLICATION_JSON) + public Response indexJob(@PathParam("id") String id, Map document) { + try { + IndexResponse indexResponse = searchService.indexJobDocument(document); + return Response.ok(indexResponse).build(); + } catch (IOException e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR) + .entity("Failed to index job document: " + e.getMessage()) + .build(); } + } - @PUT - @Path("/datasets/_doc/{id}") - @Consumes(MediaType.APPLICATION_JSON) - public Response indexDataset(@PathParam("id") String id, Map document) { - try { - IndexResponse indexResponse = searchService.indexDatasetDocument(document); - return Response.ok(indexResponse).build(); - } catch (IOException e) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR) - .entity("Failed to index dataset document: " + e.getMessage()) - .build(); - } + @PUT + @Path("/datasets/_doc/{id}") + @Consumes(MediaType.APPLICATION_JSON) + public Response indexDataset(@PathParam("id") String id, Map document) { + try { + IndexResponse indexResponse = searchService.indexDatasetDocument(document); + return Response.ok(indexResponse).build(); + } catch (IOException e) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR) + .entity("Failed to index dataset document: " + e.getMessage()) + .build(); } - @GET - @Path("/ping") - public Response ping() { - boolean isHealthy = true; - if (isHealthy) { - return Response.ok().entity("{\"status\":\"true\"}").build(); - } else { - return Response.status(Response.Status.SERVICE_UNAVAILABLE).entity("{\"status\":\"false\"}").build(); - } + } + 
+ @GET + @Path("/ping") + public Response ping() { + boolean isHealthy = true; + if (isHealthy) { + return Response.ok().entity("{\"status\":\"true\"}").build(); + } else { + return Response.status(Response.Status.SERVICE_UNAVAILABLE) + .entity("{\"status\":\"false\"}") + .build(); } + } } diff --git a/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java b/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java index 6c8eaf8180..b8994ec8dc 100644 --- a/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java +++ b/search/src/main/java/marquez/searchengine/services/NGramAnalyzer.java @@ -6,18 +6,18 @@ import org.apache.lucene.analysis.ngram.NGramTokenizer; public class NGramAnalyzer extends Analyzer { - private final int minGram; - private final int maxGram; + private final int minGram; + private final int maxGram; - public NGramAnalyzer(int minGram, int maxGram) { - this.minGram = minGram; - this.maxGram = maxGram; - } + public NGramAnalyzer(int minGram, int maxGram) { + this.minGram = minGram; + this.maxGram = maxGram; + } - @Override - protected TokenStreamComponents createComponents(String fieldName) { - NGramTokenizer tokenizer = new NGramTokenizer(minGram, maxGram); // Define the N-grams range - TokenStream tokenStream = new LowerCaseFilter(tokenizer); // Optional: make everything lowercase - return new TokenStreamComponents(tokenizer, tokenStream); - } + @Override + protected TokenStreamComponents createComponents(String fieldName) { + NGramTokenizer tokenizer = new NGramTokenizer(minGram, maxGram); // Define the N-grams range + TokenStream tokenStream = new LowerCaseFilter(tokenizer); // Optional: make everything lowercase + return new TokenStreamComponents(tokenizer, tokenStream); + } } diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 1aece37e01..041cd878f2 100644 --- 
a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -11,11 +11,14 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - import javax.validation.Valid; import javax.validation.constraints.NotNull; - -//import org.apache.lucene.analysis.standard.StandardAnalyzer; +import lombok.extern.slf4j.Slf4j; +import marquez.db.OpenLineageDao; +import marquez.searchengine.models.IndexResponse; +import marquez.searchengine.models.SearchResult; +import marquez.service.models.LineageEvent; +import marquez.service.models.LineageEvent.Dataset; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; @@ -38,355 +41,363 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; -import lombok.extern.slf4j.Slf4j; - -import marquez.searchengine.models.IndexResponse; -import marquez.searchengine.models.SearchResult; -import marquez.db.OpenLineageDao; -import marquez.service.models.LineageEvent; -import marquez.service.models.LineageEvent.Dataset; @Slf4j public class SearchService { - private final OpenLineageDao openLineageDao; - private final Directory jobIndexDirectory; - private final Directory datasetIndexDirectory; - //private final StandardAnalyzer analyzer; - private final NGramAnalyzer analyzer; - private static final int MAX_RESULTS = 10; - private final ExecutorService executor; - private final IndexWriter jobIndexWriter; - private final IndexWriter datasetIndexWriter; - - - public SearchService(OpenLineageDao openLineageDao) throws IOException { - this.openLineageDao = openLineageDao; - this.jobIndexDirectory = new ByteBuffersDirectory(); - this.datasetIndexDirectory = new ByteBuffersDirectory(); - //this.analyzer = new 
StandardAnalyzer(); - this.analyzer = new NGramAnalyzer(3, 4); - this.executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); - this.jobIndexWriter = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer)); - this.datasetIndexWriter = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer)); - // init index with DB lineage events - indexLineageEventsInBackground(); - } - - // Load lineage events from DB and index them in the background - private void indexLineageEventsInBackground() { - executor.submit(() -> { - try { - loadLineageEventsFromDatabase(); - } catch (IOException e) { - e.printStackTrace(); - } + private final OpenLineageDao openLineageDao; + private final Directory jobIndexDirectory; + private final Directory datasetIndexDirectory; + // private final StandardAnalyzer analyzer; + private final NGramAnalyzer analyzer; + private static final int MAX_RESULTS = 10; + private final ExecutorService executor; + private final IndexWriter jobIndexWriter; + private final IndexWriter datasetIndexWriter; + + public SearchService(OpenLineageDao openLineageDao) throws IOException { + this.openLineageDao = openLineageDao; + this.jobIndexDirectory = new ByteBuffersDirectory(); + this.datasetIndexDirectory = new ByteBuffersDirectory(); + // this.analyzer = new StandardAnalyzer(); + this.analyzer = new NGramAnalyzer(3, 4); + this.executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + this.jobIndexWriter = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer)); + this.datasetIndexWriter = + new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer)); + // init index with DB lineage events + indexLineageEventsInBackground(); + } + + // Load lineage events from DB and index them in the background + private void indexLineageEventsInBackground() { + executor.submit( + () -> { + try { + loadLineageEventsFromDatabase(); + } catch (IOException e) { + 
e.printStackTrace(); + } }); + } + + private void loadLineageEventsFromDatabase() throws IOException { + ZonedDateTime before = ZonedDateTime.now(); + ZonedDateTime after = before.minusDays(1); + int limit = 10000; + int offset = 0; + + List lineageEvents; + List> futures = new ArrayList<>(); + do { + // Fetch a batch of lineage events + lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); + + // If there are events, process them in parallel + if (!lineageEvents.isEmpty()) { + // Submit the batch to the executor service + Future future = executor.submit(new LineageEventProcessor(lineageEvents)); + futures.add(future); + } + ; + offset += limit; + } while (!lineageEvents.isEmpty()); + // Wait for all tasks to finish + for (Future future : futures) { + try { + future.get(); // Wait for each thread to finish + } catch (Exception e) { + e.printStackTrace(); + } } + executor.shutdown(); - private void loadLineageEventsFromDatabase() throws IOException { - ZonedDateTime before = ZonedDateTime.now(); - ZonedDateTime after = before.minusDays(1); - int limit = 10000; - int offset = 0; - - List lineageEvents; - List> futures = new ArrayList<>(); - do { - // Fetch a batch of lineage events - lineageEvents = openLineageDao.getAllLineageEventsDesc(before, after, limit, offset); - - // If there are events, process them in parallel - if (!lineageEvents.isEmpty()) { - // Submit the batch to the executor service - Future future = executor.submit(new LineageEventProcessor(lineageEvents)); - futures.add(future); - } - log.info("I'm done loading events");; - offset += limit; - } while (!lineageEvents.isEmpty()); - // Wait for all tasks to finish - for (Future future : futures) { - try { - future.get(); // Wait for each thread to finish - } catch (Exception e) { - e.printStackTrace(); - } - } - executor.shutdown(); - - // Close the IndexWriters when done - jobIndexWriter.close(); - datasetIndexWriter.close(); - } + // Close the IndexWriters when done + 
jobIndexWriter.close(); + datasetIndexWriter.close(); + } - // The class responsible for processing a batch of lineage events in parallel - private class LineageEventProcessor implements Callable { - private final List events; + // The class responsible for processing a batch of lineage events in parallel + private class LineageEventProcessor implements Callable { + private final List events; - public LineageEventProcessor(List events) { - this.events = events; - } - - @Override - public Void call() throws Exception { - try { - indexLineageEvents(events); // Index the batch of lineage events - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } + public LineageEventProcessor(List events) { + this.events = events; } - private void indexLineageEvents(@Valid @NotNull List lineageEvents) throws IOException { - // for dedup purpose - Map> inputMaps = new HashMap<>(); - Map> outputMaps = new HashMap<>(); - Map> jobMaps = new HashMap<>(); - - for (LineageEvent event : lineageEvents) { - if (event.getInputs() != null) { - for (Dataset input : event.getInputs()) { - Map inputMap = mapDatasetEvent(input, event.getRun().getRunId(), event.getEventType()); - //deduplicate on uniqueId - inputMaps.put((String)inputMap.get("id"), inputMap); - } - } - if (event.getOutputs() != null) { - for (Dataset output : event.getOutputs()) { - Map outputMap = mapDatasetEvent(output, event.getRun().getRunId(), event.getEventType()); - outputMaps.put((String)outputMap.get("id"), outputMap); - } - } - Map jobMap = mapJobEvent(event); - jobMaps.put((String)jobMap.get("id"), jobMap); - } - // At this point, inputMaps, outputMaps, and jobMaps are de-duplicated - if (!inputMaps.isEmpty()) { - indexDatasetDocuments(new ArrayList<>(inputMaps.values()), datasetIndexWriter); - } - if (!outputMaps.isEmpty()) { - indexDatasetDocuments(new ArrayList<>(outputMaps.values()), datasetIndexWriter); + @Override + public Void call() throws Exception { + try { + indexLineageEvents(events); // 
Index the batch of lineage events + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + } + + private void indexLineageEvents(@Valid @NotNull List lineageEvents) + throws IOException { + // for dedup purpose + Map> inputMaps = new HashMap<>(); + Map> outputMaps = new HashMap<>(); + Map> jobMaps = new HashMap<>(); + + for (LineageEvent event : lineageEvents) { + if (event.getInputs() != null) { + for (Dataset input : event.getInputs()) { + Map inputMap = + mapDatasetEvent(input, event.getRun().getRunId(), event.getEventType()); + // deduplicate on uniqueId + inputMaps.put((String) inputMap.get("id"), inputMap); } - if (!jobMaps.isEmpty()) { - indexJobDocuments(new ArrayList<>(jobMaps.values()), jobIndexWriter); + } + if (event.getOutputs() != null) { + for (Dataset output : event.getOutputs()) { + Map outputMap = + mapDatasetEvent(output, event.getRun().getRunId(), event.getEventType()); + outputMaps.put((String) outputMap.get("id"), outputMap); } + } + Map jobMap = mapJobEvent(event); + jobMaps.put((String) jobMap.get("id"), jobMap); } - - // Helper method to map dataset details to Map - private Map mapDatasetEvent(Dataset dataset, String run_id, String eventType) { - Map datasetMap = new HashMap<>(); - datasetMap.put("run_id", run_id); - datasetMap.put("eventType", eventType); - datasetMap.put("name", dataset.getName()); - datasetMap.put("namespace", dataset.getNamespace()); - datasetMap.put("id",dataset.getName()+"_"+dataset.getNamespace()); - Optional.ofNullable(dataset.getFacets()).ifPresent(facets -> datasetMap.put("facets", facets)); - return datasetMap; + // At this point, inputMaps, outputMaps, and jobMaps are de-duplicated + if (!inputMaps.isEmpty()) { + indexDatasetDocuments(new ArrayList<>(inputMaps.values()), datasetIndexWriter); } - - // Helper method to map job details to Map - private Map mapJobEvent(LineageEvent event) { - Map jobMap = new HashMap<>(); - jobMap.put("run_id", event.getRun().getRunId().toString()); - 
jobMap.put("name", event.getJob().getName()); - jobMap.put("namespace", event.getJob().getNamespace()); - jobMap.put("id",event.getJob().getName()+"_"+event.getJob().getNamespace()); - jobMap.put("eventType", event.getEventType()); - Optional.ofNullable(event.getRun().getFacets()).ifPresent(facets -> jobMap.put("facets", facets)); - return jobMap; + if (!outputMaps.isEmpty()) { + indexDatasetDocuments(new ArrayList<>(outputMaps.values()), datasetIndexWriter); } - - private boolean documentAlreadyExists(String uniqueId, Directory indexDirectory) throws IOException { - if (isIndexEmpty(indexDirectory)) { - return false; - } - try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { - IndexSearcher searcher = new IndexSearcher(reader); - Query query = new TermQuery(new Term("id", uniqueId)); - TopDocs topDocs = searcher.search(query, 1); - return topDocs.totalHits.value > 0; - } catch (Exception e) { - e.printStackTrace(); - throw new IOException("Failed to search for document", e); - } + if (!jobMaps.isEmpty()) { + indexJobDocuments(new ArrayList<>(jobMaps.values()), jobIndexWriter); } - - // Method to index a job document - //TODO: don't index a Map, use the Dataset object directly - public IndexResponse indexJobDocument(Map document) throws IOException { - if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { - return createIndexResponse("jobs", document.get("name").toString(), false); - } - try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { - Document doc = createJobDocument(document); - writer.addDocument(doc); - writer.commit(); - return createIndexResponse("jobs", document.get("name").toString(), true); - } + } + + // Helper method to map dataset details to Map + private Map mapDatasetEvent(Dataset dataset, String run_id, String eventType) { + Map datasetMap = new HashMap<>(); + datasetMap.put("run_id", run_id); + datasetMap.put("eventType", eventType); + datasetMap.put("name", 
dataset.getName()); + datasetMap.put("namespace", dataset.getNamespace()); + datasetMap.put("id", dataset.getName() + "_" + dataset.getNamespace()); + Optional.ofNullable(dataset.getFacets()).ifPresent(facets -> datasetMap.put("facets", facets)); + return datasetMap; + } + + // Helper method to map job details to Map + private Map mapJobEvent(LineageEvent event) { + Map jobMap = new HashMap<>(); + jobMap.put("run_id", event.getRun().getRunId().toString()); + jobMap.put("name", event.getJob().getName()); + jobMap.put("namespace", event.getJob().getNamespace()); + jobMap.put("id", event.getJob().getName() + "_" + event.getJob().getNamespace()); + jobMap.put("eventType", event.getEventType()); + Optional.ofNullable(event.getRun().getFacets()) + .ifPresent(facets -> jobMap.put("facets", facets)); + return jobMap; + } + + private boolean documentAlreadyExists(String uniqueId, Directory indexDirectory) + throws IOException { + if (isIndexEmpty(indexDirectory)) { + return false; } - - // Method to index a dataset document - //TODO: don't index a Map, use the Dataset object directly - public IndexResponse indexDatasetDocument(Map document) throws IOException { - if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { - return createIndexResponse("datasets", document.get("name").toString(), false); - } - try (IndexWriter writer = new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { - Document doc = createDatasetDocument(document); - writer.addDocument(doc); - writer.commit(); - return createIndexResponse("datasets", document.get("name").toString(), true); - } + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + IndexSearcher searcher = new IndexSearcher(reader); + Query query = new TermQuery(new Term("id", uniqueId)); + TopDocs topDocs = searcher.search(query, 1); + return topDocs.totalHits.value > 0; + } catch (Exception e) { + e.printStackTrace(); + throw new IOException("Failed to search for document", 
e); } + } - public void indexJobDocuments(List> documents, IndexWriter writer) throws IOException { - for (Map document : documents) { - if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { - continue; - } - Document doc = createJobDocument(document); - writer.addDocument(doc); - } - writer.commit(); + // Method to index a job document + // TODO: don't index a Map, use the Dataset object directly + public IndexResponse indexJobDocument(Map document) throws IOException { + if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { + return createIndexResponse("jobs", document.get("name").toString(), false); } - - public void indexDatasetDocuments(List> documents, IndexWriter writer) throws IOException { - for (Map document : documents) { - if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { - continue; - } - Document doc = createDatasetDocument(document); - writer.addDocument(doc); - } - writer.commit(); + try (IndexWriter writer = new IndexWriter(jobIndexDirectory, new IndexWriterConfig(analyzer))) { + Document doc = createJobDocument(document); + writer.addDocument(doc); + writer.commit(); + return createIndexResponse("jobs", document.get("name").toString(), true); } + } - private Document createJobDocument(Map document) { - Document doc = new Document(); - doc.add(new StringField("id", (String) document.get("id"), Field.Store.YES)); - doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); - doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); - doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); - doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); - if (document.containsKey("facets")) { - doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); - } - if (document.containsKey("runFacets")) { - doc.add(new TextField("runFacets", 
document.get("runFacets").toString(), Field.Store.YES)); - } - return doc; + // Method to index a dataset document + // TODO: don't index a Map, use the Dataset object directly + public IndexResponse indexDatasetDocument(Map document) throws IOException { + if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { + return createIndexResponse("datasets", document.get("name").toString(), false); + } + try (IndexWriter writer = + new IndexWriter(datasetIndexDirectory, new IndexWriterConfig(analyzer))) { + Document doc = createDatasetDocument(document); + writer.addDocument(doc); + writer.commit(); + return createIndexResponse("datasets", document.get("name").toString(), true); + } + } + + public void indexJobDocuments(List> documents, IndexWriter writer) + throws IOException { + for (Map document : documents) { + if (documentAlreadyExists((String) document.get("id"), jobIndexDirectory)) { + continue; + } + Document doc = createJobDocument(document); + writer.addDocument(doc); + } + writer.commit(); + } + + public void indexDatasetDocuments(List> documents, IndexWriter writer) + throws IOException { + for (Map document : documents) { + if (documentAlreadyExists((String) document.get("id"), datasetIndexDirectory)) { + continue; + } + Document doc = createDatasetDocument(document); + writer.addDocument(doc); + } + writer.commit(); + } + + private Document createJobDocument(Map document) { + Document doc = new Document(); + doc.add(new StringField("id", (String) document.get("id"), Field.Store.YES)); + doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + if (document.containsKey("facets")) { + doc.add(new TextField("facets", 
document.get("facets").toString(), Field.Store.YES)); + } + if (document.containsKey("runFacets")) { + doc.add(new TextField("runFacets", document.get("runFacets").toString(), Field.Store.YES)); } + return doc; + } + + private Document createDatasetDocument(Map document) { + Document doc = new Document(); + doc.add(new StringField("id", (String) document.get("id"), Field.Store.YES)); + doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); + doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); + doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); + doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + + if (document.containsKey("facets")) { + doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); + } + if (document.containsKey("inputFacets")) { + doc.add( + new TextField("inputFacets", document.get("inputFacets").toString(), Field.Store.YES)); + } + if (document.containsKey("outputFacets")) { + doc.add( + new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); + } + return doc; + } - private Document createDatasetDocument(Map document) { - Document doc = new Document(); - doc.add(new StringField("id", (String) document.get("id"), Field.Store.YES)); - doc.add(new StringField("run_id", (String) document.get("run_id"), Field.Store.YES)); - doc.add(new TextField("name", (String) document.get("name"), Field.Store.YES)); - doc.add(new TextField("namespace", (String) document.get("namespace"), Field.Store.YES)); - doc.add(new TextField("eventType", (String) document.get("eventType"), Field.Store.YES)); + private IndexResponse createIndexResponse(String index, String id, boolean created) { + long version = 1L; // Simulated version number + String result = created ? 
"created" : "updated"; - if (document.containsKey("facets")) { - doc.add(new TextField("facets", document.get("facets").toString(), Field.Store.YES)); - } - if (document.containsKey("inputFacets")) { - doc.add(new TextField("inputFacets", document.get("inputFacets").toString(), Field.Store.YES)); - } - if (document.containsKey("outputFacets")) { - doc.add(new TextField("outputFacets", document.get("outputFacets").toString(), Field.Store.YES)); - } - return doc; + IndexResponse.ShardInfo shardInfo = + new IndexResponse.ShardInfo(1, 1, 0); // 1 shard, 1 successful, 0 failed + + long seqNo = 1L; // Simulated sequence number + long primaryTerm = 1L; // Simulated primary term + + return new IndexResponse(index, id, version, result, shardInfo, seqNo, primaryTerm); + } + + private boolean isIndexEmpty(Directory indexDirectory) throws IOException { + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + return reader.numDocs() == 0; + } catch (IndexNotFoundException e) { + return true; } + } - private IndexResponse createIndexResponse(String index, String id, boolean created) { - long version = 1L; // Simulated version number - String result = created ? 
"created" : "updated"; + public SearchResult searchDatasets(String query, List fields) throws Exception { + return search(query, fields, datasetIndexDirectory); + } - IndexResponse.ShardInfo shardInfo = new IndexResponse.ShardInfo(1, 1, 0); // 1 shard, 1 successful, 0 failed + public SearchResult searchJobs(String query, List fields) throws Exception { + return search(query, fields, jobIndexDirectory); + } - long seqNo = 1L; // Simulated sequence number - long primaryTerm = 1L; // Simulated primary term + private SearchResult search(String query, List fields, Directory indexDirectory) + throws Exception { + long startTime = System.currentTimeMillis(); - return new IndexResponse(index, id, version, result, shardInfo, seqNo, primaryTerm); + if (isIndexEmpty(indexDirectory)) { + return createEmptySearchResult(startTime); } - private boolean isIndexEmpty(Directory indexDirectory) throws IOException { - try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { - return reader.numDocs() == 0; - } catch (IndexNotFoundException e) { - return true; - } - } + try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { + IndexSearcher searcher = new IndexSearcher(reader); + MultiFieldQueryParser parser = + new MultiFieldQueryParser(fields.toArray(new String[0]), analyzer); + Query q = parser.parse(query); - public SearchResult searchDatasets(String query, List fields) throws Exception { - return search(query, fields, datasetIndexDirectory); - } + TopDocs topDocs = searcher.search(q, MAX_RESULTS); + long took = System.currentTimeMillis() - startTime; - public SearchResult searchJobs(String query, List fields) throws Exception { - return search(query, fields, jobIndexDirectory); - } + SearchResult result = new SearchResult(); + result.setTook(took); + result.getHitsMetadata().getTotalHits().setValue(topDocs.totalHits.value); + + StoredFields storedFields = searcher.storedFields(); + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("", ""); + 
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(q)); - private SearchResult search(String query, List fields, Directory indexDirectory) throws Exception { - long startTime = System.currentTimeMillis(); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + ScoreDoc sd = topDocs.scoreDocs[i]; + Document doc = storedFields.document(sd.doc); + Map allDoc = new HashMap<>(); + Map> highlight = new HashMap<>(); - if (isIndexEmpty(indexDirectory)) { - return createEmptySearchResult(startTime); + for (IndexableField field : doc.getFields()) { + allDoc.put(field.name(), field.stringValue()); } - try (DirectoryReader reader = DirectoryReader.open(indexDirectory)) { - IndexSearcher searcher = new IndexSearcher(reader); - MultiFieldQueryParser parser = new MultiFieldQueryParser(fields.toArray(new String[0]), analyzer); - Query q = parser.parse(query); - - TopDocs topDocs = searcher.search(q, MAX_RESULTS); - long took = System.currentTimeMillis() - startTime; - - SearchResult result = new SearchResult(); - result.setTook(took); - result.getHitsMetadata().getTotalHits().setValue(topDocs.totalHits.value); - - StoredFields storedFields = searcher.storedFields(); - SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("", ""); - Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(q)); - - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - ScoreDoc sd = topDocs.scoreDocs[i]; - Document doc = storedFields.document(sd.doc); - Map allDoc = new HashMap<>(); - Map> highlight = new HashMap<>(); - - for (IndexableField field : doc.getFields()) { - allDoc.put(field.name(), field.stringValue()); - } - - for (String field : fields) { - String text = doc.get(field); - if (text != null) { - String highlightedText = highlighter.getBestFragment(analyzer, field, text); - if (highlightedText != null) { - List highlightList = new ArrayList<>(); - highlightList.add(highlightedText); - highlight.put(field, highlightList); - } - } - } - - 
result.addDocument(indexDirectory == jobIndexDirectory ? "jobs" : "datasets", allDoc, highlight, i); + for (String field : fields) { + String text = doc.get(field); + if (text != null) { + String highlightedText = highlighter.getBestFragment(analyzer, field, text); + if (highlightedText != null) { + List highlightList = new ArrayList<>(); + highlightList.add(highlightedText); + highlight.put(field, highlightList); } - - return result; + } } + + result.addDocument( + indexDirectory == jobIndexDirectory ? "jobs" : "datasets", allDoc, highlight, i); + } + + return result; } + } - private SearchResult createEmptySearchResult(long startTime) { - long took = System.currentTimeMillis() - startTime; + private SearchResult createEmptySearchResult(long startTime) { + long took = System.currentTimeMillis() - startTime; - SearchResult result = new SearchResult(); - result.setTook(took); - result.getHitsMetadata().getTotalHits().setValue(0); - result.setTimedOut(false); + SearchResult result = new SearchResult(); + result.setTook(took); + result.getHitsMetadata().getTotalHits().setValue(0); + result.setTimedOut(false); - return result; - } + return result; + } } From 7422cbdf54542fb324210ae4c2189e3a2961df32 Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Sat, 7 Sep 2024 02:59:05 +0200 Subject: [PATCH 86/87] WIP: tidy up Signed-off-by: Yannick Libert --- .../searchengine/resources/SearchResource.java | 12 ++---------- .../marquez/searchengine/services/SearchService.java | 2 +- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/search/src/main/java/marquez/searchengine/resources/SearchResource.java b/search/src/main/java/marquez/searchengine/resources/SearchResource.java index 8191380f48..9641c60f78 100644 --- a/search/src/main/java/marquez/searchengine/resources/SearchResource.java +++ b/search/src/main/java/marquez/searchengine/resources/SearchResource.java @@ -25,11 +25,10 @@ public class SearchResource { private final SearchService searchService; - private 
final Jdbi jdbi; public SearchResource(Jdbi jdbi) throws IOException { - this.jdbi = jdbi.installPlugin(new SqlObjectPlugin()); - OpenLineageDao openLineageDao = jdbi.onDemand(OpenLineageDao.class); + OpenLineageDao openLineageDao = + jdbi.installPlugin(new SqlObjectPlugin()).onDemand(OpenLineageDao.class); this.searchService = new SearchService(openLineageDao); } @@ -40,11 +39,8 @@ public Response searchJobs(SearchRequest request) { try { String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); - // Log the extracted details for debugging - // System.out.println("Received query: " + query + fields); SearchResult result = searchService.searchJobs(query, fields); // String jsonResponse = new ObjectMapper().writeValueAsString(result); - // System.out.println("Serialized Response: " + jsonResponse); return Response.ok(result).build(); } catch (Exception e) { return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); @@ -58,11 +54,7 @@ public Response searchDatasets(SearchRequest request) { try { String query = request.getQuery().getMulti_match().getQuery(); List fields = request.getQuery().getMulti_match().getFields(); - // Log the extracted details for debugging - // System.out.println("Received query: " + query); SearchResult result = searchService.searchDatasets(query, fields); - // String jsonResponse = new ObjectMapper().writeValueAsString(result); - // System.out.println("Serialized Response: " + jsonResponse); return Response.ok(result).build(); } catch (Exception e) { return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.getMessage()).build(); diff --git a/search/src/main/java/marquez/searchengine/services/SearchService.java b/search/src/main/java/marquez/searchengine/services/SearchService.java index 041cd878f2..8251eeed4a 100644 --- a/search/src/main/java/marquez/searchengine/services/SearchService.java +++ 
b/search/src/main/java/marquez/searchengine/services/SearchService.java @@ -83,7 +83,7 @@ private void indexLineageEventsInBackground() { private void loadLineageEventsFromDatabase() throws IOException { ZonedDateTime before = ZonedDateTime.now(); - ZonedDateTime after = before.minusDays(1); + ZonedDateTime after = before.minusYears(5); int limit = 10000; int offset = 0; From 52a088d55fa107422286559896f2dd28c85f32fd Mon Sep 17 00:00:00 2001 From: Yannick Libert Date: Sat, 7 Sep 2024 03:08:45 +0200 Subject: [PATCH 87/87] WIP: add a banner Signed-off-by: Yannick Libert --- search/src/main/resources/banner.txt | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 search/src/main/resources/banner.txt diff --git a/search/src/main/resources/banner.txt b/search/src/main/resources/banner.txt new file mode 100644 index 0000000000..979e2c07c1 --- /dev/null +++ b/search/src/main/resources/banner.txt @@ -0,0 +1,7 @@ + + __ ___ ____ __ ____ _ + / |/ /__ ________ ___ _____ ___ / __/__ ___ _________/ / / __/__ ___ _(_)__ ___ + / /|_/ / _ `/ __/ _ `/ // / -_)_ / _\ \/ -_) _ `/ __/ __/ _ \ / _// _ \/ _ `/ / _ \/ -_) +/_/ /_/\_,_/_/ \_, /\_,_/\__//__/ /___/\__/\_,_/_/ \__/_//_/ /___/_//_/\_, /_/_//_/\__/ + /_/ /___/ + \ No newline at end of file