Skip to content

Commit 0c92a8e

Browse files
jywadhwani and Jyoti Wadhwani authored
refactor search index builder to store urn parts efficiently (#1937) (#1972)
* refactor search index builder to store urn parts efficiently (#1937)
  Co-authored-by: Jyoti Wadhwani <[email protected]>
* set urn for all documents
* rebase, fix merge conflicts and modify tests
Co-authored-by: Jyoti Wadhwani <[email protected]>
1 parent 32133cd commit 0c92a8e

File tree

4 files changed

+95
-71
lines changed

4 files changed

+95
-71
lines changed

metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package com.linkedin.metadata.builders.search;
22

33
import com.linkedin.common.Ownership;
4-
import com.linkedin.common.Status;
54
import com.linkedin.common.urn.DataProcessUrn;
65
import com.linkedin.data.template.RecordTemplate;
76
import com.linkedin.data.template.StringArray;
@@ -42,15 +41,16 @@ private static DataProcessDocument setUrnDerivedFields(@Nonnull DataProcessUrn u
4241
@Nonnull
4342
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Ownership ownership) {
4443
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
45-
return setUrnDerivedFields(urn)
44+
return new DataProcessDocument()
45+
.setUrn(urn)
4646
.setHasOwners(!owners.isEmpty())
4747
.setOwners(owners);
4848
}
4949

5050
@Nonnull
5151
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn,
5252
@Nonnull DataProcessInfo dataProcessInfo) {
53-
DataProcessDocument dataProcessDocument = setUrnDerivedFields(urn);
53+
final DataProcessDocument dataProcessDocument = new DataProcessDocument().setUrn(urn);
5454
if (dataProcessInfo.getInputs() != null) {
5555
dataProcessDocument.setInputs(dataProcessInfo.getInputs())
5656
.setNumInputDatasets(dataProcessInfo.getInputs().size());
@@ -62,23 +62,19 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr
6262
return dataProcessDocument;
6363
}
6464

65-
@Nonnull
66-
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Status status) {
67-
return setUrnDerivedFields(urn)
68-
.setRemoved(status.isRemoved());
69-
}
70-
7165
@Nonnull
7266
private List<DataProcessDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) {
73-
DataProcessUrn urn = dataProcessSnapshot.getUrn();
74-
return dataProcessSnapshot.getAspects().stream().map(aspect -> {
67+
final DataProcessUrn urn = dataProcessSnapshot.getUrn();
68+
final List<DataProcessDocument> documents = dataProcessSnapshot.getAspects().stream().map(aspect -> {
7569
if (aspect.isDataProcessInfo()) {
7670
return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo());
7771
} else if (aspect.isOwnership()) {
7872
return getDocumentToUpdateFromAspect(urn, aspect.getOwnership());
7973
}
8074
return null;
8175
}).filter(Objects::nonNull).collect(Collectors.toList());
76+
documents.add(setUrnDerivedFields(urn));
77+
return documents;
8278
}
8379

8480
@Nullable

metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DatasetIndexBuilder.java

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -51,51 +51,54 @@ private static DatasetDocument setUrnDerivedFields(@Nonnull DatasetUrn urn) {
5151
@Nonnull
5252
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) {
5353
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
54-
return setUrnDerivedFields(urn)
54+
return new DatasetDocument()
55+
.setUrn(urn)
5556
.setHasOwners(!owners.isEmpty())
5657
.setOwners(owners);
5758
}
5859

5960
@Nonnull
6061
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) {
61-
return setUrnDerivedFields(urn)
62+
return new DatasetDocument()
63+
.setUrn(urn)
6264
.setRemoved(status.isRemoved());
6365
}
6466

6567
@Nonnull
6668
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) {
67-
return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated());
69+
return new DatasetDocument().setUrn(urn).setDeprecated(deprecation.isDeprecated());
6870
}
6971

7072
@Nonnull
7173
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) {
72-
final DatasetDocument doc = setUrnDerivedFields(urn);
73-
if (datasetProperties.hasDescription()) {
74+
final DatasetDocument doc = new DatasetDocument().setUrn(urn);
75+
if (datasetProperties.getDescription() != null) {
7476
doc.setDescription(datasetProperties.getDescription());
75-
} else {
76-
doc.setDescription("");
7777
}
7878
return doc;
7979
}
8080

8181
@Nonnull
8282
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) {
83-
return setUrnDerivedFields(urn)
83+
return new DatasetDocument()
84+
.setUrn(urn)
8485
.setHasSchema(true);
8586
}
8687

8788
@Nonnull
88-
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) {
89-
return setUrnDerivedFields(urn)
90-
.setUpstreams(new DatasetUrnArray(
91-
upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList())
92-
));
89+
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn,
90+
@Nonnull UpstreamLineage upstreamLineage) {
91+
return new DatasetDocument().setUrn(urn)
92+
.setUpstreams(new DatasetUrnArray(upstreamLineage.getUpstreams()
93+
.stream()
94+
.map(upstream -> upstream.getDataset())
95+
.collect(Collectors.toList())));
9396
}
9497

9598
@Nonnull
9699
private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) {
97100
final DatasetUrn urn = datasetSnapshot.getUrn();
98-
return datasetSnapshot.getAspects().stream().map(aspect -> {
101+
final List<DatasetDocument> documents = datasetSnapshot.getAspects().stream().map(aspect -> {
99102
if (aspect.isDatasetDeprecation()) {
100103
return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation());
101104
} else if (aspect.isDatasetProperties()) {
@@ -111,6 +114,8 @@ private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull Data
111114
}
112115
return null;
113116
}).filter(Objects::nonNull).collect(Collectors.toList());
117+
documents.add(setUrnDerivedFields(urn));
118+
return documents;
114119
}
115120

116121
@Override
@@ -123,6 +128,7 @@ public final List<DatasetDocument> getDocumentsToUpdate(@Nonnull RecordTemplate
123128
}
124129

125130
@Override
131+
@Nonnull
126132
public Class<DatasetDocument> getDocumentType() {
127133
return DatasetDocument.class;
128134
}

metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,10 +39,10 @@ public void testGetDocumentsToUpdateFromDataProcessSnapshot() {
3939
new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray);
4040

4141
List<DataProcessDocument> actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot);
42-
assertEquals(actualDocs.size(), 1);
43-
assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
42+
assertEquals(actualDocs.size(), 2);
4443
assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn);
4544
assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn);
46-
45+
assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
46+
assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn);
4747
}
4848
}

0 commit comments

Comments
 (0)