Skip to content

Commit

Permalink
refactor search index builder to store urn parts efficiently (#1937) (#1972)
Browse files Browse the repository at this point in the history

* refactor search index builder to store urn parts efficiently (#1937)

Co-authored-by: Jyoti Wadhwani <[email protected]>

* set urn for all documents

* rebase, fix merge conflicts and modify tests

Co-authored-by: Jyoti Wadhwani <[email protected]>
  • Loading branch information
jywadhwani and Jyoti Wadhwani authored Oct 29, 2020
1 parent 32133cd commit 0c92a8e
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 71 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.metadata.builders.search;

import com.linkedin.common.Ownership;
import com.linkedin.common.Status;
import com.linkedin.common.urn.DataProcessUrn;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.data.template.StringArray;
Expand Down Expand Up @@ -42,15 +41,16 @@ private static DataProcessDocument setUrnDerivedFields(@Nonnull DataProcessUrn u
/**
 * Builds the partial {@link DataProcessDocument} holding the ownership fields for a data process.
 *
 * <p>Owners come from {@code BuilderUtils.getCorpUserOwners(ownership)}; {@code hasOwners} is
 * set to true exactly when that list is non-empty.
 *
 * @param urn urn of the data process the document is built for
 * @param ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return document with the {@code hasOwners} and {@code owners} fields populated
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// NOTE(review): two return heads follow. This reads like a diff rendering in which the earlier
// setUrnDerivedFields(urn) line is shown beside its replacement,
// new DataProcessDocument().setUrn(urn) - confirm which one the real file keeps.
return setUrnDerivedFields(urn)
return new DataProcessDocument()
.setUrn(urn)
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn,
@Nonnull DataProcessInfo dataProcessInfo) {
DataProcessDocument dataProcessDocument = setUrnDerivedFields(urn);
final DataProcessDocument dataProcessDocument = new DataProcessDocument().setUrn(urn);
if (dataProcessInfo.getInputs() != null) {
dataProcessDocument.setInputs(dataProcessInfo.getInputs())
.setNumInputDatasets(dataProcessInfo.getInputs().size());
Expand All @@ -62,23 +62,19 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr
return dataProcessDocument;
}

/**
 * Builds the partial {@link DataProcessDocument} carrying the removed flag of a data process.
 *
 * @param urn urn passed to {@code setUrnDerivedFields} to create the base document
 * @param status aspect whose {@code isRemoved()} value is copied onto the document
 * @return the base document with {@code removed} set
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Status status) {
    // Start from the urn-derived base document, then copy the removed flag onto it.
    final DataProcessDocument baseDocument = setUrnDerivedFields(urn);
    return baseDocument.setRemoved(status.isRemoved());
}

/**
 * Collects the documents to update for the aspects of a data process snapshot.
 *
 * <p>Each aspect that is a DataProcessInfo or an Ownership is converted with the matching
 * {@code getDocumentToUpdateFromAspect} overload; any other aspect maps to {@code null} and is
 * dropped by the {@code Objects::nonNull} filter.
 *
 * @param dataProcessSnapshot snapshot supplying the urn and the aspects
 * @return the per-aspect documents; in the variant that collects into {@code documents}, the
 *     result of {@code setUrnDerivedFields(urn)} is appended as the last element
 */
@Nonnull
private List<DataProcessDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) {
// NOTE(review): the next four lines declare urn twice and open the same stream pipeline twice
// (once returned directly, once stored in documents). This looks like the old and new lines of
// a diff shown together - confirm which pair the real file keeps.
DataProcessUrn urn = dataProcessSnapshot.getUrn();
return dataProcessSnapshot.getAspects().stream().map(aspect -> {
final DataProcessUrn urn = dataProcessSnapshot.getUrn();
final List<DataProcessDocument> documents = dataProcessSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDataProcessInfo()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo());
} else if (aspect.isOwnership()) {
return getDocumentToUpdateFromAspect(urn, aspect.getOwnership());
}
// Aspects with no matching overload yield null and are filtered out below.
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
// NOTE(review): Collectors.toList() does not guarantee a mutable list, so this add() relies on
// the current implementation; Collectors.toCollection(ArrayList::new) would make it explicit.
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,51 +51,54 @@ private static DatasetDocument setUrnDerivedFields(@Nonnull DatasetUrn urn) {
/**
 * Builds the partial {@link DatasetDocument} holding the ownership fields for a dataset.
 *
 * <p>Owners come from {@code BuilderUtils.getCorpUserOwners(ownership)}; {@code hasOwners} is
 * set to true exactly when that list is non-empty.
 *
 * @param urn urn of the dataset the document is built for
 * @param ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return document with the {@code hasOwners} and {@code owners} fields populated
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// NOTE(review): two return heads follow. This reads like a diff rendering in which the earlier
// setUrnDerivedFields(urn) line is shown beside its replacement,
// new DatasetDocument().setUrn(urn) - confirm which one the real file keeps.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setUrn(urn)
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

/**
 * Builds the partial {@link DatasetDocument} carrying the removed flag of a dataset.
 *
 * @param urn urn of the dataset the document is built for
 * @param status aspect whose {@code isRemoved()} value is copied onto the document
 * @return document with {@code removed} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) {
// NOTE(review): two return heads follow. This reads like a diff rendering in which the earlier
// setUrnDerivedFields(urn) line is shown beside its replacement,
// new DatasetDocument().setUrn(urn) - confirm which one the real file keeps.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setUrn(urn)
.setRemoved(status.isRemoved());
}

/**
 * Builds the partial {@link DatasetDocument} carrying the deprecated flag of a dataset.
 *
 * @param urn urn of the dataset the document is built for
 * @param deprecation aspect whose {@code isDeprecated()} value is copied onto the document
 * @return document with {@code deprecated} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) {
// NOTE(review): two complete return statements follow, so the second is unreachable as written.
// This looks like the old (setUrnDerivedFields) and new (new DatasetDocument().setUrn) lines of
// a diff shown together - confirm which one the real file keeps.
return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated());
return new DatasetDocument().setUrn(urn).setDeprecated(deprecation.isDeprecated());
}

/**
 * Builds the partial {@link DatasetDocument} carrying the description of a dataset.
 *
 * <p>The description is copied from {@code datasetProperties} when one is present; otherwise the
 * document's description is set to the empty string.
 *
 * @param urn urn of the dataset the document is built for
 * @param datasetProperties aspect the description is read from
 * @return document with {@code description} set (never left unset by this method)
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) {
// NOTE(review): doc is declared twice and the if head appears twice (hasDescription() vs.
// getDescription() != null). This looks like the old and new lines of a diff shown together -
// confirm which pair the real file keeps.
final DatasetDocument doc = setUrnDerivedFields(urn);
if (datasetProperties.hasDescription()) {
final DatasetDocument doc = new DatasetDocument().setUrn(urn);
if (datasetProperties.getDescription() != null) {
doc.setDescription(datasetProperties.getDescription());
} else {
// No description available: store an empty string rather than leaving the field unset.
doc.setDescription("");
}
return doc;
}

/**
 * Builds the partial {@link DatasetDocument} that marks a dataset as having a schema.
 *
 * <p>The {@code schemaMetadata} argument is not read in the visible body; it only selects this
 * overload, and {@code hasSchema} is always set to {@code true}.
 *
 * @param urn urn of the dataset the document is built for
 * @param schemaMetadata schema aspect (contents unused here)
 * @return document with {@code hasSchema} set to true
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) {
// NOTE(review): two return heads follow. This reads like a diff rendering in which the earlier
// setUrnDerivedFields(urn) line is shown beside its replacement,
// new DatasetDocument().setUrn(urn) - confirm which one the real file keeps.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setUrn(urn)
.setHasSchema(true);
}

/**
 * Builds the partial {@link DatasetDocument} listing the upstream datasets of a dataset.
 *
 * <p>Every entry of {@code upstreamLineage.getUpstreams()} is mapped to its
 * {@code getDataset()} value and the results are wrapped in a {@code DatasetUrnArray}.
 *
 * @param urn urn of the dataset the document is built for
 * @param upstreamLineage aspect supplying the upstream entries
 * @return document with {@code upstreams} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) {
// NOTE(review): the signature and body appear twice below (first built on
// setUrnDerivedFields(urn), then on new DatasetDocument().setUrn(urn)) with only one closing
// brace. This looks like the old and new lines of a diff shown together - confirm which
// version the real file keeps.
return setUrnDerivedFields(urn)
.setUpstreams(new DatasetUrnArray(
upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList())
));
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn,
@Nonnull UpstreamLineage upstreamLineage) {
return new DatasetDocument().setUrn(urn)
.setUpstreams(new DatasetUrnArray(upstreamLineage.getUpstreams()
.stream()
.map(upstream -> upstream.getDataset())
.collect(Collectors.toList())));
}

@Nonnull
private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) {
final DatasetUrn urn = datasetSnapshot.getUrn();
return datasetSnapshot.getAspects().stream().map(aspect -> {
final List<DatasetDocument> documents = datasetSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDatasetDeprecation()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation());
} else if (aspect.isDatasetProperties()) {
Expand All @@ -111,6 +114,8 @@ private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull Data
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Override
Expand All @@ -123,6 +128,7 @@ public final List<DatasetDocument> getDocumentsToUpdate(@Nonnull RecordTemplate
}

/**
 * Returns the class token of the document type handled here.
 *
 * @return {@code DatasetDocument.class}
 */
@Override
@Nonnull
public Class<DatasetDocument> getDocumentType() {
    final Class<DatasetDocument> documentType = DatasetDocument.class;
    return documentType;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ public void testGetDocumentsToUpdateFromDataProcessSnapshot() {
new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray);

List<DataProcessDocument> actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
assertEquals(actualDocs.size(), 2);
assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn);
assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn);

assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn);
}
}
Loading

0 comments on commit 0c92a8e

Please sign in to comment.