From f462e2a4a3a528d2a7ee2a950144aab2d8c0901d Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 17 Jan 2025 14:35:03 -0800 Subject: [PATCH 01/81] Add flatten command to ANTLR lexer and parser. Signed-off-by: currantw --- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 0307fb4ca1..0265a4ddf2 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -37,6 +37,7 @@ AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; TRENDLINE: 'TRENDLINE'; +FLATTEN: 'FLATTEN'; // COMMAND ASSIST KEYWORDS AS: 'AS'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 451edeb29b..0b1fc4f130 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -51,6 +51,7 @@ commands | mlCommand | fillnullCommand | trendlineCommand + | flattenCommand ; searchCommand @@ -158,6 +159,11 @@ trendlineType : SMA ; +// TODO #3030: Extend flatten command to support aliases +flattenCommand + : FLATTEN fieldExpression + ; + kmeansCommand : KMEANS (kmeansParameter)* ; @@ -890,6 +896,7 @@ keywordsCanBeId | SORT | EVAL | FILLNULL + | FLATTEN | HEAD | TOP | RARE From 69f0b1af35aebab25be14836a0dd5faa2bd7e64b Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 20 Jan 2025 10:36:02 -0800 Subject: [PATCH 02/81] Skeleton implementation, tests, and documents with lots of TODOs. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 12 ++++++ .../sql/ast/AbstractNodeVisitor.java | 5 +++ .../org/opensearch/sql/ast/tree/Flatten.java | 39 ++++++++++++++++++ .../org/opensearch/sql/executor/Explain.java | 8 ++++ .../sql/planner/DefaultImplementor.java | 8 ++++ .../sql/planner/logical/LogicalFlatten.java | 27 +++++++++++++ .../logical/LogicalPlanNodeVisitor.java | 4 ++ .../sql/planner/physical/FlattenOperator.java | 40 +++++++++++++++++++ .../physical/PhysicalPlanNodeVisitor.java | 4 ++ .../opensearch/sql/analysis/AnalyzerTest.java | 6 +++ .../opensearch/sql/executor/ExplainTest.java | 6 +++ .../sql/planner/DefaultImplementorTest.java | 6 +++ .../planner/logical/LogicalFlattenTest.java | 16 ++++++++ .../logical/LogicalPlanNodeVisitorTest.java | 1 + .../planner/physical/FlattenOperatorTest.java | 18 +++++++++ docs/category.json | 1 + docs/user/ppl/cmd/flatten.rst | 5 +++ docs/user/ppl/index.rst | 22 +++++----- .../org/opensearch/sql/ppl/ExplainIT.java | 9 ++++- .../opensearch/sql/ppl/FlattenCommandIT.java | 11 +++++ .../OpenSearchExecutionProtector.java | 8 ++++ .../OpenSearchExecutionProtectorTest.java | 6 +++ .../opensearch/sql/ppl/parser/AstBuilder.java | 7 ++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 8 ++++ .../sql/ppl/parser/AstBuilderTest.java | 6 +++ .../ppl/parser/AstExpressionBuilderTest.java | 6 +++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 6 +++ 27 files changed, 284 insertions(+), 11 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java create mode 100644 core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java create mode 100644 core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java create mode 100644 docs/user/ppl/cmd/flatten.rst 
create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d0051568c4..05ce2bb002 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -52,6 +52,7 @@ import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -448,6 +449,17 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } + /** + * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to + * the given flatten node. + */ + @Override + public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { + + // TODO #3030: Implement + return null; + } + /** Build {@link ParseExpression} to context and skip to child nodes. 
*/ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index f27260dd5f..adc804bab1 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -47,6 +47,7 @@ import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -107,6 +108,10 @@ public T visitTableFunction(TableFunction node, C context) { return visitChildren(node, context); } + public T visitFlatten(Flatten node, C context) { + return visitChildren(node, context); + } + public T visitFilter(Filter node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java new file mode 100644 index 0000000000..8677e50292 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java @@ -0,0 +1,39 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import java.util.List; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Field; + +@ToString +@RequiredArgsConstructor +public class Flatten extends UnresolvedPlan { + + private UnresolvedPlan child; + + @Getter private final Field field; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List 
getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFlatten(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index 31890a8090..14fa432b10 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -23,6 +23,7 @@ import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; +import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; @@ -160,6 +161,13 @@ public ExplainResponseNode visitEval(EvalOperator node, Object context) { ImmutableMap.of("expressions", convertPairListToMap(node.getExpressionList())))); } + @Override + public ExplainResponseNode visitFlatten(FlattenOperator node, Object context) { + + // TODO #3030: Implement + return null; + } + @Override public ExplainResponseNode visitDedupe(DedupeOperator node, Object context) { return explain( diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index c988084d1b..a08045b539 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -12,6 +12,7 @@ import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalFlatten; import 
org.opensearch.sql.planner.logical.LogicalLimit; import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPaginate; @@ -99,6 +100,13 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { return new EvalOperator(visitChild(node, context), node.getExpressions()); } + @Override + public PhysicalPlan visitFlatten(LogicalFlatten plan, C context) { + + // TODO #3030: Implement + return null; + } + @Override public PhysicalPlan visitNested(LogicalNested node, C context) { return new NestedOperator(visitChild(node, context), node.getFields()); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java new file mode 100644 index 0000000000..ab29fc0bf5 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java @@ -0,0 +1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +/** Logical plan that represent the flatten command. 
*/ +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalFlatten extends LogicalPlan { + + // TODO #3030: Implement + + public LogicalFlatten(List childPlans) { + super(childPlans); + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitFlatten(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index c9eedd8efc..821f55ab93 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -72,6 +72,10 @@ public R visitEval(LogicalEval plan, C context) { return visitNode(plan, context); } + public R visitFlatten(LogicalFlatten plan, C context) { + return visitNode(plan, context); + } + public R visitNested(LogicalNested plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java new file mode 100644 index 0000000000..b49665ec71 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.data.model.ExprValue; + +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class FlattenOperator extends PhysicalPlan { + + // TODO #3030: Implement + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return null; + } + + @Override + public boolean hasNext() { + return false; + } + + @Override + public 
ExprValue next() { + return null; + } + + @Override + public List getChild() { + return List.of(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 66c7219e39..0d619ab8bc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -56,6 +56,10 @@ public R visitEval(EvalOperator node, C context) { return visitNode(node, context); } + public R visitFlatten(FlattenOperator node, C context) { + return visitNode(node, context); + } + public R visitNested(NestedOperator node, C context) { return visitNode(node, context); } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 3f4752aa2e..64c100610d 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -1485,6 +1485,12 @@ public void fillnull_various_values() { AstDSL.field("int_null_value"), AstDSL.intLiteral(1)))))); } + @Test + void flatten() { + + // TODO #3030: Test + } + @Test public void trendline() { assertAnalyzeEqual( diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index febf662843..de80e844ee 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -299,6 +299,12 @@ void can_explain_trendline() { explain.apply(plan)); } + @Test + void can_explain_flatten() { + + // TODO #3030: Test + } + private static class FakeTableScan extends TableScanOperator { @Override public boolean hasNext() { diff --git 
a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 8ee0dd7e70..f6c1faeab9 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -325,4 +325,10 @@ public void visitTrendline_should_build_TrendlineOperator() { assertInstanceOf(TrendlineOperator.class, implemented); assertSame(physicalChild, implemented.getChild().get(0)); } + + @Test + void visitFlatten_should_build_FlattenOperator() { + + // TODO #3030: Test + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java new file mode 100644 index 0000000000..5b5966462c --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -0,0 +1,16 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.analysis.AnalyzerTestBase; + +@ExtendWith(MockitoExtension.class) +class LogicalFlattenTest extends AnalyzerTestBase { + + // TODO #3030: Test +} diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index 43ce23ed56..6666ea1616 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -152,6 +152,7 @@ public TableWriteOperator build(PhysicalPlan child) { AstDSL.computation(1, AstDSL.field("testField"), "dummy", SMA), ExprCoreType.DOUBLE))); + // TODO #3030: Test return Stream.of( 
relation, tableScanBuilder, diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java new file mode 100644 index 0000000000..c62d669364 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +@ExtendWith(MockitoExtension.class) +class FlattenOperatorTest { + + // TODO #3030: Test +} diff --git a/docs/category.json b/docs/category.json index efbb57d6e6..8dda7160a8 100644 --- a/docs/category.json +++ b/docs/category.json @@ -15,6 +15,7 @@ "user/ppl/cmd/eval.rst", "user/ppl/cmd/fields.rst", "user/ppl/cmd/fillnull.rst", + "user/ppl/cmd/flatten.rst", "user/ppl/cmd/grok.rst", "user/ppl/cmd/head.rst", "user/ppl/cmd/parse.rst", diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst new file mode 100644 index 0000000000..969f283000 --- /dev/null +++ b/docs/user/ppl/cmd/flatten.rst @@ -0,0 +1,5 @@ +============= +flatten +============= + +TODO #3030: Documentation and doctests \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index ef8cff334e..7a10e98504 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -50,42 +50,44 @@ The query start with search command and then flowing a set of command delimited - `describe command `_ - - `show datasources command `_ - - `eval command `_ - `fields command `_ + - `flatten command `_ + - `grok command `_ + - `head command `_ + - `kmeans command `_ + - `metadata commands `_ + 
- `ml command `_ - `parse command `_ - `patterns command `_ + - `rare command `_ + - `rename command `_ - `search command `_ + - `show datasources command `_ + - `sort command `_ - `stats command `_ + - `top command `_ + - `trendline command `_ - `where command `_ - - `head command `_ - - - `rare command `_ - - - `top command `_ - - - `metadata commands `_ - * **Functions** - `Expressions `_ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 531a24bad6..dd382381ee 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -126,7 +126,14 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { + "| fields ageTrend")); } - String loadFromFile(String filename) throws Exception { + @Test + void testFlatten() { + + // TODO #3030: Test + } + + private static String loadFromFile(String filename) + throws java.net.URISyntaxException, IOException { URI uri = Resources.getResource(filename).toURI(); return new String(Files.readAllBytes(Paths.get(uri))); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java new file mode 100644 index 0000000000..39e973b981 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +public class FlattenCommandIT extends PPLIntegTestCase { + + // TODO #3030: Test +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index 235ddc0075..a9e2474cab 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -16,6 +16,7 @@ import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; +import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; @@ -107,6 +108,13 @@ public PhysicalPlan visitEval(EvalOperator node, Object context) { return new EvalOperator(visitInput(node.getInput(), context), node.getExpressionList()); } + @Override + public PhysicalPlan visitFlatten(FlattenOperator node, Object context) { + + // TODO #3030: Implement + return doProtect(new FlattenOperator()); + } + @Override public PhysicalPlan visitNested(NestedOperator node, Object context) { return doProtect( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index 18958c74db..f6e93e3445 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -363,6 +363,12 @@ void test_visitOpenSearchEval() { executionProtector.visitEval(evalOperator, null)); } + @Test + void test_visitFlatten() { + + // TODO #3030: Test + } + PhysicalPlan resourceMonitor(PhysicalPlan input) { return new ResourceMonitorPlan(input, resourceMonitor); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java 
b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c3c31ee2e1..ee4edfc609 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -53,6 +53,7 @@ import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; @@ -264,6 +265,12 @@ public UnresolvedPlan visitRareCommand(RareCommandContext ctx) { groupList); } + @Override + public UnresolvedPlan visitFlattenCommand(OpenSearchPPLParser.FlattenCommandContext ctx) { + Field fieldExpression = (Field) internalVisitExpression(ctx.fieldExpression()); + return new Flatten(fieldExpression); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 96e21eafcd..24aba40a71 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -37,6 +37,7 @@ import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.RareTopN; @@ -192,6 +193,13 @@ public String visitEval(Eval node, String context) { return StringUtils.format("%s | eval %s", child, expressions); } + @Override + public String visitFlatten(Flatten node, String context) { + + // TODO #3030: Implement + return 
""; + } + /** Build {@link LogicalSort}. */ @Override public String visitSort(Sort node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index c6f4ed2044..116c607944 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -446,6 +446,12 @@ public void testEvalCommand() { eval(relation("t"), let(field("r"), function("abs", field("f"))))); } + @Test + public void testFlattenCommand() { + + // TODO #3030: Test + } + @Test public void testIndexName() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index de74cdf433..8adaf65fea 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -252,6 +252,12 @@ public void testEvalIfFunctionExpr() { intLiteral(0))))); } + @Test + public void testFlattenExpr() { + + // TODO #3030: Test + } + @Test public void testPositionFunctionExpr() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 06f8fbb061..2d1270a49d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -96,6 +96,12 @@ public void testTrendlineCommand() { anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); } + @Test + public void testFlattenCommand() { + + // TODO #3030: Test + } + @Test public void testHeadCommandWithNumber() { assertEquals("source=t | head 3", anonymize("source=t | head 3")); From 
c1ac737506c083bd4ce5e744a370c7fe5f7fab98 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 20 Jan 2025 14:41:42 -0800 Subject: [PATCH 03/81] Initial implementation Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 9 ++- .../org/opensearch/sql/ast/tree/Flatten.java | 10 +-- .../sql/data/model/ExprTupleValue.java | 1 + .../org/opensearch/sql/executor/Explain.java | 9 ++- .../sql/planner/DefaultImplementor.java | 7 +- .../sql/planner/logical/LogicalFlatten.java | 13 ++-- .../sql/planner/physical/FlattenOperator.java | 74 +++++++++++++++++-- .../opensearch/sql/ppl/FlattenCommandIT.java | 2 +- .../OpenSearchExecutionProtector.java | 4 +- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 5 +- 10 files changed, 101 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 05ce2bb002..e6c2e2869c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -95,6 +95,7 @@ import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalFlatten; import org.opensearch.sql.planner.logical.LogicalLimit; import org.opensearch.sql.planner.logical.LogicalML; import org.opensearch.sql.planner.logical.LogicalMLCommons; @@ -455,9 +456,13 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { */ @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { + LogicalPlan child = node.getChild().getFirst().accept(this, context); - // TODO #3030: Implement - return null; + Field field = node.getField(); + Expression expression = expressionAnalyzer.analyze(field, context); + ReferenceExpression ref = DSL.ref(field.getField().toString(), expression.type()); + + return new 
LogicalFlatten(child, ref); } /** Build {@link ParseExpression} to context and skip to child nodes. */ diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java index 8677e50292..aa402d9cd9 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java @@ -5,31 +5,31 @@ package org.opensearch.sql.ast.tree; +import com.google.common.collect.ImmutableList; import java.util.List; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.expression.Field; +@Getter @ToString @RequiredArgsConstructor public class Flatten extends UnresolvedPlan { - private UnresolvedPlan child; @Getter private final Field field; @Override - public UnresolvedPlan attach(UnresolvedPlan child) { + public Flatten attach(UnresolvedPlan child) { this.child = child; return this; } @Override - public List getChild() { - return child == null ? List.of() : List.of(child); + public List getChild() { + return ImmutableList.of(child); } @Override diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java index 856075bed8..be85c0e54b 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java @@ -69,6 +69,7 @@ public ExprValue keyValue(String key) { * * @return true for equal, otherwise false. 
*/ + @Override public boolean equal(ExprValue o) { if (!(o instanceof ExprTupleValue)) { return false; diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index 14fa432b10..fb9f932f29 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -163,9 +163,12 @@ public ExplainResponseNode visitEval(EvalOperator node, Object context) { @Override public ExplainResponseNode visitFlatten(FlattenOperator node, Object context) { - - // TODO #3030: Implement - return null; + return explain( + node, + context, + explainNode -> + explainNode.setDescription( + ImmutableMap.of("flattenField", node.getField()))); } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index a08045b539..da3704dd4e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -32,6 +32,7 @@ import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; +import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; @@ -101,10 +102,8 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { } @Override - public PhysicalPlan visitFlatten(LogicalFlatten plan, C context) { - - // TODO #3030: Implement - return null; + public PhysicalPlan visitFlatten(LogicalFlatten node, C context) { + return new FlattenOperator(visitChild(node, context), node.getField()); } @Override diff --git 
a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java index ab29fc0bf5..a4b018543d 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java @@ -5,19 +5,22 @@ package org.opensearch.sql.planner.logical; -import java.util.List; +import java.util.Collections; import lombok.EqualsAndHashCode; +import lombok.Getter; import lombok.ToString; +import org.opensearch.sql.expression.ReferenceExpression; /** Logical plan that represent the flatten command. */ +@Getter @ToString @EqualsAndHashCode(callSuper = true) public class LogicalFlatten extends LogicalPlan { + private final ReferenceExpression field; - // TODO #3030: Implement - - public LogicalFlatten(List childPlans) { - super(childPlans); + public LogicalFlatten(LogicalPlan child, ReferenceExpression field) { + super(Collections.singletonList(child)); + this.field = field; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index b49665ec71..a22d1862bf 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -5,36 +5,94 @@ package org.opensearch.sql.planner.physical; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + +import com.google.common.collect.ImmutableMap; +import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; import lombok.EqualsAndHashCode; +import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; +import 
org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.ReferenceExpression; +@Getter @ToString -@EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) public class FlattenOperator extends PhysicalPlan { - // TODO #3030: Implement + private final PhysicalPlan input; + private final ReferenceExpression field; @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { - return null; + return visitor.visitFlatten(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); } @Override public boolean hasNext() { - return false; + return input.hasNext(); } @Override public ExprValue next() { - return null; + + if (!hasNext()) { + throw new NoSuchElementException("The next expression value does not exist"); + } + + String fieldName = field.getAttr(); + + // Verify that the field name is valid. + Map exprValueForFieldNameMap = ExprValueUtils.getTupleValue(input.next()); + if (!exprValueForFieldNameMap.containsKey(fieldName)) { + throw new IllegalArgumentException( + String.format("Field name '%s' for flatten command is not valid", fieldName)); + } + + // Verify that the field is a tuple. + ExprValue exprValue = exprValueForFieldNameMap.get(fieldName); + if (exprValue.type() != STRUCT) { + throw new IllegalArgumentException( + String.format("Field '%s' for flatten command must be a struct", fieldName)); + } + + // Flatten the tuple and add the flattened field names and values to result. + Map flattenedExprValueMap = flattenExprValue(exprValue); + exprValueForFieldNameMap.putAll(flattenedExprValueMap); + + return ExprTupleValue.fromExprValueMap(exprValueForFieldNameMap); } - @Override - public List getChild() { - return List.of(); + /** Flattens the given tuple and returns the result. 
*/ + private static Map flattenExprValue(ExprValue exprValue) { + + ImmutableMap.Builder flattenedMap = new ImmutableMap.Builder<>(); + + for (Entry entry : exprValue.tupleValue().entrySet()) { + ExprValue entryExprValue = entry.getValue(); + + // If the expression is a tuple, recursively flatten it. + Map flattenedEntryMap = + (entryExprValue.type() == STRUCT) + ? flattenExprValue(entryExprValue) + : Map.of(entry.getKey(), entryExprValue); + + flattenedEntryMap.forEach(flattenedMap::put); + } + + return flattenedMap.build(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 39e973b981..96ac41ceb6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -8,4 +8,4 @@ public class FlattenCommandIT extends PPLIntegTestCase { // TODO #3030: Test -} \ No newline at end of file +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index a9e2474cab..0920d01eec 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -110,9 +110,7 @@ public PhysicalPlan visitEval(EvalOperator node, Object context) { @Override public PhysicalPlan visitFlatten(FlattenOperator node, Object context) { - - // TODO #3030: Implement - return doProtect(new FlattenOperator()); + return doProtect(new FlattenOperator(visitInput(node.getInput(), context), node.getField())); } @Override diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java 
b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 24aba40a71..05701deb00 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -195,9 +195,10 @@ public String visitEval(Eval node, String context) { @Override public String visitFlatten(Flatten node, String context) { + String child = node.getChild().getFirst().accept(this, context); + Field field = node.getField(); - // TODO #3030: Implement - return ""; + return StringUtils.format("%s | flatten %s", child, field); } /** Build {@link LogicalSort}. */ From 366e16294e080a89e29d31e8861d055c3495bfbf Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 24 Jan 2025 13:40:09 -0800 Subject: [PATCH 04/81] Fix typo Signed-off-by: currantw --- core/src/main/java/org/opensearch/sql/ast/tree/Relation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Relation.java b/core/src/main/java/org/opensearch/sql/ast/tree/Relation.java index ec5264a86b..267249eeb9 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Relation.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Relation.java @@ -67,7 +67,7 @@ public String getAlias() { } /** - * Get Qualified name preservs parts of the user given identifiers. This can later be utilized to + * Get Qualified name preserves parts of the user given identifiers. This can later be utilized to * determine DataSource,Schema and Table Name during Analyzer stage. So Passing QualifiedName * directly to Analyzer Stage. 
* From 0cbd8d41d9a0f809761a979c7e4646a7c12aae8c Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 27 Jan 2025 12:36:41 -0800 Subject: [PATCH 05/81] Initial implementation Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 62 +++++++++- .../org/opensearch/sql/ast/dsl/AstDSL.java | 5 + .../org/opensearch/sql/ast/tree/Eval.java | 2 - .../sql/data/model/ExprTupleValue.java | 21 +--- .../org/opensearch/sql/executor/Explain.java | 9 +- .../sql/planner/logical/LogicalPlanDSL.java | 4 + .../sql/planner/physical/FlattenOperator.java | 60 ++++----- .../sql/planner/physical/PhysicalPlanDSL.java | 4 + .../opensearch/sql/analysis/AnalyzerTest.java | 6 - .../opensearch/sql/executor/ExplainTest.java | 16 ++- .../sql/planner/DefaultImplementorTest.java | 22 +++- .../planner/logical/LogicalFlattenTest.java | 21 +++- .../logical/LogicalPlanNodeVisitorTest.java | 11 +- .../planner/physical/FlattenOperatorTest.java | 82 +++++++++++- .../opensearch/sql/ppl/FlattenCommandIT.java | 117 +++++++++++++++++- .../OpenSearchExecutionProtectorTest.java | 10 +- .../sql/ppl/parser/AstBuilderTest.java | 7 +- .../ppl/parser/AstExpressionBuilderTest.java | 7 +- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 6 +- 19 files changed, 391 insertions(+), 81 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index e6c2e2869c..04b7dbd8bb 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -25,10 +25,13 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.regex.Pattern; import java.util.stream.Collectors; import 
org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -73,6 +76,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -129,6 +133,10 @@ public class Analyzer extends AbstractNodeVisitor private final BuiltinFunctionRepository repository; + private static final String PATH_SEPARATOR = "."; + private static final Pattern PATH_COMPONENT_PATTERN = + Pattern.compile(PATH_SEPARATOR, Pattern.LITERAL); + /** Constructor. */ public Analyzer( ExpressionAnalyzer expressionAnalyzer, @@ -457,12 +465,58 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); + TypeEnvironment env = context.peek(); + + // Verify that the field type is valid. + ReferenceExpression fieldExpr = + (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); + + ExprType fieldType = fieldExpr.type(); + if (fieldType != STRUCT) { + throw new IllegalArgumentException( + String.format("Invalid field type '%s' for flatten command", fieldType)); + } + + // Get fields to add and remove. + String fieldName = fieldExpr.getAttr(); + java.util.Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); + + java.util.Map addFieldsMap = new HashMap<>(); + java.util.Map removeFieldsMap = new HashMap<>(); + + for (java.util.Map.Entry entry : fieldsMap.entrySet()) { + String path = entry.getKey(); + List pathComponents = Arrays.stream(PATH_COMPONENT_PATTERN.split(path)).toList(); + + // Verify that path starts with the field name. 
+ if (!pathComponents.getFirst().equals(fieldName)) { + continue; + } + + // Remove non-leaf nodes. + ExprType type = entry.getValue(); + if (type == STRUCT) { + removeFieldsMap.put(path, STRUCT); + continue; + } + + String newFieldName = pathComponents.getLast(); + + // Verify that new field does not overwrite an existing field. + if (fieldsMap.containsKey(newFieldName)) { + throw new IllegalArgumentException( + String.format("Flatten command cannot overwrite field '%s'", newFieldName)); + } + + addFieldsMap.put(newFieldName, type); + removeFieldsMap.put(path, type); + } - Field field = node.getField(); - Expression expression = expressionAnalyzer.analyze(field, context); - ReferenceExpression ref = DSL.ref(field.getField().toString(), expression.type()); + // Update environment. + addFieldsMap.forEach((name, type) -> env.define(DSL.ref(name, type))); + removeFieldsMap.forEach((name, type) -> env.remove(DSL.ref(name, type))); - return new LogicalFlatten(child, ref); + return new LogicalFlatten(child, DSL.ref(fieldName, STRUCT)); } /** Build {@link ParseExpression} to context and skip to child nodes. */ diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index d9956609ec..526a5e8def 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Parse; @@ -104,6 +105,10 @@ public static Eval eval(UnresolvedPlan input, Let... 
projectList) { return new Eval(Arrays.asList(projectList)).attach(input); } + public Flatten flatten(UnresolvedPlan input, Field field) { + return new Flatten(field).attach(input); + } + public static UnresolvedPlan projectWithArg( UnresolvedPlan input, List argList, UnresolvedExpression... projectList) { return new Project(Arrays.asList(projectList), argList).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Eval.java b/core/src/main/java/org/opensearch/sql/ast/tree/Eval.java index ecceabd757..aa7cba4851 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Eval.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Eval.java @@ -10,14 +10,12 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; -import lombok.Setter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Let; /** AST node represent Eval operation. */ @Getter -@Setter @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java index be85c0e54b..643c2cd8cd 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java @@ -5,7 +5,6 @@ package org.opensearch.sql.data.model; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Map.Entry; @@ -70,23 +69,13 @@ public ExprValue keyValue(String key) { * @return true for equal, otherwise false. 
*/ @Override - public boolean equal(ExprValue o) { - if (!(o instanceof ExprTupleValue)) { + public boolean equal(ExprValue other) { + + if (!(other instanceof ExprTupleValue)) { return false; - } else { - ExprTupleValue other = (ExprTupleValue) o; - Iterator> thisIterator = this.valueMap.entrySet().iterator(); - Iterator> otherIterator = other.valueMap.entrySet().iterator(); - while (thisIterator.hasNext() && otherIterator.hasNext()) { - Entry thisEntry = thisIterator.next(); - Entry otherEntry = otherIterator.next(); - if (!(thisEntry.getKey().equals(otherEntry.getKey()) - && thisEntry.getValue().equals(otherEntry.getValue()))) { - return false; - } - } - return !(thisIterator.hasNext() || otherIterator.hasNext()); } + + return valueMap.equals(other.tupleValue()); } /** Only compare the size of the map. */ diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index fb9f932f29..a5dc3b9c93 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -164,11 +164,10 @@ public ExplainResponseNode visitEval(EvalOperator node, Object context) { @Override public ExplainResponseNode visitFlatten(FlattenOperator node, Object context) { return explain( - node, - context, - explainNode -> - explainNode.setDescription( - ImmutableMap.of("flattenField", node.getField()))); + node, + context, + explainNode -> + explainNode.setDescription(ImmutableMap.of("flattenField", node.getField()))); } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 13c6d7a979..e401feb024 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -97,6 +97,10 @@ public static LogicalPlan eval( return new 
LogicalEval(input, Arrays.asList(expressions)); } + public LogicalPlan flatten(LogicalPlan input, ReferenceExpression field) { + return new LogicalFlatten(input, field); + } + public static LogicalPlan sort(LogicalPlan input, Pair... sorts) { return new LogicalSort(input, Arrays.asList(sorts)); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index a22d1862bf..f343c2cea4 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -7,12 +7,11 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; -import com.google.common.collect.ImmutableMap; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.NoSuchElementException; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -20,7 +19,9 @@ import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.env.Environment; @Getter @ToString @@ -49,50 +50,49 @@ public boolean hasNext() { @Override public ExprValue next() { - if (!hasNext()) { - throw new NoSuchElementException("The next expression value does not exist"); - } + ExprValue inputExprValue = input.next(); + Map fieldsMap = ExprValueUtils.getTupleValue(inputExprValue); + // Get the flattened field map. String fieldName = field.getAttr(); + ExprValue exprValue = fieldsMap.get(fieldName); - // Verify that the field name is valid. 
- Map exprValueForFieldNameMap = ExprValueUtils.getTupleValue(input.next()); - if (!exprValueForFieldNameMap.containsKey(fieldName)) { - throw new IllegalArgumentException( - String.format("Field name '%s' for flatten command is not valid", fieldName)); - } + Map flattenedFieldsMap = flatten(exprValue); - // Verify that the field is a tuple. - ExprValue exprValue = exprValueForFieldNameMap.get(fieldName); - if (exprValue.type() != STRUCT) { - throw new IllegalArgumentException( - String.format("Field '%s' for flatten command must be a struct", fieldName)); - } + // Update field map. + fieldsMap.putAll(flattenedFieldsMap); + fieldsMap.remove(fieldName); - // Flatten the tuple and add the flattened field names and values to result. - Map flattenedExprValueMap = flattenExprValue(exprValue); - exprValueForFieldNameMap.putAll(flattenedExprValueMap); + // Update environment. + Environment env = inputExprValue.bindingTuples(); + + for (Entry entry : flattenedFieldsMap.entrySet()) { + ExprValue fieldValue = entry.getValue(); + Expression fieldRefExp = new ReferenceExpression(entry.getKey(), fieldValue.type()); + Environment.extendEnv(env, fieldRefExp, fieldValue); + } - return ExprTupleValue.fromExprValueMap(exprValueForFieldNameMap); + return ExprTupleValue.fromExprValueMap(fieldsMap); } - /** Flattens the given tuple and returns the result. */ - private static Map flattenExprValue(ExprValue exprValue) { + /** Flattens the given expression value tuple and returns the result. */ + private static Map flatten(ExprValue exprValue) { - ImmutableMap.Builder flattenedMap = new ImmutableMap.Builder<>(); + // Build flattened map from field name to value. + Map flattenedFieldMap = new HashMap<>(); for (Entry entry : exprValue.tupleValue().entrySet()) { ExprValue entryExprValue = entry.getValue(); - // If the expression is a tuple, recursively flatten it. + // Recursively flatten. Map flattenedEntryMap = - (entryExprValue.type() == STRUCT) - ? 
flattenExprValue(entryExprValue) - : Map.of(entry.getKey(), entryExprValue); + (entryExprValue.type() == STRUCT) + ? flatten(entryExprValue) + : Map.of(entry.getKey(), entryExprValue); - flattenedEntryMap.forEach(flattenedMap::put); + flattenedFieldMap.putAll(flattenedEntryMap); } - return flattenedMap.build(); + return flattenedFieldMap; } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 0c2764112d..2a07ec3eee 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -60,6 +60,10 @@ public static EvalOperator eval( return new EvalOperator(input, Arrays.asList(expressions)); } + public FlattenOperator flatten(PhysicalPlan input, ReferenceExpression field) { + return new FlattenOperator(input, field); + } + public static SortOperator sort(PhysicalPlan input, Pair... 
sorts) { return new SortOperator(input, Arrays.asList(sorts)); } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 64c100610d..3f4752aa2e 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -1485,12 +1485,6 @@ public void fillnull_various_values() { AstDSL.field("int_null_value"), AstDSL.intLiteral(1)))))); } - @Test - void flatten() { - - // TODO #3030: Test - } - @Test public void trendline() { assertAnalyzeEqual( diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index de80e844ee..a47027fb52 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.named; import static org.opensearch.sql.expression.DSL.ref; @@ -55,6 +56,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.TrendlineOperator; import org.opensearch.sql.storage.TableScanOperator; @@ -301,8 +303,20 @@ void can_explain_trendline() { @Test void can_explain_flatten() { + String fieldName = "field_name"; + ReferenceExpression fieldReference = ref(fieldName, 
STRUCT); - // TODO #3030: Test + PhysicalPlan plan = new FlattenOperator(tableScan, fieldReference); + ExplainResponse actual = explain.apply(plan); + + ExplainResponse expected = + new ExplainResponse( + new ExplainResponseNode( + "FlattenOperator", + ImmutableMap.of("flattenField", fieldReference), + singletonList(tableScan.explainNode()))); + + assertEquals(expected, actual, "explain flatten"); } private static class FakeTableScan extends TableScanOperator { diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index f6c1faeab9..25a32e1b9e 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.named; import static org.opensearch.sql.expression.DSL.ref; @@ -62,6 +63,7 @@ import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.expression.window.ranking.RowNumberFunction; import org.opensearch.sql.planner.logical.LogicalCloseCursor; +import org.opensearch.sql.planner.logical.LogicalFlatten; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; @@ -70,6 +72,7 @@ import org.opensearch.sql.planner.logical.LogicalTrendline; import org.opensearch.sql.planner.logical.LogicalValues; import org.opensearch.sql.planner.physical.CursorCloseOperator; +import org.opensearch.sql.planner.physical.FlattenOperator; import 
org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; import org.opensearch.sql.planner.physical.ProjectOperator; @@ -329,6 +332,23 @@ public void visitTrendline_should_build_TrendlineOperator() { @Test void visitFlatten_should_build_FlattenOperator() { - // TODO #3030: Test + // Mock physical and logical plan children. + var logicalChild = mock(LogicalPlan.class); + var physicalChild = mock(PhysicalPlan.class); + when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); + + // Build physical plan from logical plan. + var fieldName = "field_name"; + var logicalPlan = new LogicalFlatten(logicalChild, ref(fieldName, STRUCT)); + var implemented = logicalPlan.accept(implementor, null); + + assertInstanceOf( + FlattenOperator.class, implemented, "Visiting logical flatten builds physical flatten"); + assertEquals( + fieldName, + ((FlattenOperator) implemented).getField().getAttr(), + "Physical flatten has expected field"); + assertSame( + physicalChild, implemented.getChild().getFirst(), "Physical flatten has expected child"); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 5b5966462c..787996f99b 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -5,12 +5,31 @@ package org.opensearch.sql.planner.logical; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.analysis.AnalyzerTestBase; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.expression.DSL; @ExtendWith(MockitoExtension.class) class LogicalFlattenTest 
extends AnalyzerTestBase { - // TODO #3030: Test + @Test + void testFlatten() { + String fieldName = "field_name"; + String tableName = "schema"; + + LogicalPlan expectedLogicalPlan = + LogicalPlanDSL.flatten( + LogicalPlanDSL.relation(tableName, table), DSL.ref(fieldName, STRUCT)); + + Flatten actualUnresolvedPlan = + AstDSL.flatten(AstDSL.relation(tableName), AstDSL.field(fieldName)); + + assertAnalyzeEqual(expectedLogicalPlan, actualUnresolvedPlan); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index 6666ea1616..b4e852f4b1 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -10,7 +10,9 @@ import static org.mockito.Mockito.mock; import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.named; +import static org.opensearch.sql.expression.DSL.ref; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -135,8 +137,7 @@ public TableWriteOperator build(PhysicalPlan child) { "field", new ReferenceExpression("message.info", STRING), "path", new ReferenceExpression("message", STRING))); List projectList = - List.of( - new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null)); + List.of(new NamedExpression("message.info", DSL.nested(ref("message.info", STRING)), null)); LogicalNested nested = new LogicalNested(null, nestedArgs, projectList); @@ -152,7 +153,8 @@ public TableWriteOperator build(PhysicalPlan child) { AstDSL.computation(1, AstDSL.field("testField"), "dummy", SMA), ExprCoreType.DOUBLE))); - // TODO #3030: Test + 
LogicalFlatten flatten = new LogicalFlatten(relation, ref("testField", STRUCT)); + return Stream.of( relation, tableScanBuilder, @@ -176,7 +178,8 @@ public TableWriteOperator build(PhysicalPlan child) { nested, cursor, closeCursor, - trendline) + trendline, + flatten) .map(Arguments::of); } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index c62d669364..3d0f5b5418 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -5,14 +5,92 @@ package org.opensearch.sql.planner.physical; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.hasItems; +import static org.hamcrest.Matchers.iterableWithSize; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.flatten; + +import com.google.common.collect.ImmutableMap; +import java.util.List; +import java.util.Map; +import lombok.ToString; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.DSL; +@ToString @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @ExtendWith(MockitoExtension.class) -class FlattenOperatorTest { +class FlattenOperatorTest extends PhysicalPlanTestBase { + @Mock private PhysicalPlan inputPlan; + + @Test + void testFlattenStruct() { + Map structMap = + ImmutableMap.of( + "string_field", + 
"string_value", + "integer_field", + 1, + "long_field", + 1L, + "boolean_field", + true, + "list_field", + List.of("a", "b")); + + Map rowMap = ImmutableMap.of("struct_field", structMap); + ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + + ExprValue expectedRowValue = ExprValueUtils.tupleValue(structMap); + + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(rowValue); + + PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); + + assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); + } + + @Test + void testFlattenStructEmpty() { + Map structMap = ImmutableMap.of(); + Map rowMap = ImmutableMap.of("struct_field", structMap); + ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(rowValue); + + PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); + + assertThat(execute(plan), allOf(iterableWithSize(1), hasItems())); + } + + @Test + void testFlattenStructNested() { + Map structMap = + ImmutableMap.of( + "nested_struct_field", ImmutableMap.of("nested_string_field", "string_value")); + Map rowMap = ImmutableMap.of("struct_field", structMap); + ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + + Map expectedRowMap = ImmutableMap.of("nested_string_field", "string_value"); + ExprValue expectedRowValue = ExprValueUtils.tupleValue(expectedRowMap); + + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(rowValue); + + PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); - // TODO #3030: Test + assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 96ac41ceb6..56355d1001 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -5,7 +5,122 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_WITH_NULLS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + public class FlattenCommandIT extends PPLIntegTestCase { - // TODO #3030: Test + @Override + public void init() throws IOException { + loadIndex(Index.NESTED_WITHOUT_ARRAYS); + loadIndex(Index.NESTED_WITH_NULLS); + loadIndex(Index.MULTI_NESTED); + } + + @Test + public void testFlattenStructBasic() throws IOException { + String query = + String.format( + "source=%s | flatten message | fields info, author, dayOfWeek", + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("info", "string"), + schema("author", "string"), + schema("dayOfWeek", "integer")); + verifyDataRows( + result, + rows("a", "e", 1), + rows("b", "f", 2), + rows("c", "g", 1), + rows("c", "h", 4), + rows("zz", "zz", 6)); + } + + @Test + public void testFlattenStructMultiple() throws IOException { + String query = + String.format( + "source=%s | flatten message | flatten comment " + + "| fields info, author, dayOfWeek, data, likes", + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("info", "string"), + schema("author", "string"), + schema("dayOfWeek", "integer"), + 
schema("data", "string"), + schema("likes", "integer")); + verifyDataRows( + result, + rows("a", "e", 1, "ab", 1), + rows("b", "f", 2, "aa", 2), + rows("c", "g", 1, "aa", 3), + rows("c", "h", 4, "ab", 1), + rows("zz", "zz", 6, "bb", 10)); + } + + @Test + public void testFlattenStructNull() throws IOException { + String query = + String.format( + "source=%s | flatten message | fields info, author, dayOfWeek", + TEST_INDEX_NESTED_WITH_NULLS); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("info", "string"), + schema("author", "string"), + schema("dayOfWeek", "integer")); + verifyDataRows( + result, + rows("e", null, 5), + rows("c", "g", 1), + rows("c", "h", 4), + rows("zz", "zz", 6), + rows("zz", "z\"z", 6), + rows(null, "e", 7), + rows("a", "e", 1), + rows("rr", "this \"value\" contains quotes", 3), + rows(null, null, null), + rows(null, null, null)); + } + + @Test + public void testFlattenStructNested() throws IOException { + String query = + String.format( + "source=%s | flatten message | fields info, name, street, number, dayOfWeek", + TEST_INDEX_MULTI_NESTED_TYPE); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("info", "string"), + schema("name", "string"), + schema("street", "string"), + schema("number", "integer"), + schema("dayOfWeek", "integer")); + verifyDataRows( + result, + rows("a", "e", "bc", 1, 1), + rows("b", "f", "ab", 2, 2), + rows("c", "g", "sk", 3, 1), + rows("d", "h", "mb", 4, 4), + rows("zz", "yy", "qc", 6, 6)); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index f6e93e3445..650d324b9f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.named; import static org.opensearch.sql.expression.DSL.ref; @@ -70,6 +71,7 @@ import org.opensearch.sql.opensearch.setting.OpenSearchSettings; import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan; import org.opensearch.sql.planner.physical.CursorCloseOperator; +import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; @@ -365,8 +367,12 @@ void test_visitOpenSearchEval() { @Test void test_visitFlatten() { - - // TODO #3030: Test + FlattenOperator flattenOperator = + new FlattenOperator(values(emptyList()), ref("field_name", STRUCT)); + assertEquals( + resourceMonitor(flattenOperator), + executionProtector.visitFlatten(flattenOperator, null), + "flatten operator is protected"); } PhysicalPlan resourceMonitor(PhysicalPlan input) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 116c607944..0ca20f1dc3 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -24,6 +24,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; +import static 
org.opensearch.sql.ast.dsl.AstDSL.flatten; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.head; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; @@ -448,8 +449,10 @@ public void testEvalCommand() { @Test public void testFlattenCommand() { - - // TODO #3030: Test + String fieldName = "field_name"; + assertEqual( + String.format("source=t | flatten %s", fieldName), + flatten(relation("t"), field(fieldName))); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 8adaf65fea..1f28f2e490 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -25,6 +25,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; +import static org.opensearch.sql.ast.dsl.AstDSL.flatten; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.in; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; @@ -254,8 +255,10 @@ public void testEvalIfFunctionExpr() { @Test public void testFlattenExpr() { - - // TODO #3030: Test + String fieldName = "field_name"; + assertEqual( + String.format("source=t | flatten %s", fieldName), + flatten(relation("t"), field(fieldName))); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 2d1270a49d..fef1661165 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -98,8 +98,10 @@ public void testTrendlineCommand() { @Test 
public void testFlattenCommand() { - - // TODO #3030: Test + assertEquals( + "source=t | flatten field_name", + anonymize("source=t | flatten field_name"), + "Flatten command is not modified by anonymizer"); } @Test From 26e94436f767e59777c9bd934bc19138d416aca9 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 27 Jan 2025 14:07:46 -0800 Subject: [PATCH 06/81] Update/fix tests. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 8 +- .../planner/logical/LogicalFlattenTest.java | 113 +++++++++++++++++- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 2 +- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 4 +- 4 files changed, 116 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 04b7dbd8bb..24a3178b98 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -468,8 +468,12 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { TypeEnvironment env = context.peek(); // Verify that the field type is valid. 
- ReferenceExpression fieldExpr = - (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); + ReferenceExpression fieldExpr; + try { + fieldExpr = (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); + } catch (SemanticCheckException e) { + throw new IllegalArgumentException("Invalid field name for flatten command", e); + } ExprType fieldType = fieldExpr.type(); if (fieldType != STRUCT) { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 787996f99b..c0e6afc4c5 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -5,31 +5,132 @@ package org.opensearch.sql.planner.logical; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import java.util.HashMap; +import java.util.Map; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.analysis.AnalyzerTestBase; +import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; @ExtendWith(MockitoExtension.class) class LogicalFlattenTest extends AnalyzerTestBase { + private static final String TABLE_NAME = "schema"; + + @Override + protected Map typeMapping() { + Map mapping = new 
HashMap<>(super.typeMapping()); + + // Override mapping for testing. + mapping.put("struct_empty", STRUCT); + + mapping.put("struct_basic", STRUCT); + mapping.put("struct_basic.integer", INTEGER); + mapping.put("struct_basic.double", DOUBLE); + + mapping.put("struct_nested", STRUCT); + mapping.put("struct_nested.struct", STRUCT); + mapping.put("struct_nested.struct.string", STRING); + + mapping.put("duplicate", STRUCT); + mapping.put("duplicate.integer_value", INTEGER); + + return mapping; + } + @Test - void testFlatten() { - String fieldName = "field_name"; - String tableName = "schema"; + void testStructEmpty() { + LogicalPlan expectedLogicalPlan = + LogicalPlanDSL.flatten( + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_empty", STRUCT)); + LogicalPlan actualLogicalPlan = + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_empty"))); + assertEquals(expectedLogicalPlan, actualLogicalPlan); + + Map fieldMap = + analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); + assertFalse(fieldMap.containsKey("struct_empty")); + } + + @Test + void testStructBasic() { + LogicalPlan expectedLogicalPlan = + LogicalPlanDSL.flatten( + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_basic", STRUCT)); + LogicalPlan actualLogicalPlan = + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_basic"))); + + assertEquals(expectedLogicalPlan, actualLogicalPlan); + Map fieldMap = + analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); + assertFalse(fieldMap.containsKey("struct_basic")); + assertFalse(fieldMap.containsKey("struct_basic.integer")); + assertFalse(fieldMap.containsKey("struct_basic.double")); + assertEquals(INTEGER, fieldMap.get("integer")); + assertEquals(DOUBLE, fieldMap.get("double")); + } + + @Test + void testStructNested() { LogicalPlan expectedLogicalPlan = LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(tableName, table), DSL.ref(fieldName, STRUCT)); + 
LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_nested", STRUCT)); + LogicalPlan actualLogicalPlan = + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_nested"))); + + assertEquals(expectedLogicalPlan, actualLogicalPlan); + + Map fieldMap = + analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); + assertFalse(fieldMap.containsKey("struct_nested")); + assertFalse(fieldMap.containsKey("struct_nested.struct")); + assertEquals(STRING, fieldMap.get("string")); + } + + @Test + void testInvalidName() { + Flatten actualUnresolvedPlan = + AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("invalid")); + String msg = + assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) + .getMessage(); + assertEquals("Invalid field name for flatten command", msg); + } + + @Test + void testInvalidType() { + Flatten actualUnresolvedPlan = + AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("integer_value")); + + String actualMsg = + assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) + .getMessage(); + assertEquals("Invalid field type 'INTEGER' for flatten command", actualMsg); + } + + @Test + void testInvalidDuplicate() { Flatten actualUnresolvedPlan = - AstDSL.flatten(AstDSL.relation(tableName), AstDSL.field(fieldName)); + AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("duplicate")); - assertAnalyzeEqual(expectedLogicalPlan, actualUnresolvedPlan); + String msg = + assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) + .getMessage(); + assertEquals("Flatten command cannot overwrite field 'integer_value'", msg); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 05701deb00..f3e6e7d878 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ 
b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -196,7 +196,7 @@ public String visitEval(Eval node, String context) { @Override public String visitFlatten(Flatten node, String context) { String child = node.getChild().getFirst().accept(this, context); - Field field = node.getField(); + String field = visitExpression(node.getField()); return StringUtils.format("%s | flatten %s", child, field); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index fef1661165..4129b493b3 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -99,9 +99,9 @@ public void testTrendlineCommand() { @Test public void testFlattenCommand() { assertEquals( + "Flatten command is not modified by anonymizer", "source=t | flatten field_name", - anonymize("source=t | flatten field_name"), - "Flatten command is not modified by anonymizer"); + anonymize("source=t | flatten field_name")); } @Test From 237b69eea07999c137a0c701c35402748262a5d5 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 09:01:49 -0800 Subject: [PATCH 07/81] Update integration tests to align with doc tests. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 3 +- .../planner/physical/FlattenOperatorTest.java | 24 ++-- .../sql/legacy/SQLIntegTestCase.java | 6 + .../org/opensearch/sql/legacy/TestUtils.java | 5 + .../opensearch/sql/legacy/TestsConstants.java | 1 + .../opensearch/sql/ppl/FlattenCommandIT.java | 105 ++---------------- integ-test/src/test/resources/cities.json | 4 + .../indexDefinitions/cities_mapping.json | 34 ++++++ 8 files changed, 74 insertions(+), 108 deletions(-) create mode 100644 integ-test/src/test/resources/cities.json create mode 100644 integ-test/src/test/resources/indexDefinitions/cities_mapping.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 24a3178b98..08c13a6466 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -467,7 +467,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); TypeEnvironment env = context.peek(); - // Verify that the field type is valid. + // Verify that the field name is valid. ReferenceExpression fieldExpr; try { fieldExpr = (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); @@ -475,6 +475,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { throw new IllegalArgumentException("Invalid field name for flatten command", e); } + // Verify that the field type is valid. 
ExprType fieldType = fieldExpr.type(); if (fieldType != STRUCT) { throw new IllegalArgumentException( diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 3d0f5b5418..798b2c21e0 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -34,19 +34,14 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; @Test - void testFlattenStruct() { + void testFlattenStructBasic() { Map structMap = - ImmutableMap.of( - "string_field", - "string_value", - "integer_field", - 1, - "long_field", - 1L, - "boolean_field", - true, - "list_field", - List.of("a", "b")); + ImmutableMap.ofEntries( + Map.entry("string_field", "string_value"), + Map.entry("integer_field", 1), + Map.entry("long_field", 1L), + Map.entry("boolean_field", true), + Map.entry("list_field", List.of("a", "b"))); Map rowMap = ImmutableMap.of("struct_field", structMap); ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); @@ -77,9 +72,10 @@ void testFlattenStructEmpty() { @Test void testFlattenStructNested() { + Map nestedStructMap = + ImmutableMap.ofEntries(Map.entry("nested_string_field", "string_value")); Map structMap = - ImmutableMap.of( - "nested_struct_field", ImmutableMap.of("nested_string_field", "string_value")); + ImmutableMap.ofEntries(Map.entry("nested_struct_field", nestedStructMap)); Map rowMap = ImmutableMap.of("struct_field", structMap); ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 328e76013f..9c877c52de 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.legacy.TestUtils.getAliasIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankWithNullValuesIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getCitiesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNonnumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDateIndexMapping; @@ -759,6 +760,11 @@ public enum Index { "json", getJsonTestIndexMapping(), "src/test/resources/json_test.json"), + CITIES( + TestsConstants.TEST_INDEX_CITIES, + "cities", + getCitiesIndexMapping(), + "src/test/resources/cities.json"), DATA_TYPE_ALIAS( TestsConstants.TEST_INDEX_ALIAS, "alias", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 27963e3787..af62936b92 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -265,6 +265,11 @@ public static String getAliasIndexMapping() { return getMappingFile(mappingFile); } + public static String getCitiesIndexMapping() { + String mappingFile = "cities_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 4601aadf7f..b421efc47f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++
b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -60,6 +60,7 @@ public class TestsConstants { public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test"; public static final String TEST_INDEX_ALIAS = TEST_INDEX + "_alias"; + public static final String TEST_INDEX_CITIES = TEST_INDEX + "_cities"; public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; public static final String DATASOURCES = ".ql-datasources"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 56355d1001..aef5fcaf71 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -5,9 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_WITH_NULLS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CITIES; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -21,106 +19,27 @@ public class FlattenCommandIT extends PPLIntegTestCase { @Override public void init() throws IOException { - loadIndex(Index.NESTED_WITHOUT_ARRAYS); - loadIndex(Index.NESTED_WITH_NULLS); - loadIndex(Index.MULTI_NESTED); + loadIndex(Index.CITIES); } @Test - public void testFlattenStructBasic() throws IOException { + public void testFlattenStruct() throws IOException { String query = String.format( - "source=%s | flatten message | fields info, author, dayOfWeek", - TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS); + 
"source=%s | flatten location | fields state, province, country, latitude, longitude", + TEST_INDEX_CITIES); JSONObject result = executeQuery(query); verifySchema( result, - schema("info", "string"), - schema("author", "string"), - schema("dayOfWeek", "integer")); + schema("state", "string"), + schema("province", "string"), + schema("country", "string"), + schema("latitude", "float"), + schema("longitude", "float")); verifyDataRows( result, - rows("a", "e", 1), - rows("b", "f", 2), - rows("c", "g", 1), - rows("c", "h", 4), - rows("zz", "zz", 6)); - } - - @Test - public void testFlattenStructMultiple() throws IOException { - String query = - String.format( - "source=%s | flatten message | flatten comment " - + "| fields info, author, dayOfWeek, data, likes", - TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS); - JSONObject result = executeQuery(query); - - verifySchema( - result, - schema("info", "string"), - schema("author", "string"), - schema("dayOfWeek", "integer"), - schema("data", "string"), - schema("likes", "integer")); - verifyDataRows( - result, - rows("a", "e", 1, "ab", 1), - rows("b", "f", 2, "aa", 2), - rows("c", "g", 1, "aa", 3), - rows("c", "h", 4, "ab", 1), - rows("zz", "zz", 6, "bb", 10)); - } - - @Test - public void testFlattenStructNull() throws IOException { - String query = - String.format( - "source=%s | flatten message | fields info, author, dayOfWeek", - TEST_INDEX_NESTED_WITH_NULLS); - JSONObject result = executeQuery(query); - - verifySchema( - result, - schema("info", "string"), - schema("author", "string"), - schema("dayOfWeek", "integer")); - verifyDataRows( - result, - rows("e", null, 5), - rows("c", "g", 1), - rows("c", "h", 4), - rows("zz", "zz", 6), - rows("zz", "z\"z", 6), - rows(null, "e", 7), - rows("a", "e", 1), - rows("rr", "this \"value\" contains quotes", 3), - rows(null, null, null), - rows(null, null, null)); - } - - @Test - public void testFlattenStructNested() throws IOException { - String query = - String.format( - "source=%s | 
flatten message | fields info, name, street, number, dayOfWeek", - TEST_INDEX_MULTI_NESTED_TYPE); - JSONObject result = executeQuery(query); - - verifySchema( - result, - schema("info", "string"), - schema("name", "string"), - schema("street", "string"), - schema("number", "integer"), - schema("dayOfWeek", "integer")); - verifyDataRows( - result, - rows("a", "e", "bc", 1, 1), - rows("b", "f", "ab", 2, 2), - rows("c", "g", "sk", 3, 1), - rows("d", "h", "mb", 4, 4), - rows("zz", "yy", "qc", 6, 6)); + rows("Washington", null, "United States", 47.6061, -122.3328), + rows(null, "British Columbia", "Canada", 49.2827, -123.1207)); } } diff --git a/integ-test/src/test/resources/cities.json b/integ-test/src/test/resources/cities.json new file mode 100644 index 0000000000..bc74a0d17d --- /dev/null +++ b/integ-test/src/test/resources/cities.json @@ -0,0 +1,4 @@ +{"index":{"_id":"1"}} +{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} +{"index":{"_id":"2"}} +{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} diff --git a/integ-test/src/test/resources/indexDefinitions/cities_mapping.json b/integ-test/src/test/resources/indexDefinitions/cities_mapping.json new file mode 100644 index 0000000000..e85047c8a8 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/cities_mapping.json @@ -0,0 +1,34 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "location": { + "type": "object", + "properties": { + "state": { + "type": "keyword" + }, + "province": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "coordinates": { + "type": "object", + "properties": { + "latitude": { + "type": "double" + }, + "longitude": { + "type": "double" + } + } + } + } + } + } + } +} \ No newline at end of file From 3981c3832c4e47a99e5d148e072ea6339f758caa 
Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 00:19:05 -0800 Subject: [PATCH 08/81] Minor cleanup. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 4 +-- .../sql/planner/DefaultImplementor.java | 2 +- .../sql/planner/logical/LogicalFlatten.java | 6 ++-- .../sql/planner/logical/LogicalPlanDSL.java | 4 +-- .../sql/planner/physical/FlattenOperator.java | 4 +-- .../sql/planner/physical/PhysicalPlanDSL.java | 4 +-- .../sql/planner/DefaultImplementorTest.java | 11 ++------ .../planner/physical/FlattenOperatorTest.java | 28 +++++++++---------- .../OpenSearchExecutionProtectorTest.java | 5 ++-- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 5 +--- 10 files changed, 32 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 08c13a6466..724c2c12ac 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -134,7 +134,7 @@ public class Analyzer extends AbstractNodeVisitor private final BuiltinFunctionRepository repository; private static final String PATH_SEPARATOR = "."; - private static final Pattern PATH_COMPONENT_PATTERN = + private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(PATH_SEPARATOR, Pattern.LITERAL); /** Constructor. */ @@ -491,7 +491,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { for (java.util.Map.Entry entry : fieldsMap.entrySet()) { String path = entry.getKey(); - List pathComponents = Arrays.stream(PATH_COMPONENT_PATTERN.split(path)).toList(); + List pathComponents = Arrays.stream(PATH_SEPARATOR_PATTERN.split(path)).toList(); // Verify that path starts with the field name. 
if (!pathComponents.getFirst().equals(fieldName)) { diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index da3704dd4e..07ef0fd867 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -103,7 +103,7 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { @Override public PhysicalPlan visitFlatten(LogicalFlatten node, C context) { - return new FlattenOperator(visitChild(node, context), node.getField()); + return new FlattenOperator(visitChild(node, context), node.getFieldRefExp()); } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java index a4b018543d..01edf0ab84 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFlatten.java @@ -16,11 +16,11 @@ @ToString @EqualsAndHashCode(callSuper = true) public class LogicalFlatten extends LogicalPlan { - private final ReferenceExpression field; + private final ReferenceExpression fieldRefExp; - public LogicalFlatten(LogicalPlan child, ReferenceExpression field) { + public LogicalFlatten(LogicalPlan child, ReferenceExpression fieldRefExp) { super(Collections.singletonList(child)); - this.field = field; + this.fieldRefExp = fieldRefExp; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index e401feb024..1a5b569ef4 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -97,8 +97,8 @@ public static LogicalPlan eval( return new LogicalEval(input, 
Arrays.asList(expressions)); } - public LogicalPlan flatten(LogicalPlan input, ReferenceExpression field) { - return new LogicalFlatten(input, field); + public LogicalPlan flatten(LogicalPlan input, ReferenceExpression fieldRefExp) { + return new LogicalFlatten(input, fieldRefExp); } public static LogicalPlan sort(LogicalPlan input, Pair... sorts) { diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index f343c2cea4..91b8016911 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -53,7 +53,7 @@ public ExprValue next() { ExprValue inputExprValue = input.next(); Map fieldsMap = ExprValueUtils.getTupleValue(inputExprValue); - // Get the flattened field map. + // Build the flattened field map. String fieldName = field.getAttr(); ExprValue exprValue = fieldsMap.get(fieldName); @@ -63,7 +63,7 @@ public ExprValue next() { fieldsMap.putAll(flattenedFieldsMap); fieldsMap.remove(fieldName); - // Update environment. + // Update the environment. 
Environment env = inputExprValue.bindingTuples(); for (Entry entry : flattenedFieldsMap.entrySet()) { diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 2a07ec3eee..0986469d15 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -60,8 +60,8 @@ public static EvalOperator eval( return new EvalOperator(input, Arrays.asList(expressions)); } - public FlattenOperator flatten(PhysicalPlan input, ReferenceExpression field) { - return new FlattenOperator(input, field); + public FlattenOperator flatten(PhysicalPlan input, ReferenceExpression fieldRefExp) { + return new FlattenOperator(input, fieldRefExp); } public static SortOperator sort(PhysicalPlan input, Pair... sorts) { diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 25a32e1b9e..a56a014d55 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -342,13 +342,8 @@ void visitFlatten_should_build_FlattenOperator() { var logicalPlan = new LogicalFlatten(logicalChild, ref(fieldName, STRUCT)); var implemented = logicalPlan.accept(implementor, null); - assertInstanceOf( - FlattenOperator.class, implemented, "Visiting logical flatten builds physical flatten"); - assertEquals( - fieldName, - ((FlattenOperator) implemented).getField().getAttr(), - "Physical flatten has expected field"); - assertSame( - physicalChild, implemented.getChild().getFirst(), "Physical flatten has expected child"); + assertInstanceOf(FlattenOperator.class, implemented); + assertEquals(fieldName, ((FlattenOperator) implemented).getField().getAttr()); + assertSame(physicalChild, 
implemented.getChild().getFirst()); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 798b2c21e0..df6d817cf1 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -33,6 +33,20 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; + @Test + void testFlattenStructEmpty() { + Map structMap = ImmutableMap.of(); + Map rowMap = ImmutableMap.of("struct_field", structMap); + ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(rowValue); + + PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); + + assertThat(execute(plan), allOf(iterableWithSize(1), hasItems())); + } + @Test void testFlattenStructBasic() { Map structMap = @@ -56,20 +70,6 @@ void testFlattenStructBasic() { assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); } - @Test - void testFlattenStructEmpty() { - Map structMap = ImmutableMap.of(); - Map rowMap = ImmutableMap.of("struct_field", structMap); - ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); - - when(inputPlan.hasNext()).thenReturn(true, false); - when(inputPlan.next()).thenReturn(rowValue); - - PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); - - assertThat(execute(plan), allOf(iterableWithSize(1), hasItems())); - } - @Test void testFlattenStructNested() { Map nestedStructMap = diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index 650d324b9f..99e958db1b 100644 --- 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -369,10 +369,9 @@ void test_visitOpenSearchEval() { void test_visitFlatten() { FlattenOperator flattenOperator = new FlattenOperator(values(emptyList()), ref("field_name", STRUCT)); + assertEquals( - resourceMonitor(flattenOperator), - executionProtector.visitFlatten(flattenOperator, null), - "flatten operator is protected"); + resourceMonitor(flattenOperator), executionProtector.visitFlatten(flattenOperator, null)); } PhysicalPlan resourceMonitor(PhysicalPlan input) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 4129b493b3..c2a94e1c5f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -98,10 +98,7 @@ public void testTrendlineCommand() { @Test public void testFlattenCommand() { - assertEquals( - "Flatten command is not modified by anonymizer", - "source=t | flatten field_name", - anonymize("source=t | flatten field_name")); + assertEquals("source=t | flatten field_name", anonymize("source=t | flatten field_name")); } @Test From 2ca71942a3be6ac67caf9b60b0351681a896e22a Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 00:57:29 -0800 Subject: [PATCH 09/81] Add `ExplainIT` tests for flatten Signed-off-by: currantw --- .../org/opensearch/sql/ppl/ExplainIT.java | 9 ++++-- .../expectedOutput/ppl/explain_flatten.json | 31 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java 
b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index dd382381ee..1ecefe8b69 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CITIES; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import com.google.common.io.Resources; @@ -19,6 +20,7 @@ public class ExplainIT extends PPLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.ACCOUNT); + loadIndex(Index.CITIES); } @Test @@ -127,9 +129,10 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { } @Test - void testFlatten() { - - // TODO #3030: Test + public void testFlatten() throws Exception { + String actual = explainQueryToString(String.format("source=%s | flatten location", TEST_INDEX_CITIES)); + String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); + assertJsonEquals(expected, actual); } private static String loadFromFile(String filename) diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json new file mode 100644 index 0000000000..c894445fd1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json @@ -0,0 +1,31 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[name, country, province, latitude, state, longitude]" + }, + "children": [ + { + "name": "FlattenOperator", + "description": { + "flattenField": { + "attr": "location", + "paths": [ + "location" + ], + "type": "STRUCT" + } + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_cities, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, 
cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } + ] + } +} \ No newline at end of file From 9ddfc4a9174505ab14165b80aceebe1282ba5c62 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 03:11:38 -0800 Subject: [PATCH 10/81] Revert recursive flattening, add documentation, more test updates Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 21 +++++------ .../sql/planner/physical/FlattenOperator.java | 26 +------------- .../planner/logical/LogicalFlattenTest.java | 4 +-- .../planner/physical/FlattenOperatorTest.java | 20 ----------- docs/user/dql/metadata.rst | 3 +- docs/user/ppl/cmd/flatten.rst | 35 ++++++++++++++++++- doctest/test_data/cities.json | 2 ++ doctest/test_docs.py | 5 +-- doctest/test_mapping/cities.json | 34 ++++++++++++++++++ .../org/opensearch/sql/ppl/ExplainIT.java | 3 +- 10 files changed, 89 insertions(+), 64 deletions(-) create mode 100644 doctest/test_data/cities.json create mode 100644 doctest/test_mapping/cities.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 724c2c12ac..cff833954e 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -489,31 +489,28 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { java.util.Map addFieldsMap = new HashMap<>(); java.util.Map removeFieldsMap = new HashMap<>(); + removeFieldsMap.put(fieldName, fieldType); + for (java.util.Map.Entry entry : fieldsMap.entrySet()) { String path = entry.getKey(); List pathComponents = Arrays.stream(PATH_SEPARATOR_PATTERN.split(path)).toList(); // Verify that path starts with the field name. - if (!pathComponents.getFirst().equals(fieldName)) { - continue; - } - - // Remove non-leaf nodes. 
- ExprType type = entry.getValue(); - if (type == STRUCT) { - removeFieldsMap.put(path, STRUCT); + if (pathComponents.size() < 2 || !pathComponents.getFirst().equals(fieldName)) { continue; } - String newFieldName = pathComponents.getLast(); + String newPath = + String.join(PATH_SEPARATOR, pathComponents.subList(1, pathComponents.size())); // Verify that new field does not overwrite an existing field. - if (fieldsMap.containsKey(newFieldName)) { + if (fieldsMap.containsKey(newPath)) { throw new IllegalArgumentException( - String.format("Flatten command cannot overwrite field '%s'", newFieldName)); + String.format("Flatten command cannot overwrite field '%s'", newPath)); } - addFieldsMap.put(newFieldName, type); + ExprType type = entry.getValue(); + addFieldsMap.put(newPath, type); removeFieldsMap.put(path, type); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 91b8016911..92cf719ab2 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -5,10 +5,7 @@ package org.opensearch.sql.planner.physical; -import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; - import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -57,7 +54,7 @@ public ExprValue next() { String fieldName = field.getAttr(); ExprValue exprValue = fieldsMap.get(fieldName); - Map flattenedFieldsMap = flatten(exprValue); + Map flattenedFieldsMap = exprValue.tupleValue(); // Update field map. fieldsMap.putAll(flattenedFieldsMap); @@ -74,25 +71,4 @@ public ExprValue next() { return ExprTupleValue.fromExprValueMap(fieldsMap); } - - /** Flattens the given expression value tuple and returns the result. 
*/ - private static Map flatten(ExprValue exprValue) { - - // Build flattened map from field name to value. - Map flattenedFieldMap = new HashMap<>(); - - for (Entry entry : exprValue.tupleValue().entrySet()) { - ExprValue entryExprValue = entry.getValue(); - - // Recursively flatten. - Map flattenedEntryMap = - (entryExprValue.type() == STRUCT) - ? flatten(entryExprValue) - : Map.of(entry.getKey(), entryExprValue); - - flattenedFieldMap.putAll(flattenedEntryMap); - } - - return flattenedFieldMap; - } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index c0e6afc4c5..373543a4df 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -97,8 +97,8 @@ void testStructNested() { Map fieldMap = analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); assertFalse(fieldMap.containsKey("struct_nested")); - assertFalse(fieldMap.containsKey("struct_nested.struct")); - assertEquals(STRING, fieldMap.get("string")); + assertEquals(STRUCT, fieldMap.get("struct")); + assertEquals(STRING, fieldMap.get("struct.string")); } @Test diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index df6d817cf1..c2e54c7e37 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -69,24 +69,4 @@ void testFlattenStructBasic() { assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); } - - @Test - void testFlattenStructNested() { - Map nestedStructMap = - ImmutableMap.ofEntries(Map.entry("nested_string_field", "string_value")); - Map structMap = - 
ImmutableMap.ofEntries(Map.entry("nested_struct_field", nestedStructMap)); - Map rowMap = ImmutableMap.of("struct_field", structMap); - ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); - - Map expectedRowMap = ImmutableMap.of("nested_string_field", "string_value"); - ExprValue expectedRowValue = ExprValueUtils.tupleValue(expectedRowMap); - - when(inputPlan.hasNext()).thenReturn(true, false); - when(inputPlan.next()).thenReturn(rowValue); - - PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); - - assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); - } } diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index b059c0cded..74ce3566c3 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 11/11 + fetched rows / total rows = 12/12 +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -44,6 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | cities | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | 
docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 969f283000..07ac8cdcf8 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -2,4 +2,37 @@ flatten ============= -TODO #3030: Documentation and doctests \ No newline at end of file +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +============ + +Using ``flatten`` command to flatten an `object`. New fields are added to the search results corresponding +to each of the object's fields, while the object field itself is removed from the search results. + +Syntax +============ + +`flatten field` + +* `field`: reference to the `object` field to flatten. + +Example 1: Flatten an object field +================================== + +PPL query:: + + os> source=cities | flatten location | fields name, province, state, country, coordinates + fetched rows / total rows = 2/2 + +-----------+------------------+------------+---------------+-----------------------------------------------+ + | name | province | state | country | coordinates | + +-----------+------------------+------------+---------------+-----------------------------------------------+ + | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | + +-----------+------------------+------------+---------------+-----------------------------------------------+ + + diff --git a/doctest/test_data/cities.json b/doctest/test_data/cities.json new file mode 100644 index 0000000000..62a96fb21a --- /dev/null +++ b/doctest/test_data/cities.json @@ -0,0 +1,2 @@ +{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} +{"name": "Vancouver", "location": { "province": "British 
Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 906bbd65b5..50cfc1c2df 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -31,6 +31,7 @@ DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" JSON_TEST = "json_test" +CITIES = "cities" class DocTestConnection(OpenSearchConnection): @@ -125,7 +126,7 @@ def set_up_test_indices(test): load_file("datasources.json", index_name=DATASOURCES) load_file("weblogs.json", index_name=WEBLOGS) load_file("json_test.json", index_name=JSON_TEST) - + load_file("cities.json", index_name=CITIES) def load_file(filename, index_name): # Create index with the mapping if mapping file exists @@ -153,7 +154,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, CITIES], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/cities.json b/doctest/test_mapping/cities.json new file mode 100644 index 0000000000..6b96246d5d --- /dev/null +++ b/doctest/test_mapping/cities.json @@ -0,0 +1,34 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "location": { + "type": "object", + "properties": { + "state": { + "type": "keyword" + }, + "province": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "coordinates": { + "type": "object", + "properties": { + "latitude": { + "type": "double" + }, + "longitude": { + "type": "double" + } + } + } + } + } + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 
1ecefe8b69..acaf5d5e43 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -130,7 +130,8 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testFlatten() throws Exception { - String actual = explainQueryToString(String.format("source=%s | flatten location", TEST_INDEX_CITIES)); + String actual = + explainQueryToString(String.format("source=%s | flatten location", TEST_INDEX_CITIES)); String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); assertJsonEquals(expected, actual); } From c54c1f55749e014d4a4d5ad70d1af1e815ed3e26 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 03:27:16 -0800 Subject: [PATCH 11/81] One more doctest fix Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 07ac8cdcf8..376aeef13c 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -27,12 +27,10 @@ Example 1: Flatten an object field PPL query:: os> source=cities | flatten location | fields name, province, state, country, coordinates - fetched rows / total rows = 2/2 + fetched rows / total rows = 2/2 +-----------+------------------+------------+---------------+-----------------------------------------------+ | name | province | state | country | coordinates | - +-----------+------------------+------------+---------------+-----------------------------------------------+ + |-----------+------------------+------------+---------------+-----------------------------------------------| | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | 
+-----------+------------------+------------+---------------+-----------------------------------------------+ - - From 8993e11097bb7387a16b90b2308acd1d3bcf9495 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 09:05:49 -0800 Subject: [PATCH 12/81] Fix `ExplainIT` error Signed-off-by: currantw --- .../src/test/java/org/opensearch/sql/ppl/ExplainIT.java | 4 ++-- .../test/resources/expectedOutput/ppl/explain_flatten.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index acaf5d5e43..8f6913a8a2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -130,8 +130,8 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testFlatten() throws Exception { - String actual = - explainQueryToString(String.format("source=%s | flatten location", TEST_INDEX_CITIES)); + String query = String.format("source=%s | flatten location", TEST_INDEX_CITIES); + String actual = explainQueryToString(query); String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); assertJsonEquals(expected, actual); } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json index c894445fd1..b729bf415f 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json @@ -2,7 +2,7 @@ "root": { "name": "ProjectOperator", "description": { - "fields": "[name, country, province, latitude, state, longitude]" + "fields": "[name, country, province, coordinates, state]" }, "children": [ { From 288add224dec6e1fd0c20faf336b72297c8df988 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 09:06:10 -0800 Subject: [PATCH 
13/81] Add additional test case to `flatten.rst` Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 376aeef13c..a7b7d27e18 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -34,3 +34,17 @@ PPL query:: | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | +-----------+------------------+------------+---------------+-----------------------------------------------+ + +Example 2: Flatten multiple object fields +========================================= + +PPL query:: + + os> source=cities | flatten location | flatten coordinates | fields name, province, state, country, coordinates + fetched rows / total rows = 2/2 + +-----------+------------------+------------+---------------+----------+-----------+ + | name | province | state | country | latitude | longitude | + +-----------+------------------+------------+---------------+----------+-----------+ + | Seattle | null | Washington | United States | 47.6061 | -122.3328 | + | Vancouver | British Columbia | null | Canada | 49.2827 | -123.1207 | + +-----------+------------------+------------+---------------+----------+-----------+ From eca31544cea0280e48c98319b32b1a1c24188c92 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 09:20:10 -0800 Subject: [PATCH 14/81] Fix `FlattenCommandIT`, add additional test case. 
Signed-off-by: currantw --- .../opensearch/sql/ppl/FlattenCommandIT.java | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index aef5fcaf71..5bc5924e0d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.Map; import org.json.JSONObject; import org.junit.jupiter.api.Test; @@ -26,7 +27,36 @@ public void init() throws IOException { public void testFlattenStruct() throws IOException { String query = String.format( - "source=%s | flatten location | fields state, province, country, latitude, longitude", + "source=%s | flatten location | fields state, province, country, coordinates", + TEST_INDEX_CITIES); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("state", "string"), + schema("province", "string"), + schema("country", "string"), + schema("coordinates", "struct")); + verifyDataRows( + result, + rows( + "Washington", + null, + "United States", + Map.of("latitude", 47.6061, "longitude", -122.3328)), + rows( + null, + "British Columbia", + "Canada", + Map.of("latitude", 49.2827, "longitude", -123.1207))); + } + + @Test + public void testFlattenStructMultiple() throws IOException { + String query = + String.format( + "source=%s | flatten location | flatten coordinates | fields state, province, country," + + " latitude, longitude", TEST_INDEX_CITIES); JSONObject result = executeQuery(query); From c89a302659ec12d56fad9b585c29cfade9c4b654 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 09:43:10 -0800 Subject: [PATCH 15/81] Fix `PhysicalPlanNodeVisitor` test coverage. 
Signed-off-by: currantw --- .../sql/planner/physical/PhysicalPlanNodeVisitorTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index 26f288e6b6..bd67c9076d 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -17,6 +17,7 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.dedupe; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.eval; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.flatten; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.project; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rareTopN; @@ -129,6 +130,8 @@ public static Stream getPhysicalPlanForTest() { PhysicalPlan eval = eval(plan, Pair.of(ref, ref)); + PhysicalPlan flatten = flatten(plan, ref); + PhysicalPlan sort = sort(plan, Pair.of(SortOption.DEFAULT_ASC, ref)); PhysicalPlan takeOrdered = takeOrdered(plan, 1, 1, Pair.of(SortOption.DEFAULT_ASC, ref)); @@ -161,6 +164,7 @@ public static Stream getPhysicalPlanForTest() { Arguments.of(window, "window"), Arguments.of(remove, "remove"), Arguments.of(eval, "eval"), + Arguments.of(flatten, "flatten"), Arguments.of(sort, "sort"), Arguments.of(takeOrdered, "takeOrdered"), Arguments.of(dedupe, "dedupe"), From 9b2e9ce19d8b541da63d31b013030e6e577e4419 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 19:21:05 -0800 Subject: [PATCH 16/81] Review: use `StringUtils.format` instead of `String.format`. 
Signed-off-by: currantw --- .../java/org/opensearch/sql/analysis/Analyzer.java | 10 +++++++--- .../test/java/org/opensearch/sql/ppl/ExplainIT.java | 3 ++- .../java/org/opensearch/sql/ppl/FlattenCommandIT.java | 5 +++-- .../org/opensearch/sql/ppl/parser/AstBuilderTest.java | 3 ++- .../sql/ppl/parser/AstExpressionBuilderTest.java | 6 +++--- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index cff833954e..7d4cd60092 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -74,6 +74,7 @@ import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -478,8 +479,11 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Verify that the field type is valid. ExprType fieldType = fieldExpr.type(); if (fieldType != STRUCT) { - throw new IllegalArgumentException( - String.format("Invalid field type '%s' for flatten command", fieldType)); + String msg = + StringUtils.format( + "Invalid field type '%s' for flatten command. Supported field types: '%s'.", + fieldType, STRUCT.typeName()); + throw new IllegalArgumentException(msg); } // Get fields to add and remove. @@ -506,7 +510,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Verify that new field does not overwrite an existing field. 
if (fieldsMap.containsKey(newPath)) { throw new IllegalArgumentException( - String.format("Flatten command cannot overwrite field '%s'", newPath)); + StringUtils.format("Flatten command cannot overwrite field '%s'", newPath)); } ExprType type = entry.getValue(); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 8f6913a8a2..6fb30a3b26 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -14,6 +14,7 @@ import java.nio.file.Files; import java.nio.file.Paths; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.utils.StringUtils; public class ExplainIT extends PPLIntegTestCase { @@ -130,7 +131,7 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testFlatten() throws Exception { - String query = String.format("source=%s | flatten location", TEST_INDEX_CITIES); + String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_CITIES); String actual = explainQueryToString(query); String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); assertJsonEquals(expected, actual); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 5bc5924e0d..b6f70f32d4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -15,6 +15,7 @@ import java.util.Map; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.utils.StringUtils; public class FlattenCommandIT extends PPLIntegTestCase { @@ -26,7 +27,7 @@ public void init() throws IOException { @Test public void testFlattenStruct() throws IOException { String query = - String.format( + StringUtils.format( "source=%s | 
flatten location | fields state, province, country, coordinates", TEST_INDEX_CITIES); JSONObject result = executeQuery(query); @@ -54,7 +55,7 @@ public void testFlattenStruct() throws IOException { @Test public void testFlattenStructMultiple() throws IOException { String query = - String.format( + StringUtils.format( "source=%s | flatten location | flatten coordinates | fields state, province, country," + " latitude, longitude", TEST_INDEX_CITIES); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 0ca20f1dc3..7ace9fc831 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -67,6 +67,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; public class AstBuilderTest { @@ -451,7 +452,7 @@ public void testEvalCommand() { public void testFlattenCommand() { String fieldName = "field_name"; assertEqual( - String.format("source=t | flatten %s", fieldName), + StringUtils.format("source=t | flatten %s", fieldName), flatten(relation("t"), field(fieldName))); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 1f28f2e490..ea4388b163 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -46,13 +46,13 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.stream.Collectors; import org.junit.Ignore; import org.junit.Test; import 
org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.RelevanceFieldList; +import org.opensearch.sql.common.utils.StringUtils; public class AstExpressionBuilderTest extends AstBuilderTest { @@ -257,7 +257,7 @@ public void testEvalIfFunctionExpr() { public void testFlattenExpr() { String fieldName = "field_name"; assertEqual( - String.format("source=t | flatten %s", fieldName), + StringUtils.format("source=t | flatten %s", fieldName), flatten(relation("t"), field(fieldName))); } @@ -784,7 +784,7 @@ void assertFunctionNameCouldBeId(String antlrFunctionName) { assertFalse(functionList.isEmpty()); for (String functionName : functionList) { assertEqual( - String.format(Locale.ROOT, "source=t | fields %s", functionName), + StringUtils.format("source=t | fields %s", functionName), projectWithArg(relation("t"), defaultFieldsArgs(), field(qualifiedName(functionName)))); } } From 82c8ccb32cad2735960e6c364c6683c48d0a630e Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 19:21:42 -0800 Subject: [PATCH 17/81] Fix `LogicalFlattenTest`. Signed-off-by: currantw --- .../org/opensearch/sql/planner/logical/LogicalFlattenTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 373543a4df..2e2e93140f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -120,7 +120,7 @@ void testInvalidType() { String actualMsg = assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) .getMessage(); - assertEquals("Invalid field type 'INTEGER' for flatten command", actualMsg); + assertEquals("Invalid field type 'INTEGER' for flatten command. 
Supported field types: 'STRUCT'.", actualMsg); } @Test From b7d8794f519d7a2e2115ffb5a9ec7f0c35aa6606 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 28 Jan 2025 19:44:01 -0800 Subject: [PATCH 18/81] Simplify algorithm for `Analyzer`. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 15 +++++++-------- .../sql/planner/logical/LogicalFlattenTest.java | 4 +++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 7d4cd60092..1a845298dd 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -25,12 +25,12 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -135,8 +135,6 @@ public class Analyzer extends AbstractNodeVisitor private final BuiltinFunctionRepository repository; private static final String PATH_SEPARATOR = "."; - private static final Pattern PATH_SEPARATOR_PATTERN = - Pattern.compile(PATH_SEPARATOR, Pattern.LITERAL); /** Constructor. */ public Analyzer( @@ -495,17 +493,18 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { removeFieldsMap.put(fieldName, fieldType); + final Pattern fieldNamePathPattern = + Pattern.compile(fieldName + PATH_SEPARATOR, Pattern.LITERAL); + for (java.util.Map.Entry entry : fieldsMap.entrySet()) { String path = entry.getKey(); - List pathComponents = Arrays.stream(PATH_SEPARATOR_PATTERN.split(path)).toList(); - // Verify that path starts with the field name. 
- if (pathComponents.size() < 2 || !pathComponents.getFirst().equals(fieldName)) { + Matcher fieldNamePathMatcher = fieldNamePathPattern.matcher(path); + if (!fieldNamePathMatcher.find() || fieldNamePathMatcher.hitEnd()) { continue; } - String newPath = - String.join(PATH_SEPARATOR, pathComponents.subList(1, pathComponents.size())); + String newPath = path.substring(fieldNamePathMatcher.end()); // Verify that new field does not overwrite an existing field. if (fieldsMap.containsKey(newPath)) { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 2e2e93140f..2c87ca39fb 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -120,7 +120,9 @@ void testInvalidType() { String actualMsg = assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) .getMessage(); - assertEquals("Invalid field type 'INTEGER' for flatten command. Supported field types: 'STRUCT'.", actualMsg); + assertEquals( + "Invalid field type 'INTEGER' for flatten command. Supported field types: 'STRUCT'.", + actualMsg); } @Test From ca013ef22401123602bc1a2e49ae15f407d04f01 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 08:48:08 -0800 Subject: [PATCH 19/81] Update to support flattening nested structs. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 99 +++++++++++++------ .../sql/planner/physical/FlattenOperator.java | 43 ++++---- .../storage/bindingtuple/BindingTuple.java | 2 +- .../planner/logical/LogicalFlattenTest.java | 75 +++++++++----- .../planner/physical/FlattenOperatorTest.java | 83 ++++++++++------ docs/user/ppl/cmd/flatten.rst | 23 ++++- 6 files changed, 216 insertions(+), 109 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 1a845298dd..355eb265eb 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -28,10 +28,9 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -43,7 +42,6 @@ import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.Map; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -286,7 +284,7 @@ public LogicalPlan visitRename(Rename node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); ImmutableMap.Builder renameMapBuilder = new ImmutableMap.Builder<>(); - for (Map renameMap : node.getRenameList()) { + for (org.opensearch.sql.ast.expression.Map renameMap : node.getRenameList()) { Expression origin = expressionAnalyzer.analyze(renameMap.getOrigin(), context); // We should define the new target field in the context instead of 
analyze it. if (renameMap.getTarget() instanceof Field) { @@ -461,10 +459,13 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to * the given flatten node. */ + @SuppressWarnings("NonConstantStringShouldBeStringBuffer") @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); - TypeEnvironment env = context.peek(); + + // [A] Get field name and type + // --------------------------- // Verify that the field name is valid. ReferenceExpression fieldExpr; @@ -474,8 +475,10 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { throw new IllegalArgumentException("Invalid field name for flatten command", e); } - // Verify that the field type is valid. + String fieldName = fieldExpr.getAttr(); ExprType fieldType = fieldExpr.type(); + + // Verify that the field type is valid. if (fieldType != STRUCT) { String msg = StringUtils.format( @@ -484,43 +487,83 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { throw new IllegalArgumentException(msg); } - // Get fields to add and remove. - String fieldName = fieldExpr.getAttr(); - java.util.Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); - - java.util.Map addFieldsMap = new HashMap<>(); - java.util.Map removeFieldsMap = new HashMap<>(); - + // [B] Get fields to add and remove + // -------------------------------- + + // Iterate over all the fields defined in the type environment. Find all those that are + // descended from field that is being flattened. Determine the new path to add and remove the + // existing path. When determining the new path, we need to preserve the portion of the + // path corresponding to the flattened field's parent, if one exists, in order to support + // flattening nested structs - see example below. 
+ // + // Input Data: + // + // { struct: { + // integer: 0, + // nested_struct: { + // string: "value" }}} + // + // Example 1: 'flatten struct' + // + // { integer: 0, + // nested_struct: { + // string: "value" }} + // + // Example 2: 'flatten nested_struct' + // + // { struct: { + // integer: 0, + // string: "value" }} + + Map addFieldsMap = new HashMap<>(); + Map removeFieldsMap = new HashMap<>(); removeFieldsMap.put(fieldName, fieldType); - final Pattern fieldNamePathPattern = - Pattern.compile(fieldName + PATH_SEPARATOR, Pattern.LITERAL); + TypeEnvironment env = context.peek(); + Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); + + final String fieldDescendantPath = fieldName + PATH_SEPARATOR; + final Optional fieldParentPath = + fieldName.contains(PATH_SEPARATOR) + ? Optional.of(fieldName.substring(0, fieldName.lastIndexOf(PATH_SEPARATOR))) + : Optional.empty(); - for (java.util.Map.Entry entry : fieldsMap.entrySet()) { - String path = entry.getKey(); + for (String path : fieldsMap.keySet()) { - Matcher fieldNamePathMatcher = fieldNamePathPattern.matcher(path); - if (!fieldNamePathMatcher.find() || fieldNamePathMatcher.hitEnd()) { + // Verify that the path is descended from the flattened field. + if (!path.startsWith(fieldDescendantPath)) { continue; } - String newPath = path.substring(fieldNamePathMatcher.end()); - - // Verify that new field does not overwrite an existing field. - if (fieldsMap.containsKey(newPath)) { - throw new IllegalArgumentException( - StringUtils.format("Flatten command cannot overwrite field '%s'", newPath)); + // Build the new path. + String newPath = path.substring(fieldDescendantPath.length()); + if (fieldParentPath.isPresent()) { + newPath = fieldParentPath.get() + PATH_SEPARATOR + newPath; } - ExprType type = entry.getValue(); + ExprType type = fieldsMap.get(path); addFieldsMap.put(newPath, type); removeFieldsMap.put(path, type); } - // Update environment. 
- addFieldsMap.forEach((name, type) -> env.define(DSL.ref(name, type))); + // [C] Update environment + // ---------------------- + removeFieldsMap.forEach((name, type) -> env.remove(DSL.ref(name, type))); + for (Map.Entry entry : addFieldsMap.entrySet()) { + String name = entry.getKey(); + ExprType type = entry.getValue(); + + // Verify that new field does not overwrite an existing field. + if (fieldsMap.containsKey(name)) { + throw new IllegalArgumentException( + StringUtils.format("Flatten command cannot overwrite field '%s'", name)); + } + + env.define(DSL.ref(name, type)); + } + return new LogicalFlatten(child, DSL.ref(fieldName, STRUCT)); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 92cf719ab2..d3ff7c9739 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -8,7 +8,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -16,9 +17,7 @@ import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; -import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.expression.env.Environment; @Getter @ToString @@ -29,6 +28,10 @@ public class FlattenOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; + private static final String PATH_SEPARATOR = "."; + private static final Pattern PATH_SEPARATOR_PATTERN = + Pattern.compile(PATH_SEPARATOR, Pattern.LITERAL); + @Override public R 
accept(PhysicalPlanNodeVisitor visitor, C context) { return visitor.visitFlatten(this, context); @@ -46,29 +49,27 @@ public boolean hasNext() { @Override public ExprValue next() { + return flattenExprValueAtPath(input.next(), field.getAttr()); + } - ExprValue inputExprValue = input.next(); - Map fieldsMap = ExprValueUtils.getTupleValue(inputExprValue); - - // Build the flattened field map. - String fieldName = field.getAttr(); - ExprValue exprValue = fieldsMap.get(fieldName); - - Map flattenedFieldsMap = exprValue.tupleValue(); + /** Flattens the value at the specified path and returns the result. */ + private static ExprValue flattenExprValueAtPath(ExprValue value, String path) { - // Update field map. - fieldsMap.putAll(flattenedFieldsMap); - fieldsMap.remove(fieldName); + Matcher matcher = PATH_SEPARATOR_PATTERN.matcher(path); + Map exprValueMap = ExprValueUtils.getTupleValue(value); - // Update the environment. - Environment env = inputExprValue.bindingTuples(); + if (matcher.find()) { + String currentPathComponent = path.substring(0, matcher.start()); + String remainingPath = path.substring(matcher.end()); - for (Entry entry : flattenedFieldsMap.entrySet()) { - ExprValue fieldValue = entry.getValue(); - Expression fieldRefExp = new ReferenceExpression(entry.getKey(), fieldValue.type()); - Environment.extendEnv(env, fieldRefExp, fieldValue); + ExprValue flattenedExprValue = + flattenExprValueAtPath(exprValueMap.get(currentPathComponent), remainingPath); + exprValueMap.put(currentPathComponent, flattenedExprValue); + } else { + exprValueMap.putAll(ExprValueUtils.getTupleValue(exprValueMap.get(path))); + exprValueMap.remove(path); } - return ExprTupleValue.fromExprValueMap(fieldsMap); + return ExprTupleValue.fromExprValueMap(exprValueMap); } } diff --git a/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java b/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java index c5c12584fd..634273f862 100644 --- 
a/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java +++ b/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java @@ -13,7 +13,7 @@ import org.opensearch.sql.expression.env.Environment; /** - * BindingTuple represents the a relationship between bindingName and ExprValue. e.g. The operation + * BindingTuple represents the relationship between bindingName and ExprValue. e.g. The operation * output column name is bindingName, the value is the ExprValue. */ public abstract class BindingTuple implements Environment { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 2c87ca39fb..03e587e79d 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -37,13 +38,11 @@ protected Map typeMapping() { // Override mapping for testing. 
mapping.put("struct_empty", STRUCT); - mapping.put("struct_basic", STRUCT); - mapping.put("struct_basic.integer", INTEGER); - mapping.put("struct_basic.double", DOUBLE); - - mapping.put("struct_nested", STRUCT); - mapping.put("struct_nested.struct", STRUCT); - mapping.put("struct_nested.struct.string", STRING); + mapping.put("struct", STRUCT); + mapping.put("struct.integer", INTEGER); + mapping.put("struct.double", DOUBLE); + mapping.put("struct.nested_struct", STRUCT); + mapping.put("struct.nested_struct.string", STRING); mapping.put("duplicate", STRUCT); mapping.put("duplicate.integer_value", INTEGER); @@ -58,47 +57,51 @@ void testStructEmpty() { LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_empty", STRUCT)); LogicalPlan actualLogicalPlan = analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_empty"))); + assertEquals(expectedLogicalPlan, actualLogicalPlan); - Map fieldMap = - analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); - assertFalse(fieldMap.containsKey("struct_empty")); + assertTypeNotDefined("struct_empty"); } @Test - void testStructBasic() { + void testStruct() { LogicalPlan expectedLogicalPlan = LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_basic", STRUCT)); + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct", STRUCT)); LogicalPlan actualLogicalPlan = - analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_basic"))); + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct"))); assertEquals(expectedLogicalPlan, actualLogicalPlan); - Map fieldMap = - analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); - assertFalse(fieldMap.containsKey("struct_basic")); - assertFalse(fieldMap.containsKey("struct_basic.integer")); - assertFalse(fieldMap.containsKey("struct_basic.double")); - assertEquals(INTEGER, fieldMap.get("integer")); - assertEquals(DOUBLE, fieldMap.get("double")); + assertTypeNotDefined("struct"); 
+ assertTypeNotDefined("struct.integer"); + assertTypeNotDefined("struct.double"); + assertTypeNotDefined("struct.nested_struct"); + assertTypeNotDefined("struct.nested_struct.string"); + + assertTypeDefined("integer", INTEGER); + assertTypeDefined("double", DOUBLE); + assertTypeDefined("nested_struct", STRUCT); + assertTypeDefined("nested_struct.string", STRING); } @Test void testStructNested() { LogicalPlan expectedLogicalPlan = LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_nested", STRUCT)); + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct.nested_struct", STRUCT)); LogicalPlan actualLogicalPlan = - analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_nested"))); + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct.nested_struct"))); assertEquals(expectedLogicalPlan, actualLogicalPlan); - Map fieldMap = - analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); - assertFalse(fieldMap.containsKey("struct_nested")); - assertEquals(STRUCT, fieldMap.get("struct")); - assertEquals(STRING, fieldMap.get("struct.string")); + assertTypeNotDefined("struct.nested_struct"); + assertTypeNotDefined("struct.nested_struct.string"); + + assertTypeDefined("struct", STRUCT); + assertTypeDefined("struct.integer", INTEGER); + assertTypeDefined("struct.double", DOUBLE); + assertTypeDefined("struct.string", STRING); } @Test @@ -135,4 +138,22 @@ void testInvalidDuplicate() { .getMessage(); assertEquals("Flatten command cannot overwrite field 'integer_value'", msg); } + + /** Asserts that the given field name is not defined in the type environment */ + private void assertTypeNotDefined(String fieldName) { + Map fieldsMap = + analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); + assertFalse(fieldsMap.containsKey(fieldName)); + } + + /** + * Asserts that the given field name is defined in the type environment and corresponds to the + * given type. 
+ */ + private void assertTypeDefined(String fieldName, ExprType fieldType) { + Map fieldsMap = + analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); + assertTrue(fieldsMap.containsKey(fieldName)); + assertEquals(fieldType, fieldsMap.get(fieldName)); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index c2e54c7e37..52f2bbe0ce 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -5,18 +5,14 @@ package org.opensearch.sql.planner.physical; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.allOf; -import static org.hamcrest.Matchers.hasItems; -import static org.hamcrest.Matchers.iterableWithSize; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.when; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.flatten; -import com.google.common.collect.ImmutableMap; -import java.util.List; import java.util.Map; import lombok.ToString; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; @@ -33,40 +29,71 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; - @Test - void testFlattenStructEmpty() { - Map structMap = ImmutableMap.of(); - Map rowMap = ImmutableMap.of("struct_field", structMap); - ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + @BeforeEach + void setup() { + ExprValue rowValue = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", Map.of()), + Map.entry( + "struct", + Map.ofEntries( + Map.entry("integer", 0), + Map.entry("double", 
0), + Map.entry("struct_nested", Map.of("string", "value")))))); when(inputPlan.hasNext()).thenReturn(true, false); when(inputPlan.next()).thenReturn(rowValue); + } + + @Test + void testStructEmpty() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_empty", STRUCT))).getFirst(); - PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry( + "struct", + Map.ofEntries( + Map.entry("integer", 0), + Map.entry("double", 0), + Map.entry("struct_nested", Map.of("string", "value")))))); - assertThat(execute(plan), allOf(iterableWithSize(1), hasItems())); + assertEquals(expected, actual); } @Test - void testFlattenStructBasic() { - Map structMap = - ImmutableMap.ofEntries( - Map.entry("string_field", "string_value"), - Map.entry("integer_field", 1), - Map.entry("long_field", 1L), - Map.entry("boolean_field", true), - Map.entry("list_field", List.of("a", "b"))); + void testStruct() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct", STRUCT))).getFirst(); - Map rowMap = ImmutableMap.of("struct_field", structMap); - ExprValue rowValue = ExprValueUtils.tupleValue(rowMap); + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", Map.of()), + Map.entry("integer", 0), + Map.entry("double", 0), + Map.entry("struct_nested", Map.of("string", "value")))); - ExprValue expectedRowValue = ExprValueUtils.tupleValue(structMap); + assertEquals(expected, actual); + } - when(inputPlan.hasNext()).thenReturn(true, false); - when(inputPlan.next()).thenReturn(rowValue); + @Test + void testStructNested() { + ExprValue actual = + execute(flatten(inputPlan, DSL.ref("struct.struct_nested", STRUCT))).getFirst(); - PhysicalPlan plan = flatten(inputPlan, DSL.ref("struct_field", STRUCT)); + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", Map.of()), + Map.entry( + "struct", + Map.ofEntries( + 
Map.entry("integer", 0), + Map.entry("double", 0), + Map.entry("string", "value"))))); - assertThat(execute(plan), allOf(iterableWithSize(1), hasItems(expectedRowValue))); + assertEquals(expected, actual); } } diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index a7b7d27e18..059ef7a90b 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -27,10 +27,10 @@ Example 1: Flatten an object field PPL query:: os> source=cities | flatten location | fields name, province, state, country, coordinates - fetched rows / total rows = 2/2 + fetched rows / total rows = 2/2 +-----------+------------------+------------+---------------+-----------------------------------------------+ | name | province | state | country | coordinates | - |-----------+------------------+------------+---------------+-----------------------------------------------| + +-----------+------------------+------------+---------------+-----------------------------------------------+ | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | +-----------+------------------+------------+---------------+-----------------------------------------------+ @@ -40,11 +40,26 @@ Example 2: Flatten multiple object fields PPL query:: - os> source=cities | flatten location | flatten coordinates | fields name, province, state, country, coordinates - fetched rows / total rows = 2/2 + os> source=cities | flatten location | flatten coordinates | fields name, province, state, country, latitude, longitude + fetched rows / total rows = 2/2 +-----------+------------------+------------+---------------+----------+-----------+ | name | province | state | country | latitude | longitude | +-----------+------------------+------------+---------------+----------+-----------+ | Seattle | null | Washington | United States | 47.6061 | -122.3328 | | Vancouver | 
British Columbia | null | Canada | 49.2827 | -123.1207 | +-----------+------------------+------------+---------------+----------+-----------+ + +Example 3: Flatten a nested object field +======================================== + +PPL query:: + + os> source=cities | flatten location.coordinates | fields name, location + fetched rows / total rows = 2/2 + +-----------+----------------------------------------------------------------------------------------------------+ + | name | location | + +-----------+----------------------------------------------------------------------------------------------------+ + | Seattle | {'state': 'Washington', 'country': 'United States', 'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'latitude': 49.2827, 'longitude': -123.1207} | + +-----------+----------------------------------------------------------------------------------------------------+ + From 7920bd8afe188af6aa96e70e8987ecdea61eccbf Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 08:48:27 -0800 Subject: [PATCH 20/81] Fix unrelated bug in `IPFunctionsTest`. 
Signed-off-by: currantw --- .../sql/expression/ip/IPFunctionsTest.java | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java index a74bbda3a1..693c6eb0cd 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; @@ -25,7 +26,7 @@ import org.opensearch.sql.expression.env.Environment; @ExtendWith(MockitoExtension.class) -public class IPFunctionsTest { +class IPFunctionsTest { // IP range and address constants for testing. private static final ExprValue IPv4Range = ExprValueUtils.stringValue("198.51.100.0/24"); @@ -48,19 +49,27 @@ public class IPFunctionsTest { @Mock private Environment env; @Test - public void cidrmatch_invalid_arguments() { - assertThrows( - SemanticCheckException.class, - () -> execute(ExprValueUtils.ipValue("INVALID"), IPv4Range), - "IP address string 'INVALID' is not valid. Error details: .*"); - assertThrows( - SemanticCheckException.class, - () -> execute(IPv4AddressWithin, ExprValueUtils.stringValue("INVALID")), - "IP address range string 'INVALID' is not valid. Error details: .*"); + void cidrmatch_invalid_arguments() { + Exception ex; + + ex = + assertThrows( + SemanticCheckException.class, + () -> execute(ExprValueUtils.ipValue("INVALID"), IPv4Range)); + assertTrue( + ex.getMessage().matches("IP address string 'INVALID' is not valid. 
Error details: .*")); + + ex = + assertThrows( + SemanticCheckException.class, + () -> execute(IPv4AddressWithin, ExprValueUtils.stringValue("INVALID"))); + assertTrue( + ex.getMessage() + .matches("IP address range string 'INVALID' is not valid. Error details: .*")); } @Test - public void cidrmatch_valid_arguments() { + void cidrmatch_valid_arguments() { assertEquals(LITERAL_FALSE, execute(IPv4AddressBelow, IPv4Range)); assertEquals(LITERAL_TRUE, execute(IPv4AddressWithin, IPv4Range)); From 9d6459f49a42b856bf88f16c763c47b87a978476 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 08:53:59 -0800 Subject: [PATCH 21/81] Update `IPFunctionsTest` to anchor at start. Signed-off-by: currantw --- .../org/opensearch/sql/expression/ip/IPFunctionsTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java index 693c6eb0cd..95211d004b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.type.ExprCoreType.IP; +import lombok.ToString; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -25,6 +26,7 @@ import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.env.Environment; +@ToString @ExtendWith(MockitoExtension.class) class IPFunctionsTest { @@ -57,7 +59,7 @@ void cidrmatch_invalid_arguments() { SemanticCheckException.class, () -> execute(ExprValueUtils.ipValue("INVALID"), IPv4Range)); assertTrue( - ex.getMessage().matches("IP address string 'INVALID' is not valid. Error details: .*")); + ex.getMessage().matches("^IP address string 'INVALID' is not valid. 
Error details: .*")); ex = assertThrows( @@ -65,7 +67,7 @@ void cidrmatch_invalid_arguments() { () -> execute(IPv4AddressWithin, ExprValueUtils.stringValue("INVALID"))); assertTrue( ex.getMessage() - .matches("IP address range string 'INVALID' is not valid. Error details: .*")); + .matches("^IP address range string 'INVALID' is not valid. Error details: .*")); } @Test From 6d040ebabcab7cac8ca1b5bea5350bf8848323f7 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 10:03:30 -0800 Subject: [PATCH 22/81] Minor cleanup. Signed-off-by: currantw --- core/src/main/java/org/opensearch/sql/analysis/Analyzer.java | 4 ++-- .../org/opensearch/sql/planner/physical/FlattenOperator.java | 4 +--- .../opensearch/sql/planner/logical/LogicalFlattenTest.java | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 355eb265eb..3719d95c30 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -482,8 +482,8 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { if (fieldType != STRUCT) { String msg = StringUtils.format( - "Invalid field type '%s' for flatten command. Supported field types: '%s'.", - fieldType, STRUCT.typeName()); + "Invalid field type for flatten command. 
Expected '%s' but got '%s'.", + STRUCT.typeName(), fieldType); throw new IllegalArgumentException(msg); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index d3ff7c9739..c5bd7530ac 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -28,9 +28,7 @@ public class FlattenOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; - private static final String PATH_SEPARATOR = "."; - private static final Pattern PATH_SEPARATOR_PATTERN = - Pattern.compile(PATH_SEPARATOR, Pattern.LITERAL); + private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 03e587e79d..068091388f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -124,8 +124,7 @@ void testInvalidType() { assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) .getMessage(); assertEquals( - "Invalid field type 'INTEGER' for flatten command. Supported field types: 'STRUCT'.", - actualMsg); + "Invalid field type for flatten command. Expected 'STRUCT' but got 'INTEGER'.", actualMsg); } @Test From 43c0902052f288612ecfd91271aec6f8d23189e8 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 11:19:31 -0800 Subject: [PATCH 23/81] Fix doctest formatting. 
Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 059ef7a90b..da8739d2cb 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -30,7 +30,7 @@ PPL query:: fetched rows / total rows = 2/2 +-----------+------------------+------------+---------------+-----------------------------------------------+ | name | province | state | country | coordinates | - +-----------+------------------+------------+---------------+-----------------------------------------------+ + |-----------+------------------+------------+---------------+-----------------------------------------------| | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | +-----------+------------------+------------+---------------+-----------------------------------------------+ @@ -44,7 +44,7 @@ PPL query:: fetched rows / total rows = 2/2 +-----------+------------------+------------+---------------+----------+-----------+ | name | province | state | country | latitude | longitude | - +-----------+------------------+------------+---------------+----------+-----------+ + |-----------+------------------+------------+---------------+----------+-----------| | Seattle | null | Washington | United States | 47.6061 | -122.3328 | | Vancouver | British Columbia | null | Canada | 49.2827 | -123.1207 | +-----------+------------------+------------+---------------+----------+-----------+ @@ -58,7 +58,7 @@ PPL query:: fetched rows / total rows = 2/2 +-----------+----------------------------------------------------------------------------------------------------+ | name | location | - +-----------+----------------------------------------------------------------------------------------------------+ + 
|-----------+----------------------------------------------------------------------------------------------------| | Seattle | {'state': 'Washington', 'country': 'United States', 'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'latitude': 49.2827, 'longitude': -123.1207} | +-----------+----------------------------------------------------------------------------------------------------+ From 40362bf68c613fe01e52ca4a18622a004e5f98b7 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 30 Jan 2025 11:32:05 -0800 Subject: [PATCH 24/81] Address minor review comments. Signed-off-by: currantw --- .../java/org/opensearch/sql/analysis/Analyzer.java | 12 +++++------- .../sql/planner/DefaultImplementorTest.java | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 3719d95c30..6e008e6fd3 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -132,8 +132,6 @@ public class Analyzer extends AbstractNodeVisitor private final BuiltinFunctionRepository repository; - private static final String PATH_SEPARATOR = "."; - /** Constructor. */ public Analyzer( ExpressionAnalyzer expressionAnalyzer, @@ -459,7 +457,6 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to * the given flatten node. 
*/ - @SuppressWarnings("NonConstantStringShouldBeStringBuffer") @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); @@ -522,10 +519,11 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { TypeEnvironment env = context.peek(); Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); - final String fieldDescendantPath = fieldName + PATH_SEPARATOR; + final String pathSeparator = "."; + final String fieldDescendantPath = fieldName + pathSeparator; final Optional fieldParentPath = - fieldName.contains(PATH_SEPARATOR) - ? Optional.of(fieldName.substring(0, fieldName.lastIndexOf(PATH_SEPARATOR))) + fieldName.contains(pathSeparator) + ? Optional.of(fieldName.substring(0, fieldName.lastIndexOf(pathSeparator))) : Optional.empty(); for (String path : fieldsMap.keySet()) { @@ -538,7 +536,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Build the new path. String newPath = path.substring(fieldDescendantPath.length()); if (fieldParentPath.isPresent()) { - newPath = fieldParentPath.get() + PATH_SEPARATOR + newPath; + newPath = fieldParentPath.get() + pathSeparator + newPath; } ExprType type = fieldsMap.get(path); diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index a56a014d55..7fe3a473d9 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -330,7 +330,7 @@ public void visitTrendline_should_build_TrendlineOperator() { } @Test - void visitFlatten_should_build_FlattenOperator() { + public void visitFlatten_should_build_FlattenOperator() { // Mock physical and logical plan children. 
var logicalChild = mock(LogicalPlan.class); From b0a671000241ad4cc5bca28dfb0e690188c8f5e1 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 08:27:18 -0800 Subject: [PATCH 25/81] Fix doc tests. Signed-off-by: currantw --- .../sql/planner/physical/FlattenOperator.java | 39 ++++- .../planner/physical/FlattenOperatorTest.java | 159 +++++++++++++++--- docs/user/ppl/cmd/flatten.rst | 57 ++++--- doctest/test_data/cities.json | 2 + 4 files changed, 201 insertions(+), 56 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index c5bd7530ac..7a771289ee 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -50,24 +50,51 @@ public ExprValue next() { return flattenExprValueAtPath(input.next(), field.getAttr()); } - /** Flattens the value at the specified path and returns the result. */ - private static ExprValue flattenExprValueAtPath(ExprValue value, String path) { + /** + * Flattens the {@link ExprTupleValue} at the specified path and returns the update value.. If the + * value is null or missing, the unmodified value is returned.. 
+ */ + private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path) { Matcher matcher = PATH_SEPARATOR_PATTERN.matcher(path); - Map exprValueMap = ExprValueUtils.getTupleValue(value); + Map exprValueMap = ExprValueUtils.getTupleValue(exprValue); + + // [A] Flatten nested struct value + // ------------------------------- if (matcher.find()) { String currentPathComponent = path.substring(0, matcher.start()); String remainingPath = path.substring(matcher.end()); + if (!exprValueMap.containsKey(currentPathComponent)) { + return exprValue; + } + + ExprValue childExprValue = exprValueMap.get(currentPathComponent); + if (childExprValue.isNull() || childExprValue.isMissing()) { + return exprValue; + } + ExprValue flattenedExprValue = flattenExprValueAtPath(exprValueMap.get(currentPathComponent), remainingPath); exprValueMap.put(currentPathComponent, flattenedExprValue); - } else { - exprValueMap.putAll(ExprValueUtils.getTupleValue(exprValueMap.get(path))); - exprValueMap.remove(path); + return ExprTupleValue.fromExprValueMap(exprValueMap); } + // [B] Flatten child struct value + // ------------------------------ + + if (!exprValueMap.containsKey(path)) { + return exprValue; + } + + ExprValue childExprValue = exprValueMap.get(path); + if (!childExprValue.isNull() && !childExprValue.isMissing()) { + exprValueMap.putAll(ExprValueUtils.getTupleValue(childExprValue)); + } + + exprValueMap.remove(path); + return ExprTupleValue.fromExprValueMap(exprValueMap); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 52f2bbe0ce..bc3c8b6015 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -29,19 +29,37 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; + // 
Define input values for testing. + private final ExprValue doubleExprValue = ExprValueUtils.integerValue(0); + private final ExprValue integerExprValue = ExprValueUtils.doubleValue(0.0); + private final ExprValue stringExprValue = ExprValueUtils.stringValue("value"); + + private final ExprValue structEmptyExprValue = ExprValueUtils.tupleValue(Map.of()); + private final ExprValue structNullExprValue = ExprValueUtils.nullValue(); + private final ExprValue structMissingExprValue = ExprValueUtils.missingValue(); + + private final ExprValue structNestedExprValue = + ExprValueUtils.tupleValue(Map.of("string", stringExprValue)); + + private final ExprValue structExprValue = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("double", doubleExprValue), + Map.entry("struct_nested", structNestedExprValue))); + + private final ExprValue rowValue = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + @BeforeEach void setup() { - ExprValue rowValue = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", Map.of()), - Map.entry( - "struct", - Map.ofEntries( - Map.entry("integer", 0), - Map.entry("double", 0), - Map.entry("struct_nested", Map.of("string", "value")))))); + // Mock input values. 
when(inputPlan.hasNext()).thenReturn(true, false); when(inputPlan.next()).thenReturn(rowValue); } @@ -53,12 +71,37 @@ void testStructEmpty() { ExprValue expected = ExprValueUtils.tupleValue( Map.ofEntries( - Map.entry( - "struct", - Map.ofEntries( - Map.entry("integer", 0), - Map.entry("double", 0), - Map.entry("struct_nested", Map.of("string", "value")))))); + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + + assertEquals(expected, actual); + } + + @Test + void testStructNull() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_null", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + + assertEquals(expected, actual); + } + + @Test + void testStructMissing() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_missing", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct", structExprValue))); assertEquals(expected, actual); } @@ -70,10 +113,12 @@ void testStruct() { ExprValue expected = ExprValueUtils.tupleValue( Map.ofEntries( - Map.entry("struct_empty", Map.of()), - Map.entry("integer", 0), - Map.entry("double", 0), - Map.entry("struct_nested", Map.of("string", "value")))); + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("integer", integerExprValue), + Map.entry("double", doubleExprValue), + Map.entry("struct_nested", structNestedExprValue))); assertEquals(expected, actual); } @@ -86,13 +131,77 @@ void testStructNested() { ExprValue expected = ExprValueUtils.tupleValue( 
Map.ofEntries( - Map.entry("struct_empty", Map.of()), + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), Map.entry( "struct", - Map.ofEntries( - Map.entry("integer", 0), - Map.entry("double", 0), - Map.entry("string", "value"))))); + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("double", doubleExprValue), + Map.entry("string", stringExprValue)))))); + + assertEquals(expected, actual); + } + + @Test + void testAncestorStructNull() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_null.path", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + + assertEquals(expected, actual); + } + + @Test + void testAncestorStructMissing() { + ExprValue actual = + execute(flatten(inputPlan, DSL.ref("struct_missing.path", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + + assertEquals(expected, actual); + } + + @Test + void testPathMissing() { + ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct.unknown", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); + + assertEquals(expected, actual); + } + + @Test + void testAncestorPathMissing() { + ExprValue actual = execute(flatten(inputPlan, 
DSL.ref("unknown", STRUCT))).getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue))); assertEquals(expected, actual); } diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index da8739d2cb..d9d0f6bd8e 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -12,7 +12,8 @@ Description ============ Using ``flatten`` command to flatten an `object`. New fields are added to the search results corresponding -to each of the object's fields, while the object field itself is removed from the search results. +to each of the object's fields, while the object field itself is removed from the search results. If the +specified `object` is null, missing, the search results are not modified. Syntax ============ @@ -26,40 +27,46 @@ Example 1: Flatten an object field PPL query:: - os> source=cities | flatten location | fields name, province, state, country, coordinates - fetched rows / total rows = 2/2 - +-----------+------------------+------------+---------------+-----------------------------------------------+ - | name | province | state | country | coordinates | - |-----------+------------------+------------+---------------+-----------------------------------------------| - | Seattle | null | Washington | United States | {'latitude': 47.6061, 'longitude': -122.3328} | - | Vancouver | British Columbia | null | Canada | {'latitude': 49.2827, 'longitude': -123.1207} | - +-----------+------------------+------------+---------------+-----------------------------------------------+ + os> source=cities | flatten location + fetched rows / total rows = 4/4 + +------------------+---------------+------------------+-----------------------------------------------+------------+ + | name | country | province | coordinates | state 
| + |------------------+---------------+------------------+-----------------------------------------------+------------| + | Seattle | United States | null | {'latitude': 47.6061, 'longitude': -122.3328} | Washington | + | Vancouver | Canada | British Columbia | {'latitude': 49.2827, 'longitude': -123.1207} | null | + | Null Location | null | null | null | null | + | Null Coordinates | Australia | null | null | Victoria | + +------------------+---------------+------------------+-----------------------------------------------+------------+ Example 2: Flatten multiple object fields ========================================= PPL query:: - os> source=cities | flatten location | flatten coordinates | fields name, province, state, country, latitude, longitude - fetched rows / total rows = 2/2 - +-----------+------------------+------------+---------------+----------+-----------+ - | name | province | state | country | latitude | longitude | - |-----------+------------------+------------+---------------+----------+-----------| - | Seattle | null | Washington | United States | 47.6061 | -122.3328 | - | Vancouver | British Columbia | null | Canada | 49.2827 | -123.1207 | - +-----------+------------------+------------+---------------+----------+-----------+ + os> source=cities | flatten location | flatten coordinates + fetched rows / total rows = 4/4 + +------------------+---------------+------------------+------------+----------+-----------+ + | name | country | province | state | latitude | longitude | + |------------------+---------------+------------------+------------+----------+-----------| + | Seattle | United States | null | Washington | 47.6061 | -122.3328 | + | Vancouver | Canada | British Columbia | null | 49.2827 | -123.1207 | + | Null Location | null | null | null | null | null | + | Null Coordinates | Australia | null | Victoria | null | null | + +------------------+---------------+------------------+------------+----------+-----------+ Example 3: Flatten a 
nested object field ======================================== PPL query:: - os> source=cities | flatten location.coordinates | fields name, location - fetched rows / total rows = 2/2 - +-----------+----------------------------------------------------------------------------------------------------+ - | name | location | - |-----------+----------------------------------------------------------------------------------------------------| - | Seattle | {'state': 'Washington', 'country': 'United States', 'latitude': 47.6061, 'longitude': -122.3328} | - | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'latitude': 49.2827, 'longitude': -123.1207} | - +-----------+----------------------------------------------------------------------------------------------------+ + os> source=cities | flatten location.coordinates + fetched rows / total rows = 4/4 + +------------------+----------------------------------------------------------------------------------------------------+ + | name | location | + |------------------+----------------------------------------------------------------------------------------------------| + | Seattle | {'state': 'Washington', 'country': 'United States', 'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'latitude': 49.2827, 'longitude': -123.1207} | + | Null Location | null | + | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | + +------------------+----------------------------------------------------------------------------------------------------+ diff --git a/doctest/test_data/cities.json b/doctest/test_data/cities.json index 62a96fb21a..eb4cf6c2a2 100644 --- a/doctest/test_data/cities.json +++ b/doctest/test_data/cities.json @@ -1,2 +1,4 @@ {"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} {"name": "Vancouver", "location": { "province": "British 
Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} +{"name": "Null Location", "location": null} +{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} \ No newline at end of file From be26660b498800a00909e9901722c06d8af9eecb Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 08:57:59 -0800 Subject: [PATCH 26/81] Update integration tests to align with doc tests. Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 2 +- .../opensearch/sql/ppl/FlattenCommandIT.java | 75 +++++++++++++------ integ-test/src/test/resources/cities.json | 4 + 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index d9d0f6bd8e..847229e05c 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -13,7 +13,7 @@ Description Using ``flatten`` command to flatten an `object`. New fields are added to the search results corresponding to each of the object's fields, while the object field itself is removed from the search results. If the -specified `object` is null, missing, the search results are not modified. +specified `object` is null or missing, the search results are not modified. 
Syntax ============ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index b6f70f32d4..468e522048 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -25,52 +25,81 @@ public void init() throws IOException { } @Test - public void testFlattenStruct() throws IOException { - String query = - StringUtils.format( - "source=%s | flatten location | fields state, province, country, coordinates", - TEST_INDEX_CITIES); + public void testBasic() throws IOException { + String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_CITIES); JSONObject result = executeQuery(query); verifySchema( result, - schema("state", "string"), - schema("province", "string"), + schema("name", "string"), schema("country", "string"), - schema("coordinates", "struct")); + schema("province", "string"), + schema("coordinates", "struct"), + schema("state", "string")); verifyDataRows( result, rows( - "Washington", - null, + "Seattle", "United States", - Map.of("latitude", 47.6061, "longitude", -122.3328)), - rows( null, - "British Columbia", + Map.ofEntries(Map.entry("latitude", 47.6061), Map.entry("longitude", -122.3328)), + "Washington"), + rows( + "Vancouver", "Canada", - Map.of("latitude", 49.2827, "longitude", -123.1207))); + "British Columbia", + Map.ofEntries(Map.entry("latitude", 49.2827), Map.entry("longitude", -123.1207)), + null), + rows("Null Location", null, null, null, null), + rows("Null Coordinates", "Australia", null, null, "Victoria")); } @Test - public void testFlattenStructMultiple() throws IOException { + public void testMultiple() throws IOException { String query = - StringUtils.format( - "source=%s | flatten location | flatten coordinates | fields state, province, country," - + " latitude, longitude", - TEST_INDEX_CITIES); + 
StringUtils.format("source=%s | flatten location | flatten coordinates", TEST_INDEX_CITIES); JSONObject result = executeQuery(query); verifySchema( result, - schema("state", "string"), - schema("province", "string"), + schema("name", "string"), schema("country", "string"), + schema("province", "string"), + schema("state", "string"), schema("latitude", "float"), schema("longitude", "float")); verifyDataRows( result, - rows("Washington", null, "United States", 47.6061, -122.3328), - rows(null, "British Columbia", "Canada", 49.2827, -123.1207)); + rows("Seattle", "United States", null, "Washington", 47.6061, -122.3328), + rows("Vancouver", "Canada", "British Columbia", null, 49.2827, -123.1207), + rows("Null Location", null, null, null, null, null), + rows("Null Coordinates", "Australia", null, "Victoria", null, null)); + } + + @Test + public void testNested() throws IOException { + String query = + StringUtils.format("source=%s | flatten location.coordinates", TEST_INDEX_CITIES); + JSONObject result = executeQuery(query); + + verifySchema(result, schema("name", "string"), schema("location", "struct")); + verifyDataRows( + result, + rows( + "Seattle", + Map.ofEntries( + Map.entry("state", "Washington"), + Map.entry("country", "United States"), + Map.entry("latitude", 47.6061), + Map.entry("longitude", -122.3328))), + rows( + "Vancouver", + Map.ofEntries( + Map.entry("country", "Canada"), + Map.entry("province", "British Columbia"), + Map.entry("latitude", 49.2827), + Map.entry("longitude", -123.1207))), + rows("Null Location", null), + rows("Null Coordinates", Map.of("state", "Victoria", "country", "Australia"))); } } diff --git a/integ-test/src/test/resources/cities.json b/integ-test/src/test/resources/cities.json index bc74a0d17d..28c8ce4940 100644 --- a/integ-test/src/test/resources/cities.json +++ b/integ-test/src/test/resources/cities.json @@ -2,3 +2,7 @@ {"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": 
{"latitude": 47.6061, "longitude": -122.3328}}} {"index":{"_id":"2"}} {"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} +{"index":{"_id":"3"}} +{"name": "Null Location", "location": null} +{"index":{"_id":"4"}} +{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} From b3e4401039606dc2e6e0bd0bab9f192fcfe2adbe Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 09:33:40 -0800 Subject: [PATCH 27/81] Review - minor documentation updates. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 28 +++++++++++-------- docs/user/ppl/cmd/flatten.rst | 4 +-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 6e008e6fd3..f8ad2dae29 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -495,22 +495,28 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // // Input Data: // - // { struct: { - // integer: 0, - // nested_struct: { - // string: "value" }}} + // [ + // struct: { + // integer: 0, + // nested_struct: { string: "value" } + // } + // ] // // Example 1: 'flatten struct' // - // { integer: 0, - // nested_struct: { - // string: "value" }} + // [ + // integer: 0, + // nested_struct: { string: "value" } + // ] // - // Example 2: 'flatten nested_struct' + // Example 2: 'flatten struct.nested_struct' // - // { struct: { - // integer: 0, - // string: "value" }} + // [ + // struct: { + // integer: 0, + // string: "value" + // { + // ] Map addFieldsMap = new HashMap<>(); Map removeFieldsMap = new HashMap<>(); diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 847229e05c..bb5c1b197c 100644 --- 
a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -11,14 +11,14 @@ flatten Description ============ -Using ``flatten`` command to flatten an `object`. New fields are added to the search results corresponding +The ``flatten`` command flattens an `object`'s fields. New fields are added to the search results corresponding to each of the object's fields, while the object field itself is removed from the search results. If the specified `object` is null or missing, the search results are not modified. Syntax ============ -`flatten field` +`flatten ` * `field`: reference to the `object` field to flatten. From 4099f103130d6555941858cf4ad18a7f80452694 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 16:00:25 -0800 Subject: [PATCH 28/81] Remove double periods Signed-off-by: currantw --- .../org/opensearch/sql/planner/physical/FlattenOperator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 7a771289ee..e51ccc0736 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -51,8 +51,8 @@ public ExprValue next() { } /** - * Flattens the {@link ExprTupleValue} at the specified path and returns the update value.. If the - * value is null or missing, the unmodified value is returned.. + * Flattens the {@link ExprTupleValue} at the specified path and returns the update value. If the + * value is null or missing, the unmodified value is returned. */ private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path) { From b96cefa411a9308d3ee2121aa0203b5ee757c847 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 16:52:29 -0800 Subject: [PATCH 29/81] Add comment on `Map.equals`. 
Signed-off-by: currantw --- .../java/org/opensearch/sql/data/model/ExprTupleValue.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java index 643c2cd8cd..b7abe0d256 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java @@ -75,6 +75,12 @@ public boolean equal(ExprValue other) { return false; } + /** + * {@link Map#equals} returns true if the two maps' entry sets are equal, and works properly + * across all implementation of the {@link Map} interface. See {@link + * https://docs.oracle.com/javase/8/docs/api/java/util/Map.html#equals-java.lang.Object-} for + * more details. + */ return valueMap.equals(other.tupleValue()); } From 72d98edc11d1824a71429281d6ac0a0b40d2232f Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 31 Jan 2025 17:04:20 -0800 Subject: [PATCH 30/81] Remove unnecessary error checks. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 25 ++----------------- .../planner/logical/LogicalFlattenTest.java | 17 +++---------- .../planner/physical/FlattenOperatorTest.java | 16 ++++++++++-- 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index f8ad2dae29..753d615a53 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -461,32 +461,11 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); - // [A] Get field name and type - // --------------------------- - - // Verify that the field name is valid. 
- ReferenceExpression fieldExpr; - try { - fieldExpr = (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); - } catch (SemanticCheckException e) { - throw new IllegalArgumentException("Invalid field name for flatten command", e); - } - + ReferenceExpression fieldExpr = + (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); String fieldName = fieldExpr.getAttr(); ExprType fieldType = fieldExpr.type(); - // Verify that the field type is valid. - if (fieldType != STRUCT) { - String msg = - StringUtils.format( - "Invalid field type for flatten command. Expected '%s' but got '%s'.", - STRUCT.typeName(), fieldType); - throw new IllegalArgumentException(msg); - } - - // [B] Get fields to add and remove - // -------------------------------- - // Iterate over all the fields defined in the type environment. Find all those that are // descended from field that is being flattened. Determine the new path to add and remove the // existing path. When determining the new path, we need to preserve the portion of the diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 068091388f..18e441643f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -24,6 +24,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @ExtendWith(MockitoExtension.class) @@ -110,21 +111,9 @@ void testInvalidName() { AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("invalid")); String msg = - assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) - .getMessage(); - assertEquals("Invalid field name for flatten 
command", msg); - } - - @Test - void testInvalidType() { - Flatten actualUnresolvedPlan = - AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("integer_value")); - - String actualMsg = - assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) + assertThrows(SemanticCheckException.class, () -> analyze(actualUnresolvedPlan)) .getMessage(); - assertEquals( - "Invalid field type for flatten command. Expected 'STRUCT' but got 'INTEGER'.", actualMsg); + assertEquals("can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", msg); } @Test diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index bc3c8b6015..6ec4e8b786 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -6,7 +6,9 @@ package org.opensearch.sql.planner.physical; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.flatten; @@ -21,6 +23,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.DSL; @ToString @@ -30,8 +33,8 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; // Define input values for testing. 
- private final ExprValue doubleExprValue = ExprValueUtils.integerValue(0); - private final ExprValue integerExprValue = ExprValueUtils.doubleValue(0.0); + private final ExprValue integerExprValue = ExprValueUtils.integerValue(0); + private final ExprValue doubleExprValue = ExprValueUtils.doubleValue(0.0); private final ExprValue stringExprValue = ExprValueUtils.stringValue("value"); private final ExprValue structEmptyExprValue = ExprValueUtils.tupleValue(Map.of()); @@ -205,4 +208,13 @@ void testAncestorPathMissing() { assertEquals(expected, actual); } + + @Test + void testInvalidType() { + Exception ex = + assertThrows( + ExpressionEvaluationException.class, + () -> execute(flatten(inputPlan, DSL.ref("struct.integer", INTEGER)))); + assertEquals("invalid to get tupleValue from value of type INTEGER", ex.getMessage()); + } } From 4632c03e6334e9e1194600f147302fa1fc582f97 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 3 Feb 2025 10:58:58 -0800 Subject: [PATCH 31/81] Update to maintain existing field. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 10 +-- .../org/opensearch/sql/ast/tree/Flatten.java | 1 + .../sql/planner/physical/FlattenOperator.java | 3 +- .../planner/logical/LogicalFlattenTest.java | 19 ----- .../planner/physical/FlattenOperatorTest.java | 76 +++---------------- docs/user/ppl/cmd/flatten.rst | 25 +++--- .../opensearch/sql/ppl/FlattenCommandIT.java | 15 +++- 7 files changed, 39 insertions(+), 110 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 753d615a53..519289c77c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -466,6 +466,9 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { String fieldName = fieldExpr.getAttr(); ExprType fieldType = fieldExpr.type(); + // [A] Determine fields to add + // --------------------------- + // Iterate over all the fields defined in the type environment. Find all those that are // descended from field that is being flattened. Determine the new path to add and remove the // existing path. 
When determining the new path, we need to preserve the portion of the @@ -498,8 +501,6 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // ] Map addFieldsMap = new HashMap<>(); - Map removeFieldsMap = new HashMap<>(); - removeFieldsMap.put(fieldName, fieldType); TypeEnvironment env = context.peek(); Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); @@ -526,14 +527,11 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { ExprType type = fieldsMap.get(path); addFieldsMap.put(newPath, type); - removeFieldsMap.put(path, type); } - // [C] Update environment + // [B] Update environment // ---------------------- - removeFieldsMap.forEach((name, type) -> env.remove(DSL.ref(name, type))); - for (Map.Entry entry : addFieldsMap.entrySet()) { String name = entry.getKey(); ExprType type = entry.getValue(); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java index aa402d9cd9..64549410e4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java @@ -13,6 +13,7 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Field; +/** AST node representing a flatten operation. */ @Getter @ToString @RequiredArgsConstructor diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index e51ccc0736..59468f3bbc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -19,6 +19,7 @@ import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; +/** Flattens the specified field from the input and returns the result. 
*/ @Getter @ToString @RequiredArgsConstructor @@ -93,8 +94,6 @@ private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path exprValueMap.putAll(ExprValueUtils.getTupleValue(childExprValue)); } - exprValueMap.remove(path); - return ExprTupleValue.fromExprValueMap(exprValueMap); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 18e441643f..b8907639be 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -6,7 +6,6 @@ package org.opensearch.sql.planner.logical; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; @@ -60,8 +59,6 @@ void testStructEmpty() { analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_empty"))); assertEquals(expectedLogicalPlan, actualLogicalPlan); - - assertTypeNotDefined("struct_empty"); } @Test @@ -74,12 +71,6 @@ void testStruct() { assertEquals(expectedLogicalPlan, actualLogicalPlan); - assertTypeNotDefined("struct"); - assertTypeNotDefined("struct.integer"); - assertTypeNotDefined("struct.double"); - assertTypeNotDefined("struct.nested_struct"); - assertTypeNotDefined("struct.nested_struct.string"); - assertTypeDefined("integer", INTEGER); assertTypeDefined("double", DOUBLE); assertTypeDefined("nested_struct", STRUCT); @@ -96,9 +87,6 @@ void testStructNested() { assertEquals(expectedLogicalPlan, actualLogicalPlan); - assertTypeNotDefined("struct.nested_struct"); - assertTypeNotDefined("struct.nested_struct.string"); - assertTypeDefined("struct", STRUCT); assertTypeDefined("struct.integer", INTEGER); 
assertTypeDefined("struct.double", DOUBLE); @@ -127,13 +115,6 @@ void testInvalidDuplicate() { assertEquals("Flatten command cannot overwrite field 'integer_value'", msg); } - /** Asserts that the given field name is not defined in the type environment */ - private void assertTypeNotDefined(String fieldName) { - Map fieldsMap = - analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); - assertFalse(fieldsMap.containsKey(fieldName)); - } - /** * Asserts that the given field name is defined in the type environment and corresponds to the * given type. diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 6ec4e8b786..7abe43517f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -70,43 +70,19 @@ void setup() { @Test void testStructEmpty() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_empty", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_null", structNullExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test void testStructNull() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_null", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test void testStructMissing() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_missing", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - 
Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_null", structNullExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test @@ -119,6 +95,7 @@ void testStruct() { Map.entry("struct_empty", structEmptyExprValue), Map.entry("struct_null", structNullExprValue), Map.entry("struct_missing", structMissingExprValue), + Map.entry("struct", structExprValue), Map.entry("integer", integerExprValue), Map.entry("double", doubleExprValue), Map.entry("struct_nested", structNestedExprValue))); @@ -143,6 +120,7 @@ void testStructNested() { Map.ofEntries( Map.entry("integer", integerExprValue), Map.entry("double", doubleExprValue), + Map.entry("struct_nested", structNestedExprValue), Map.entry("string", stringExprValue)))))); assertEquals(expected, actual); @@ -151,62 +129,26 @@ void testStructNested() { @Test void testAncestorStructNull() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_null.path", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_null", structNullExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test void testAncestorStructMissing() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct_missing.path", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_null", structNullExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test void testPathMissing() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct.unknown", STRUCT))).getFirst(); - - 
ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_null", structNullExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test void testAncestorPathMissing() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("unknown", STRUCT))).getFirst(); - - ExprValue expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("struct_empty", structEmptyExprValue), - Map.entry("struct_null", structNullExprValue), - Map.entry("struct_missing", structMissingExprValue), - Map.entry("struct", structExprValue))); - - assertEquals(expected, actual); + assertEquals(rowValue, actual); } @Test diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index bb5c1b197c..98b43ef386 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -12,8 +12,7 @@ Description ============ The ``flatten`` command flattens an `object`'s fields. New fields are added to the search results corresponding -to each of the object's fields, while the object field itself is removed from the search results. If the -specified `object` is null or missing, the search results are not modified. +to each of the object's fields; if the specified `object` is null or missing, the search results are not modified. 
Syntax ============ @@ -27,7 +26,7 @@ Example 1: Flatten an object field PPL query:: - os> source=cities | flatten location + os> source=cities | flatten location | fields name, country, province, coordinates, state fetched rows / total rows = 4/4 +------------------+---------------+------------------+-----------------------------------------------+------------+ | name | country | province | coordinates | state | @@ -43,7 +42,7 @@ Example 2: Flatten multiple object fields PPL query:: - os> source=cities | flatten location | flatten coordinates + os> source=cities | flatten location | flatten coordinates | fields name, country, province, state, latitude, longitude fetched rows / total rows = 4/4 +------------------+---------------+------------------+------------+----------+-----------+ | name | country | province | state | latitude | longitude | @@ -59,14 +58,14 @@ Example 3: Flatten a nested object field PPL query:: - os> source=cities | flatten location.coordinates + os> source=cities | flatten location.coordinates | fields name, location fetched rows / total rows = 4/4 - +------------------+----------------------------------------------------------------------------------------------------+ - | name | location | - |------------------+----------------------------------------------------------------------------------------------------| - | Seattle | {'state': 'Washington', 'country': 'United States', 'latitude': 47.6061, 'longitude': -122.3328} | - | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'latitude': 49.2827, 'longitude': -123.1207} | - | Null Location | null | - | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | - +------------------+----------------------------------------------------------------------------------------------------+ + +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | 
name | location | + |------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | Seattle | {'state': 'Washington', 'country': 'United States', 'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'latitude': 49.2827, 'longitude': -123.1207} | + | Null Location | null | + | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | + +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 468e522048..65816bcb3f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -26,7 +26,10 @@ public void init() throws IOException { @Test public void testBasic() throws IOException { - String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_CITIES); + String query = + StringUtils.format( + "source=%s | flatten location | fields name, country, province, coordinates, state", + TEST_INDEX_CITIES); JSONObject result = executeQuery(query); verifySchema( @@ -57,7 +60,10 @@ public void testBasic() throws IOException { @Test public void testMultiple() throws IOException { String query = - StringUtils.format("source=%s | flatten location | flatten coordinates", TEST_INDEX_CITIES); + StringUtils.format( + "source=%s | flatten location | flatten coordinates | fields name, country, province," + + " state, latitude, longitude", + TEST_INDEX_CITIES); JSONObject result 
= executeQuery(query); verifySchema( @@ -79,7 +85,8 @@ public void testMultiple() throws IOException { @Test public void testNested() throws IOException { String query = - StringUtils.format("source=%s | flatten location.coordinates", TEST_INDEX_CITIES); + StringUtils.format( + "source=%s | flatten location.coordinates | fields name, location", TEST_INDEX_CITIES); JSONObject result = executeQuery(query); verifySchema(result, schema("name", "string"), schema("location", "struct")); @@ -90,6 +97,7 @@ public void testNested() throws IOException { Map.ofEntries( Map.entry("state", "Washington"), Map.entry("country", "United States"), + Map.entry("coordinates", Map.of("latitude", 47.6061, "longitude", -122.3328)), Map.entry("latitude", 47.6061), Map.entry("longitude", -122.3328))), rows( @@ -97,6 +105,7 @@ public void testNested() throws IOException { Map.ofEntries( Map.entry("country", "Canada"), Map.entry("province", "British Columbia"), + Map.entry("coordinates", Map.of("latitude", 49.2827, "longitude", -123.1207)), Map.entry("latitude", 49.2827), Map.entry("longitude", -123.1207))), rows("Null Location", null), From d75520852f47d5891f005163e8b137b884bfaace Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 3 Feb 2025 13:01:42 -0800 Subject: [PATCH 32/81] Update for test coverage Signed-off-by: currantw --- .../planner/physical/FlattenOperatorTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 7abe43517f..83eedfb38a 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -85,6 +85,12 @@ void testStructMissing() { assertEquals(rowValue, actual); } + @Test + void testStructUnknown() { + ExprValue actual = execute(flatten(inputPlan, 
DSL.ref("struct_unknown", STRUCT))).getFirst(); + assertEquals(rowValue, actual); + } + @Test void testStruct() { ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct", STRUCT))).getFirst(); @@ -140,14 +146,9 @@ void testAncestorStructMissing() { } @Test - void testPathMissing() { - ExprValue actual = execute(flatten(inputPlan, DSL.ref("struct.unknown", STRUCT))).getFirst(); - assertEquals(rowValue, actual); - } - - @Test - void testAncestorPathMissing() { - ExprValue actual = execute(flatten(inputPlan, DSL.ref("unknown", STRUCT))).getFirst(); + void testAncestorStructUnknown() { + ExprValue actual = + execute(flatten(inputPlan, DSL.ref("struct_unknown.path", STRUCT))).getFirst(); assertEquals(rowValue, actual); } From 09563abec44afc1dcdf5edcee76be9bc2de3501a Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 3 Feb 2025 15:23:48 -0800 Subject: [PATCH 33/81] Simplify `Analyzer` implementation Signed-off-by: currantw --- core/src/main/java/org/opensearch/sql/analysis/Analyzer.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 519289c77c..9cc99d3d6c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -464,7 +464,6 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { ReferenceExpression fieldExpr = (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); String fieldName = fieldExpr.getAttr(); - ExprType fieldType = fieldExpr.type(); // [A] Determine fields to add // --------------------------- @@ -545,7 +544,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { env.define(DSL.ref(name, type)); } - return new LogicalFlatten(child, DSL.ref(fieldName, STRUCT)); + return new LogicalFlatten(child, fieldExpr); } /** Build {@link ParseExpression} to context and 
skip to child nodes. */ From 1d391ce46cbde4bf04db9e646b16ad3fecc363f2 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 10:00:31 -0800 Subject: [PATCH 34/81] Rename `cities` dataset to `flatten` Signed-off-by: currantw --- docs/user/dql/metadata.rst | 2 +- docs/user/ppl/cmd/flatten.rst | 6 +++--- doctest/test_data/{cities.json => flatten.json} | 0 doctest/test_docs.py | 6 +++--- doctest/test_mapping/{cities.json => flatten.json} | 0 .../org/opensearch/sql/legacy/SQLIntegTestCase.java | 12 ++++++------ .../java/org/opensearch/sql/legacy/TestUtils.java | 4 ++-- .../org/opensearch/sql/legacy/TestsConstants.java | 2 +- .../test/java/org/opensearch/sql/ppl/ExplainIT.java | 6 +++--- .../org/opensearch/sql/ppl/FlattenCommandIT.java | 10 +++++----- .../expectedOutput/ppl/explain_flatten.json | 2 +- .../src/test/resources/{cities.json => flatten.json} | 0 .../{cities_mapping.json => flatten_mapping.json} | 0 13 files changed, 25 insertions(+), 25 deletions(-) rename doctest/test_data/{cities.json => flatten.json} (100%) rename doctest/test_mapping/{cities.json => flatten.json} (100%) rename integ-test/src/test/resources/{cities.json => flatten.json} (100%) rename integ-test/src/test/resources/indexDefinitions/{cities_mapping.json => flatten_mapping.json} (100%) diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 74ce3566c3..d617ce946b 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -44,7 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | cities | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | flatten | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | json_test | BASE TABLE | null | 
null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 98b43ef386..6dbdeff2ab 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -26,7 +26,7 @@ Example 1: Flatten an object field PPL query:: - os> source=cities | flatten location | fields name, country, province, coordinates, state + os> source=flatten | flatten location | fields name, country, province, coordinates, state fetched rows / total rows = 4/4 +------------------+---------------+------------------+-----------------------------------------------+------------+ | name | country | province | coordinates | state | @@ -42,7 +42,7 @@ Example 2: Flatten multiple object fields PPL query:: - os> source=cities | flatten location | flatten coordinates | fields name, country, province, state, latitude, longitude + os> source=flatten | flatten location | flatten coordinates | fields name, country, province, state, latitude, longitude fetched rows / total rows = 4/4 +------------------+---------------+------------------+------------+----------+-----------+ | name | country | province | state | latitude | longitude | @@ -58,7 +58,7 @@ Example 3: Flatten a nested object field PPL query:: - os> source=cities | flatten location.coordinates | fields name, location + os> source=flatten | flatten location.coordinates | fields name, location fetched rows / total rows = 4/4 +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | name | location | diff --git a/doctest/test_data/cities.json b/doctest/test_data/flatten.json similarity index 100% rename from doctest/test_data/cities.json rename to doctest/test_data/flatten.json diff 
--git a/doctest/test_docs.py b/doctest/test_docs.py index 50cfc1c2df..b0a4bc2a09 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -31,7 +31,7 @@ DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" JSON_TEST = "json_test" -CITIES = "cities" +FLATTEN = "flatten" class DocTestConnection(OpenSearchConnection): @@ -126,7 +126,7 @@ def set_up_test_indices(test): load_file("datasources.json", index_name=DATASOURCES) load_file("weblogs.json", index_name=WEBLOGS) load_file("json_test.json", index_name=JSON_TEST) - load_file("cities.json", index_name=CITIES) + load_file("flatten.json", index_name=FLATTEN) def load_file(filename, index_name): # Create index with the mapping if mapping file exists @@ -154,7 +154,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, CITIES], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, FLATTEN], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/cities.json b/doctest/test_mapping/flatten.json similarity index 100% rename from doctest/test_mapping/cities.json rename to doctest/test_mapping/flatten.json diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 9c877c52de..4bb790a758 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -11,7 +11,7 @@ import static org.opensearch.sql.legacy.TestUtils.getAliasIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankWithNullValuesIndexMapping; 
-import static org.opensearch.sql.legacy.TestUtils.getCitiesIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getFlattenIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNonnumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDateIndexMapping; @@ -760,11 +760,11 @@ public enum Index { "json", getJsonTestIndexMapping(), "src/test/resources/json_test.json"), - CITIES( - TestsConstants.TEST_INDEX_CITIES, - "cities", - getCitiesIndexMapping(), - "src/test/resources/cities.json"), + FLATTEN( + TestsConstants.TEST_INDEX_FLATTEN, + "flatten", + getFlattenIndexMapping(), + "src/test/resources/flatten.json"), DATA_TYPE_ALIAS( TestsConstants.TEST_INDEX_ALIAS, "alias", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index af62936b92..e3b8d21793 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -265,8 +265,8 @@ public static String getAliasIndexMapping() { return getMappingFile(mappingFile); } - public static String getCitiesIndexMapping() { - String mappingFile = "cities.mapping.json"; + public static String getFlattenIndexMapping() { + String mappingFile = "flatten_mapping.json"; return getMappingFile(mappingFile); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index b421efc47f..0739628a41 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -60,7 +60,7 @@ public class TestsConstants { public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String TEST_INDEX_JSON_TEST = 
TEST_INDEX + "_json_test"; public static final String TEST_INDEX_ALIAS = TEST_INDEX + "_alias"; - public static final String TEST_INDEX_CITIES = TEST_INDEX + "_cities"; + public static final String TEST_INDEX_FLATTEN = TEST_INDEX + "_flatten"; public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; public static final String DATASOURCES = ".ql-datasources"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 6fb30a3b26..c98d879230 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -5,7 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CITIES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import com.google.common.io.Resources; @@ -21,7 +21,7 @@ public class ExplainIT extends PPLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.ACCOUNT); - loadIndex(Index.CITIES); + loadIndex(Index.FLATTEN); } @Test @@ -131,7 +131,7 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testFlatten() throws Exception { - String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_CITIES); + String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_FLATTEN); String actual = explainQueryToString(query); String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); assertJsonEquals(expected, actual); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 65816bcb3f..131f1a60b6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -5,7 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CITIES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -21,7 +21,7 @@ public class FlattenCommandIT extends PPLIntegTestCase { @Override public void init() throws IOException { - loadIndex(Index.CITIES); + loadIndex(Index.FLATTEN); } @Test @@ -29,7 +29,7 @@ public void testBasic() throws IOException { String query = StringUtils.format( "source=%s | flatten location | fields name, country, province, coordinates, state", - TEST_INDEX_CITIES); + TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); verifySchema( @@ -63,7 +63,7 @@ public void testMultiple() throws IOException { StringUtils.format( "source=%s | flatten location | flatten coordinates | fields name, country, province," + " state, latitude, longitude", - TEST_INDEX_CITIES); + TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); verifySchema( @@ -86,7 +86,7 @@ public void testMultiple() throws IOException { public void testNested() throws IOException { String query = StringUtils.format( - "source=%s | flatten location.coordinates | fields name, location", TEST_INDEX_CITIES); + "source=%s | flatten location.coordinates | fields name, location", TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); verifySchema(result, schema("name", "string"), schema("location", "struct")); diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json index b729bf415f..bbc731160f 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json @@ -20,7 +20,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_cities, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_flatten, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/cities.json b/integ-test/src/test/resources/flatten.json similarity index 100% rename from integ-test/src/test/resources/cities.json rename to integ-test/src/test/resources/flatten.json diff --git a/integ-test/src/test/resources/indexDefinitions/cities_mapping.json b/integ-test/src/test/resources/indexDefinitions/flatten_mapping.json similarity index 100% rename from integ-test/src/test/resources/indexDefinitions/cities_mapping.json rename to integ-test/src/test/resources/indexDefinitions/flatten_mapping.json From ef750f4d5e1c61ec341e78774b821ad16250c22e Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 12:41:52 -0800 Subject: [PATCH 35/81] SpotlessApply Signed-off-by: currantw --- .../test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java | 2 +- .../test/java/org/opensearch/sql/ppl/FlattenCommandIT.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 4bb790a758..7bd291e6cc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -11,7 +11,6 @@ import static 
org.opensearch.sql.legacy.TestUtils.getAliasIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankWithNullValuesIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getFlattenIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNonnumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNumericIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDateIndexMapping; @@ -21,6 +20,7 @@ import static org.opensearch.sql.legacy.TestUtils.getDogs2IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getFlattenIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeoIpIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 131f1a60b6..916f5eb674 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -29,7 +29,7 @@ public void testBasic() throws IOException { String query = StringUtils.format( "source=%s | flatten location | fields name, country, province, coordinates, state", - TEST_INDEX_FLATTEN); + TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); verifySchema( @@ -63,7 +63,7 @@ public void testMultiple() throws IOException { StringUtils.format( "source=%s | flatten location | flatten coordinates | fields name, country, province," + " state, latitude, longitude", - TEST_INDEX_FLATTEN); + TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); 
verifySchema( From 14e005e684274ceb1ed46cefe1d6e037504dbee7 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 12:45:13 -0800 Subject: [PATCH 36/81] Minor doc cleanup. Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 6dbdeff2ab..6d1f26c841 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -11,15 +11,15 @@ flatten Description ============ -The ``flatten`` command flattens an `object`'s fields. New fields are added to the search results corresponding -to each of the object's fields; if the specified `object` is null or missing, the search results are not modified. +The ``flatten`` command flattens an object's field by adding a new field to the search results corresponding +to each of the object's fields. If the specified object is null or missing, the search results are not modified. Syntax ============ -`flatten ` +``flatten `` -* `field`: reference to the `object` field to flatten. +* ``field``: reference to the object field to flatten. Example 1: Flatten an object field ================================== @@ -69,3 +69,7 @@ PPL query:: | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +Example 4: Flatten and expand +============================= + +TODO #3016: Add test once flatten merged. 
\ No newline at end of file From 73885a79290b75809689a56ac1bf0f52203ccf90 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 15:17:33 -0800 Subject: [PATCH 37/81] Fix failing IT Signed-off-by: currantw --- .../test/java/org/opensearch/sql/ppl/FlattenCommandIT.java | 4 ++-- .../test/resources/expectedOutput/ppl/explain_flatten.json | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 916f5eb674..06186193e7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -72,8 +72,8 @@ public void testMultiple() throws IOException { schema("country", "string"), schema("province", "string"), schema("state", "string"), - schema("latitude", "float"), - schema("longitude", "float")); + schema("latitude", "double"), + schema("longitude", "double")); verifyDataRows( result, rows("Seattle", "United States", null, "Washington", 47.6061, -122.3328), diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json index bbc731160f..2483720800 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json @@ -2,7 +2,7 @@ "root": { "name": "ProjectOperator", "description": { - "fields": "[name, country, province, coordinates, state]" + "fields": "[name, location, country, province, coordinates, state]" }, "children": [ { @@ -10,6 +10,7 @@ "description": { "flattenField": { "attr": "location", + "rawPath": "location", "paths": [ "location" ], From 4fbd3204db9b719c8f71539e8871c749cc3f775b Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 15:50:48 -0800 Subject: [PATCH 38/81] Update incorrect documentation in 
`Analyzer.visitFlatten`. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 9cc99d3d6c..dce8813764 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -469,10 +469,10 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // --------------------------- // Iterate over all the fields defined in the type environment. Find all those that are - // descended from field that is being flattened. Determine the new path to add and remove the - // existing path. When determining the new path, we need to preserve the portion of the - // path corresponding to the flattened field's parent, if one exists, in order to support - // flattening nested structs - see example below. + // descended from field that is being flattened, and determine the new paths to add. When + // determining the new paths, we need to preserve the portion of the path corresponding to the + // flattened field's parent, if one exists, in order to support flattening nested structs - see + // example below. 
// // Input Data: // @@ -486,8 +486,12 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Example 1: 'flatten struct' // // [ - // integer: 0, - // nested_struct: { string: "value" } + // struct: { + // integer: 0, + // nested_struct: { string: "value" } + // }, + // integer: 0, + // nested_struct: { string: "value" } // ] // // Example 2: 'flatten struct.nested_struct' @@ -495,6 +499,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // [ // struct: { // integer: 0, + // nested_struct: { string: "value" } // string: "value" // { // ] From 337fb0166caa6214ca30849fc0fda3f8955c00ae Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 16:51:32 -0800 Subject: [PATCH 39/81] Update integ and doc tests to add another example of original field being preserved. Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 18 +++++----- .../opensearch/sql/ppl/FlattenCommandIT.java | 34 +++++++++++++------ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 6d1f26c841..c81e571b01 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -42,16 +42,16 @@ Example 2: Flatten multiple object fields PPL query:: - os> source=flatten | flatten location | flatten coordinates | fields name, country, province, state, latitude, longitude + os> source=flatten | flatten location | flatten coordinates | fields name, location, latitude, longitude fetched rows / total rows = 4/4 - +------------------+---------------+------------------+------------+----------+-----------+ - | name | country | province | state | latitude | longitude | - |------------------+---------------+------------------+------------+----------+-----------| - | Seattle | United States | null | Washington | 47.6061 | -122.3328 | - | Vancouver | Canada | British Columbia | null | 49.2827 | -123.1207 | - | Null Location | null | null | null | null | null | - | 
Null Coordinates | Australia | null | Victoria | null | null | - +------------------+---------------+------------------+------------+----------+-----------+ + +------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ + | name | location | latitude | longitude | + |------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------| + | Seattle | {'state': 'Washington', 'country': 'United States', 'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}} | 47.6061 | -122.3328 | + | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}} | 49.2827 | -123.1207 | + | Null Location | null | null | null | + | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | null | null | + +------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ Example 3: Flatten a nested object field ======================================== diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index 06186193e7..d3ae3f7ee7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -45,13 +45,13 @@ public void testBasic() throws IOException { "Seattle", "United States", null, - Map.ofEntries(Map.entry("latitude", 47.6061), Map.entry("longitude", -122.3328)), + Map.of("latitude", 47.6061, "longitude", -122.3328), "Washington"), rows( "Vancouver", "Canada", "British Columbia", - Map.ofEntries(Map.entry("latitude", 49.2827), Map.entry("longitude", -123.1207)), + Map.of("latitude", 49.2827, "longitude", -123.1207), 
null), rows("Null Location", null, null, null, null), rows("Null Coordinates", "Australia", null, null, "Victoria")); @@ -61,25 +61,37 @@ public void testBasic() throws IOException { public void testMultiple() throws IOException { String query = StringUtils.format( - "source=%s | flatten location | flatten coordinates | fields name, country, province," - + " state, latitude, longitude", + "source=%s | flatten location | flatten coordinates | fields name, location, latitude," + + " longitude", TEST_INDEX_FLATTEN); JSONObject result = executeQuery(query); verifySchema( result, schema("name", "string"), - schema("country", "string"), - schema("province", "string"), - schema("state", "string"), + schema("location", "struct"), schema("latitude", "double"), schema("longitude", "double")); verifyDataRows( result, - rows("Seattle", "United States", null, "Washington", 47.6061, -122.3328), - rows("Vancouver", "Canada", "British Columbia", null, 49.2827, -123.1207), - rows("Null Location", null, null, null, null, null), - rows("Null Coordinates", "Australia", null, "Victoria", null, null)); + rows( + "Seattle", + Map.ofEntries( + Map.entry("state", "Washington"), + Map.entry("country", "United States"), + Map.entry("coordinates", Map.of("latitude", 47.6061, "longitude", -122.3328))), + 47.6061, + -122.3328), + rows( + "Vancouver", + Map.ofEntries( + Map.entry("country", "Canada"), + Map.entry("province", "British Columbia"), + Map.entry("coordinates", Map.of("latitude", 49.2827, "longitude", -123.1207))), + 49.2827, + -123.1207), + rows("Null Location", null, null, null), + rows("Null Coordinates", Map.of("state", "Victoria", "country", "Australia"), null, null)); } @Test From abe5c6cb7bf91f6bc3f807fab692a5ee38021396 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 08:32:37 -0800 Subject: [PATCH 40/81] Review comment - move example to `Analyzer.visitFlatten` Javadoc. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 46 ++++++------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index dce8813764..7077187f59 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -456,6 +456,19 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { /** * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to * the given flatten node. + * + *

Input Data: + * + *

[ struct: { integer: 0, nested_struct: { string: "value" } } ] + * + *

Example 1: 'flatten struct' + * + *

[ struct: { integer: 0, nested_struct: { string: "value" } }, integer: 0, nested_struct: { + * string: "value" } ] + * + *

Example 2: 'flatten struct.nested_struct' + * + *

[ struct: { integer: 0, nested_struct: { string: "value" } string: "value" { ] */ @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { @@ -471,38 +484,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Iterate over all the fields defined in the type environment. Find all those that are // descended from field that is being flattened, and determine the new paths to add. When // determining the new paths, we need to preserve the portion of the path corresponding to the - // flattened field's parent, if one exists, in order to support flattening nested structs - see - // example below. - // - // Input Data: - // - // [ - // struct: { - // integer: 0, - // nested_struct: { string: "value" } - // } - // ] - // - // Example 1: 'flatten struct' - // - // [ - // struct: { - // integer: 0, - // nested_struct: { string: "value" } - // }, - // integer: 0, - // nested_struct: { string: "value" } - // ] - // - // Example 2: 'flatten struct.nested_struct' - // - // [ - // struct: { - // integer: 0, - // nested_struct: { string: "value" } - // string: "value" - // { - // ] + // flattened field's parent, if one exists, in order to support flattening nested structs. Map addFieldsMap = new HashMap<>(); From a0022f46318fb06eb7ed91a255e7ec7dcc06eb74 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 09:21:59 -0800 Subject: [PATCH 41/81] Review comment - update `Analyzer.visitFlatten` Javadoc to specify that it also updates env, plus Javadoc formatting. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 7077187f59..6b442cb37c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -455,20 +455,45 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { /** * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to - * the given flatten node. + * the given flatten node, and adds the new fields to the current type environment. + * + *

Example * *

Input Data: * - *

[ struct: { integer: 0, nested_struct: { string: "value" } } ] + *

+   * [
+   *   struct: {
+   *     integer: 0,
+   *     nested_struct: { string: "value" }
+   *   }
+   * ]
+   * 
* - *

Example 1: 'flatten struct' + * Query 1: flatten struct * - *

[ struct: { integer: 0, nested_struct: { string: "value" } }, integer: 0, nested_struct: { - * string: "value" } ] + *

+   * [
+   *   struct: {
+   *     integer: 0,
+   *     nested_struct: { string: "value" }
+   *   },
+   *   integer: 0,
+   *   nested_struct: { string: "value" }
+   * ]
+   * 
* - *

Example 2: 'flatten struct.nested_struct' + * Query 2: flatten struct.nested_struct * - *

[ struct: { integer: 0, nested_struct: { string: "value" } string: "value" { ] + *

+   * [
+   *   struct: {
+   *     integer: 0,
+   *     nested_struct: { string: "value" },
+   *     string: "value"
+   *   }
+   * ]
+   * 
*/ @Override public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { @@ -515,8 +540,8 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { addFieldsMap.put(newPath, type); } - // [B] Update environment - // ---------------------- + // [B] Add new fields to type environment + // -------------------------------------- for (Map.Entry entry : addFieldsMap.entrySet()) { String name = entry.getKey(); From df99d377ceff4bdbbbe079b7966de24b87c4bbc4 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 09:23:06 -0800 Subject: [PATCH 42/81] Review comment - remove unnecessary @Getter Signed-off-by: currantw --- core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java index 64549410e4..b78152eee8 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Flatten.java @@ -13,13 +13,12 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Field; -/** AST node representing a flatten operation. */ -@Getter +/** AST node representing a {@code flatten } operation. 
*/ @ToString @RequiredArgsConstructor public class Flatten extends UnresolvedPlan { - private UnresolvedPlan child; + private UnresolvedPlan child; @Getter private final Field field; @Override From 6883214aaac558cd9e0f2b591a29365215720a56 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 09:31:44 -0800 Subject: [PATCH 43/81] Review comments - add `testStructNestedDeep` test case Signed-off-by: currantw --- .../planner/physical/FlattenOperatorTest.java | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java index 83eedfb38a..3fdf6867f7 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FlattenOperatorTest.java @@ -36,13 +36,19 @@ class FlattenOperatorTest extends PhysicalPlanTestBase { private final ExprValue integerExprValue = ExprValueUtils.integerValue(0); private final ExprValue doubleExprValue = ExprValueUtils.doubleValue(0.0); private final ExprValue stringExprValue = ExprValueUtils.stringValue("value"); + private final ExprValue booleanExprValue = ExprValueUtils.booleanValue(true); private final ExprValue structEmptyExprValue = ExprValueUtils.tupleValue(Map.of()); private final ExprValue structNullExprValue = ExprValueUtils.nullValue(); private final ExprValue structMissingExprValue = ExprValueUtils.missingValue(); + private final ExprValue structNestedDeepExprValue = + ExprValueUtils.tupleValue(Map.of("boolean", booleanExprValue)); private final ExprValue structNestedExprValue = - ExprValueUtils.tupleValue(Map.of("string", stringExprValue)); + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("string", stringExprValue), + Map.entry("struct_nested_deep", structNestedDeepExprValue))); private final ExprValue structExprValue = ExprValueUtils.tupleValue( 
@@ -127,8 +133,37 @@ void testStructNested() { Map.entry("integer", integerExprValue), Map.entry("double", doubleExprValue), Map.entry("struct_nested", structNestedExprValue), - Map.entry("string", stringExprValue)))))); + Map.entry("string", stringExprValue), + Map.entry("struct_nested_deep", structNestedDeepExprValue)))))); + + assertEquals(expected, actual); + } + @Test + void testStructNestedDeep() { + ExprValue actual = + execute(flatten(inputPlan, DSL.ref("struct.struct_nested.struct_nested_deep", STRUCT))) + .getFirst(); + + ExprValue expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("struct_empty", structEmptyExprValue), + Map.entry("struct_null", structNullExprValue), + Map.entry("struct_missing", structMissingExprValue), + Map.entry( + "struct", + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("double", doubleExprValue), + Map.entry( + "struct_nested", + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("string", stringExprValue), + Map.entry("struct_nested_deep", structNestedDeepExprValue), + Map.entry("boolean", booleanExprValue))))))))); assertEquals(expected, actual); } From 94a4c8a6e689dc841d60edbaed24d65467b4401d Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 09:53:52 -0800 Subject: [PATCH 44/81] Review comments - add `testStructNestedDeep` test case Signed-off-by: currantw --- .../planner/logical/LogicalFlattenTest.java | 96 +++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index b8907639be..0e98703826 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static 
org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -21,7 +22,6 @@ import org.opensearch.sql.analysis.AnalyzerTestBase; import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.ast.dsl.AstDSL; -import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -29,6 +29,7 @@ @ExtendWith(MockitoExtension.class) class LogicalFlattenTest extends AnalyzerTestBase { + // Test constants private static final String TABLE_NAME = "schema"; @Override @@ -41,8 +42,10 @@ protected Map typeMapping() { mapping.put("struct", STRUCT); mapping.put("struct.integer", INTEGER); mapping.put("struct.double", DOUBLE); - mapping.put("struct.nested_struct", STRUCT); - mapping.put("struct.nested_struct.string", STRING); + mapping.put("struct.struct_nested", STRUCT); + mapping.put("struct.struct_nested.string", STRING); + mapping.put("struct.struct_nested.struct_nested_deep", STRUCT); + mapping.put("struct.struct_nested.struct_nested_deep.boolean", BOOLEAN); mapping.put("duplicate", STRUCT); mapping.put("duplicate.integer_value", INTEGER); @@ -52,73 +55,90 @@ protected Map typeMapping() { @Test void testStructEmpty() { - LogicalPlan expectedLogicalPlan = - LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct_empty", STRUCT)); - LogicalPlan actualLogicalPlan = - analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct_empty"))); + executeFlatten("struct_empty"); - assertEquals(expectedLogicalPlan, actualLogicalPlan); + assertTypeDefined("struct_empty", STRUCT); } @Test void testStruct() 
{ - LogicalPlan expectedLogicalPlan = - LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct", STRUCT)); - LogicalPlan actualLogicalPlan = - analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct"))); + executeFlatten("struct"); - assertEquals(expectedLogicalPlan, actualLogicalPlan); + assertTypeDefined("struct", STRUCT); + assertTypeDefined("struct.integer", INTEGER); + assertTypeDefined("struct.double", DOUBLE); + assertTypeDefined("struct.struct_nested", STRUCT); + assertTypeDefined("struct.struct_nested.string", STRING); + assertTypeDefined("struct", STRUCT); assertTypeDefined("integer", INTEGER); assertTypeDefined("double", DOUBLE); - assertTypeDefined("nested_struct", STRUCT); - assertTypeDefined("nested_struct.string", STRING); + assertTypeDefined("struct_nested", STRUCT); + assertTypeDefined("struct_nested.string", STRING); } @Test void testStructNested() { - LogicalPlan expectedLogicalPlan = - LogicalPlanDSL.flatten( - LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("struct.nested_struct", STRUCT)); - LogicalPlan actualLogicalPlan = - analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field("struct.nested_struct"))); - - assertEquals(expectedLogicalPlan, actualLogicalPlan); + executeFlatten("struct.struct_nested"); assertTypeDefined("struct", STRUCT); - assertTypeDefined("struct.integer", INTEGER); - assertTypeDefined("struct.double", DOUBLE); + assertTypeDefined("struct.struct_nested", STRUCT); + assertTypeDefined("struct.struct_nested.string", STRING); + assertTypeDefined("struct.string", STRING); } + @Test + void testStructNestedDeep() { + executeFlatten("struct.struct_nested.struct_nested_deep"); + + assertTypeDefined("struct", STRUCT); + assertTypeDefined("struct.struct_nested", STRUCT); + assertTypeDefined("struct.struct_nested.struct_nested_deep", STRUCT); + assertTypeDefined("struct.struct_nested.struct_nested_deep.boolean", BOOLEAN); + + 
assertTypeDefined("struct.struct_nested.boolean", BOOLEAN); + } + @Test void testInvalidName() { - Flatten actualUnresolvedPlan = - AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("invalid")); + Exception ex; - String msg = - assertThrows(SemanticCheckException.class, () -> analyze(actualUnresolvedPlan)) - .getMessage(); - assertEquals("can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", msg); + ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid")); + assertEquals( + "can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", ex.getMessage()); + + ex = assertThrows(SemanticCheckException.class, () -> executeFlatten(".invalid")); + assertEquals( + "can't resolve Symbol(namespace=FIELD_NAME, name=.struct) in type env", ex.getMessage()); + + ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid.")); + assertEquals( + "can't resolve Symbol(namespace=FIELD_NAME, name=struct.) in type env", ex.getMessage()); } @Test void testInvalidDuplicate() { - Flatten actualUnresolvedPlan = - AstDSL.flatten(AstDSL.relation("schema"), AstDSL.field("duplicate")); - String msg = - assertThrows(IllegalArgumentException.class, () -> analyze(actualUnresolvedPlan)) + assertThrows(IllegalArgumentException.class, () -> executeFlatten("duplicate")) .getMessage(); assertEquals("Flatten command cannot overwrite field 'integer_value'", msg); } /** - * Asserts that the given field name is defined in the type environment and corresponds to the - * given type. + * Builds the actual and expected logical plans by flattening the field with the given name, and + * tests whether they are equal. 
*/ + private void executeFlatten(String fieldName) { + LogicalPlan expected = + LogicalPlanDSL.flatten( + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref(fieldName, STRUCT)); + LogicalPlan actual = + analyze(AstDSL.flatten(AstDSL.relation(TABLE_NAME), AstDSL.field(fieldName))); + assertEquals(expected, actual); + } + + /** Asserts that the given field name is defined in the type environment with the given type. */ private void assertTypeDefined(String fieldName, ExprType fieldType) { Map fieldsMap = analysisContext.peek().lookupAllTupleFields(Namespace.FIELD_NAME); From 26563c91ee7f9d70b7d4fcb331819b5a1bc28cdd Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 10:59:24 -0800 Subject: [PATCH 45/81] Woops! Fix failing test. Signed-off-by: currantw --- .../opensearch/sql/planner/logical/LogicalFlattenTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index 0e98703826..d8f386ec00 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -110,11 +110,11 @@ void testInvalidName() { ex = assertThrows(SemanticCheckException.class, () -> executeFlatten(".invalid")); assertEquals( - "can't resolve Symbol(namespace=FIELD_NAME, name=.struct) in type env", ex.getMessage()); + "can't resolve Symbol(namespace=FIELD_NAME, name=.invalid) in type env", ex.getMessage()); ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid.")); assertEquals( - "can't resolve Symbol(namespace=FIELD_NAME, name=struct.) in type env", ex.getMessage()); + "can't resolve Symbol(namespace=FIELD_NAME, name=invalid.) 
in type env", ex.getMessage()); } @Test From bfb51a51bed45dd87a1ab4fc7716e24332ce7e0a Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 12:57:47 -0800 Subject: [PATCH 46/81] Review comments - extract `PathUtils` constants Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 10 +++++----- ...ataSourceSchemaIdentifierNameResolver.java | 5 ++--- .../sql/expression/ReferenceExpression.java | 5 ++--- .../sql/planner/physical/FlattenOperator.java | 6 ++---- .../opensearch/sql/utils/ExpressionUtils.java | 2 -- .../org/opensearch/sql/utils/PathUtils.java | 19 +++++++++++++++++++ 6 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/utils/PathUtils.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 6b442cb37c..717fbd147c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -115,6 +115,7 @@ import org.opensearch.sql.planner.physical.datasource.DataSourceTable; import org.opensearch.sql.storage.Table; import org.opensearch.sql.utils.ParseUtils; +import org.opensearch.sql.utils.PathUtils; /** * Analyze the {@link UnresolvedPlan} in the {@link AnalysisContext} to construct the {@link @@ -516,11 +517,10 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { TypeEnvironment env = context.peek(); Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); - final String pathSeparator = "."; - final String fieldDescendantPath = fieldName + pathSeparator; + final String fieldDescendantPath = fieldName + PathUtils.SEPARATOR; final Optional fieldParentPath = - fieldName.contains(pathSeparator) - ? Optional.of(fieldName.substring(0, fieldName.lastIndexOf(pathSeparator))) + fieldName.contains(PathUtils.SEPARATOR) + ? 
Optional.of(fieldName.substring(0, fieldName.lastIndexOf(PathUtils.SEPARATOR))) : Optional.empty(); for (String path : fieldsMap.keySet()) { @@ -533,7 +533,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Build the new path. String newPath = path.substring(fieldDescendantPath.length()); if (fieldParentPath.isPresent()) { - newPath = fieldParentPath.get() + pathSeparator + newPath; + newPath = fieldParentPath.get() + PathUtils.SEPARATOR + newPath; } ExprType type = fieldsMap.get(path); diff --git a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java index 31719d2fe3..098e3bee36 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java @@ -9,6 +9,7 @@ import java.util.List; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.utils.PathUtils; public class DataSourceSchemaIdentifierNameResolver { @@ -21,8 +22,6 @@ public class DataSourceSchemaIdentifierNameResolver { private final String identifierName; private final DataSourceService dataSourceService; - private static final String DOT = "."; - /** * Data model for capturing dataSourceName, schema and identifier from fully qualifiedName. 
In the * current state, it is used to capture DataSourceSchemaTable name and DataSourceSchemaFunction in @@ -35,7 +34,7 @@ public DataSourceSchemaIdentifierNameResolver( DataSourceService dataSourceService, List parts) { this.dataSourceService = dataSourceService; List remainingParts = captureSchemaName(captureDataSourceName(parts)); - identifierName = String.join(DOT, remainingParts); + identifierName = String.join(PathUtils.SEPARATOR, remainingParts); } public String getIdentifierName() { diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index 22a4607152..1069606300 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -5,8 +5,6 @@ package org.opensearch.sql.expression; -import static org.opensearch.sql.utils.ExpressionUtils.PATH_SEP; - import java.util.Arrays; import java.util.List; import lombok.EqualsAndHashCode; @@ -17,6 +15,7 @@ import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.utils.PathUtils; @EqualsAndHashCode @RequiredArgsConstructor @@ -106,7 +105,7 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + ExprValue wholePathValue = value.keyValue(String.join(PathUtils.SEPARATOR, paths)); // For array types only first index currently supported. 
if (value.type().equals(ExprCoreType.ARRAY)) { wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 59468f3bbc..60d8bad284 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -9,7 +9,6 @@ import java.util.List; import java.util.Map; import java.util.regex.Matcher; -import java.util.regex.Pattern; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -18,6 +17,7 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.utils.PathUtils; /** Flattens the specified field from the input and returns the result. */ @Getter @@ -29,8 +29,6 @@ public class FlattenOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; - private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); - @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { return visitor.visitFlatten(this, context); @@ -57,7 +55,7 @@ public ExprValue next() { */ private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path) { - Matcher matcher = PATH_SEPARATOR_PATTERN.matcher(path); + Matcher matcher = PathUtils.SEPARATOR_PATTERN.matcher(path); Map exprValueMap = ExprValueUtils.getTupleValue(exprValue); // [A] Flatten nested struct value diff --git a/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java b/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java index 8ae0c6ba88..19dd22fc16 100644 --- a/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java +++ 
b/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java @@ -14,8 +14,6 @@ @UtilityClass public class ExpressionUtils { - public static final String PATH_SEP = "."; - /** Format the list of {@link Expression}. */ public static String format(List expressionList) { return expressionList.stream().map(Expression::toString).collect(Collectors.joining(",")); diff --git a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java new file mode 100644 index 0000000000..3414cdfcfb --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java @@ -0,0 +1,19 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import java.util.regex.Pattern; +import lombok.experimental.UtilityClass; + +@UtilityClass +public class PathUtils { + + /** Path separator string */ + public final String SEPARATOR = "."; + + /** Pattern that matches the path separator string */ + public final Pattern SEPARATOR_PATTERN = Pattern.compile(SEPARATOR, Pattern.LITERAL); +} From 22eccaf6b0cb0fb36777d29257f8d03817c8c9db Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 6 Feb 2025 16:25:19 -0800 Subject: [PATCH 47/81] Review comments - update `Analyzer` to not use `Optional`. 
Signed-off-by: currantw --- .../java/org/opensearch/sql/analysis/Analyzer.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 717fbd147c..1384c4cb7e 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -518,10 +518,10 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); final String fieldDescendantPath = fieldName + PathUtils.SEPARATOR; - final Optional fieldParentPath = + final String fieldParentPath = fieldName.contains(PathUtils.SEPARATOR) - ? Optional.of(fieldName.substring(0, fieldName.lastIndexOf(PathUtils.SEPARATOR))) - : Optional.empty(); + ? fieldName.substring(0, fieldName.lastIndexOf(PathUtils.SEPARATOR)) + : ""; for (String path : fieldsMap.keySet()) { @@ -532,8 +532,8 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Build the new path. String newPath = path.substring(fieldDescendantPath.length()); - if (fieldParentPath.isPresent()) { - newPath = fieldParentPath.get() + PathUtils.SEPARATOR + newPath; + if (!fieldParentPath.isEmpty()) { + newPath = String.join(PathUtils.SEPARATOR, fieldParentPath, newPath); } ExprType type = fieldsMap.get(path); From dcd241a320dc4f95a33ca4c884de9a8ee54c4dd3 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 09:13:28 -0800 Subject: [PATCH 48/81] Bunch of additional review comments. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 109 +++++++++--------- ...ataSourceSchemaIdentifierNameResolver.java | 5 +- .../sql/expression/ReferenceExpression.java | 5 +- .../sql/expression/ip/IPFunctionsTest.java | 1 - .../planner/logical/LogicalFlattenTest.java | 16 ++- 5 files changed, 70 insertions(+), 66 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 1384c4cb7e..1b4e6b0a55 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -6,6 +6,9 @@ package org.opensearch.sql.analysis; import static org.opensearch.sql.analysis.DataSourceSchemaIdentifierNameResolver.DEFAULT_DATASOURCE_NAME; +import static org.opensearch.sql.analysis.symbol.Namespace.FIELD_NAME; +import static org.opensearch.sql.analysis.symbol.Namespace.HIDDEN_FIELD_NAME; +import static org.opensearch.sql.analysis.symbol.Namespace.INDEX_NAME; import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST; import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; @@ -18,6 +21,7 @@ import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALY_GRADE; import static org.opensearch.sql.utils.MLCommonsConstants.RCF_SCORE; import static org.opensearch.sql.utils.MLCommonsConstants.TIME_FIELD; +import static org.opensearch.sql.utils.PathUtils.SEPARATOR; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import com.google.common.collect.ImmutableList; @@ -35,7 +39,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.DataSourceSchemaName; -import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; import org.opensearch.sql.ast.AbstractNodeVisitor; import 
org.opensearch.sql.ast.expression.Argument; @@ -115,7 +118,6 @@ import org.opensearch.sql.planner.physical.datasource.DataSourceTable; import org.opensearch.sql.storage.Table; import org.opensearch.sql.utils.ParseUtils; -import org.opensearch.sql.utils.PathUtils; /** * Analyze the {@link UnresolvedPlan} in the {@link AnalysisContext} to construct the {@link @@ -171,16 +173,15 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { dataSourceSchemaIdentifierNameResolver.getSchemaName()), dataSourceSchemaIdentifierNameResolver.getIdentifierName()); } - table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(FIELD_NAME, k), v)); table .getReservedFieldTypes() - .forEach((k, v) -> curEnv.define(new Symbol(Namespace.HIDDEN_FIELD_NAME, k), v)); + .forEach((k, v) -> curEnv.define(new Symbol(HIDDEN_FIELD_NAME, k), v)); // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define( - new Symbol(Namespace.INDEX_NAME, (node.getAlias() == null) ? tableName : node.getAlias()), - STRUCT); + new Symbol(INDEX_NAME, (node.getAlias() == null) ? 
tableName : node.getAlias()), STRUCT); return new LogicalRelation(tableName, table); } @@ -193,7 +194,7 @@ public LogicalPlan visitRelationSubquery(RelationSubquery node, AnalysisContext // Put subquery alias in index namespace so the qualifier can be removed // when analyzing qualified name in the subquery layer - curEnv.define(new Symbol(Namespace.INDEX_NAME, node.getAliasAsTableName()), STRUCT); + curEnv.define(new Symbol(INDEX_NAME, node.getAliasAsTableName()), STRUCT); return subquery; } @@ -225,14 +226,12 @@ public LogicalPlan visitTableFunction(TableFunction node, AnalysisContext contex context.push(); TypeEnvironment curEnv = context.peek(); Table table = tableFunctionImplementation.applyArguments(); - table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(FIELD_NAME, k), v)); table .getReservedFieldTypes() - .forEach((k, v) -> curEnv.define(new Symbol(Namespace.HIDDEN_FIELD_NAME, k), v)); + .forEach((k, v) -> curEnv.define(new Symbol(HIDDEN_FIELD_NAME, k), v)); curEnv.define( - new Symbol( - Namespace.INDEX_NAME, dataSourceSchemaIdentifierNameResolver.getIdentifierName()), - STRUCT); + new Symbol(INDEX_NAME, dataSourceSchemaIdentifierNameResolver.getIdentifierName()), STRUCT); return new LogicalRelation( dataSourceSchemaIdentifierNameResolver.getIdentifierName(), tableFunctionImplementation.applyArguments()); @@ -334,11 +333,9 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { TypeEnvironment newEnv = context.peek(); aggregators.forEach( aggregator -> - newEnv.define( - new Symbol(Namespace.FIELD_NAME, aggregator.getName()), aggregator.type())); + newEnv.define(new Symbol(FIELD_NAME, aggregator.getName()), aggregator.type())); groupBys.forEach( - group -> - newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getNameOrAlias()), group.type())); + group -> newEnv.define(new Symbol(FIELD_NAME, group.getNameOrAlias()), 
group.type())); return new LogicalAggregation(child, aggregators, groupBys); } @@ -363,9 +360,8 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); groupBys.forEach( - group -> newEnv.define(new Symbol(Namespace.FIELD_NAME, group.toString()), group.type())); - fields.forEach( - field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type())); + group -> newEnv.define(new Symbol(FIELD_NAME, group.toString()), group.type())); + fields.forEach(field -> newEnv.define(new Symbol(FIELD_NAME, field.toString()), field.type())); List options = node.getNoOfResults(); Integer noOfResults = (Integer) options.get(0).getValue().getValue(); @@ -431,8 +427,7 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); namedExpressions.forEach( - expr -> - newEnv.define(new Symbol(Namespace.FIELD_NAME, expr.getNameOrAlias()), expr.type())); + expr -> newEnv.define(new Symbol(FIELD_NAME, expr.getNameOrAlias()), expr.type())); List namedParseExpressions = context.getNamedParseExpressions(); return new LogicalProject(child, namedExpressions, namedParseExpressions); } @@ -512,50 +507,51 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // determining the new paths, we need to preserve the portion of the path corresponding to the // flattened field's parent, if one exists, in order to support flattening nested structs. - Map addFieldsMap = new HashMap<>(); - TypeEnvironment env = context.peek(); - Map fieldsMap = env.lookupAllTupleFields(Namespace.FIELD_NAME); + Map fieldsMap = env.lookupAllTupleFields(FIELD_NAME); - final String fieldDescendantPath = fieldName + PathUtils.SEPARATOR; - final String fieldParentPath = - fieldName.contains(PathUtils.SEPARATOR) - ? 
fieldName.substring(0, fieldName.lastIndexOf(PathUtils.SEPARATOR)) + final String fieldParentPathPrefix = + fieldName.contains(SEPARATOR) + ? fieldName.substring(0, fieldName.lastIndexOf(SEPARATOR)) + SEPARATOR : ""; - for (String path : fieldsMap.keySet()) { + // Get entries for paths that are descended from the flattened field. + final String fieldDescendantPathPrefix = fieldName + SEPARATOR; + List> fieldDescendantEntries = + fieldsMap.entrySet().stream() + .filter(e -> e.getKey().startsWith(fieldDescendantPathPrefix)) + .toList(); - // Verify that the path is descended from the flattened field. - if (!path.startsWith(fieldDescendantPath)) { - continue; - } + // Get fields to add from descendant entries. + Map addFieldsMap = new HashMap<>(); + for (Map.Entry entry : fieldDescendantEntries) { + String path = entry.getKey(); // Build the new path. - String newPath = path.substring(fieldDescendantPath.length()); - if (!fieldParentPath.isEmpty()) { - newPath = String.join(PathUtils.SEPARATOR, fieldParentPath, newPath); + String newPath = path.substring(fieldDescendantPathPrefix.length()); + if (!fieldParentPathPrefix.isEmpty()) { + newPath = fieldParentPathPrefix + newPath; } - ExprType type = fieldsMap.get(path); - addFieldsMap.put(newPath, type); + addFieldsMap.put(newPath, entry.getValue()); } // [B] Add new fields to type environment // -------------------------------------- - for (Map.Entry entry : addFieldsMap.entrySet()) { - String name = entry.getKey(); - ExprType type = entry.getValue(); + // Verify that new fields do not overwrite an existing field. + List duplicateFieldNames = + addFieldsMap.keySet().stream().filter(fieldsMap::containsKey).toList(); - // Verify that new field does not overwrite an existing field. 
- if (fieldsMap.containsKey(name)) { - throw new IllegalArgumentException( - StringUtils.format("Flatten command cannot overwrite field '%s'", name)); - } - - env.define(DSL.ref(name, type)); + if (!duplicateFieldNames.isEmpty()) { + throw new SemanticCheckException( + StringUtils.format( + "Flatten command cannot overwrite fields: %s", + String.join(", ", duplicateFieldNames))); } + addFieldsMap.forEach((name, type) -> env.define(DSL.ref(name, type))); + return new LogicalFlatten(child, fieldExpr); } @@ -576,7 +572,7 @@ public LogicalPlan visitParse(Parse node, AnalysisContext context) { ParseExpression expr = ParseUtils.createParseExpression( parseMethod, sourceField, patternExpression, DSL.literal(group)); - curEnv.define(new Symbol(Namespace.FIELD_NAME, group), expr.type()); + curEnv.define(new Symbol(FIELD_NAME, group), expr.type()); context.getNamedParseExpressions().add(new NamedExpression(group, expr)); }); return child; @@ -635,7 +631,7 @@ public LogicalPlan visitKmeans(Kmeans node, AnalysisContext context) { java.util.Map options = node.getArguments(); TypeEnvironment currentEnv = context.peek(); - currentEnv.define(new Symbol(Namespace.FIELD_NAME, "ClusterID"), ExprCoreType.INTEGER); + currentEnv.define(new Symbol(FIELD_NAME, "ClusterID"), ExprCoreType.INTEGER); return new LogicalMLCommons(child, "kmeans", options); } @@ -648,13 +644,13 @@ public LogicalPlan visitAD(AD node, AnalysisContext context) { TypeEnvironment currentEnv = context.peek(); - currentEnv.define(new Symbol(Namespace.FIELD_NAME, RCF_SCORE), ExprCoreType.DOUBLE); + currentEnv.define(new Symbol(FIELD_NAME, RCF_SCORE), ExprCoreType.DOUBLE); if (Objects.isNull(node.getArguments().get(TIME_FIELD))) { - currentEnv.define(new Symbol(Namespace.FIELD_NAME, RCF_ANOMALOUS), ExprCoreType.BOOLEAN); + currentEnv.define(new Symbol(FIELD_NAME, RCF_ANOMALOUS), ExprCoreType.BOOLEAN); } else { - currentEnv.define(new Symbol(Namespace.FIELD_NAME, RCF_ANOMALY_GRADE), ExprCoreType.DOUBLE); + 
currentEnv.define(new Symbol(FIELD_NAME, RCF_ANOMALY_GRADE), ExprCoreType.DOUBLE); currentEnv.define( - new Symbol(Namespace.FIELD_NAME, (String) node.getArguments().get(TIME_FIELD).getValue()), + new Symbol(FIELD_NAME, (String) node.getArguments().get(TIME_FIELD).getValue()), ExprCoreType.TIMESTAMP); } return new LogicalAD(child, options); @@ -689,8 +685,7 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); TypeEnvironment currentEnv = context.peek(); node.getOutputSchema(currentEnv).entrySet().stream() - .forEach( - v -> currentEnv.define(new Symbol(Namespace.FIELD_NAME, v.getKey()), v.getValue())); + .forEach(v -> currentEnv.define(new Symbol(FIELD_NAME, v.getKey()), v.getValue())); return new LogicalML(child, node.getArguments()); } @@ -731,7 +726,7 @@ public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) { resolvedField.type().typeName())); } } - currEnv.define(new Symbol(Namespace.FIELD_NAME, computation.getAlias()), averageType); + currEnv.define(new Symbol(FIELD_NAME, computation.getAlias()), averageType); computationsAndTypes.add(Pair.of(computation, averageType)); }); diff --git a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java index 098e3bee36..baec575206 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java @@ -7,9 +7,10 @@ package org.opensearch.sql.analysis; +import static org.opensearch.sql.utils.PathUtils.SEPARATOR; + import java.util.List; import org.opensearch.sql.datasource.DataSourceService; -import org.opensearch.sql.utils.PathUtils; public class DataSourceSchemaIdentifierNameResolver { @@ -34,7 +35,7 @@ public DataSourceSchemaIdentifierNameResolver( DataSourceService 
dataSourceService, List parts) { this.dataSourceService = dataSourceService; List remainingParts = captureSchemaName(captureDataSourceName(parts)); - identifierName = String.join(PathUtils.SEPARATOR, remainingParts); + identifierName = String.join(SEPARATOR, remainingParts); } public String getIdentifierName() { diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index 1069606300..62a8a43c91 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -5,6 +5,8 @@ package org.opensearch.sql.expression; +import static org.opensearch.sql.utils.PathUtils.SEPARATOR; + import java.util.Arrays; import java.util.List; import lombok.EqualsAndHashCode; @@ -15,7 +17,6 @@ import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.env.Environment; -import org.opensearch.sql.utils.PathUtils; @EqualsAndHashCode @RequiredArgsConstructor @@ -105,7 +106,7 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - ExprValue wholePathValue = value.keyValue(String.join(PathUtils.SEPARATOR, paths)); + ExprValue wholePathValue = value.keyValue(String.join(SEPARATOR, paths)); // For array types only first index currently supported. 
if (value.type().equals(ExprCoreType.ARRAY)) { wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); diff --git a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java index 95211d004b..2de781cfbb 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java @@ -26,7 +26,6 @@ import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.env.Environment; -@ToString @ExtendWith(MockitoExtension.class) class IPFunctionsTest { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index d8f386ec00..ecafd31124 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -50,6 +50,10 @@ protected Map typeMapping() { mapping.put("duplicate", STRUCT); mapping.put("duplicate.integer_value", INTEGER); + mapping.put("duplicate_2", STRUCT); + mapping.put("duplicate_2.integer_value", INTEGER); + mapping.put("duplicate_2.double_value", INTEGER); + return mapping; } @@ -119,10 +123,14 @@ void testInvalidName() { @Test void testInvalidDuplicate() { - String msg = - assertThrows(IllegalArgumentException.class, () -> executeFlatten("duplicate")) - .getMessage(); - assertEquals("Flatten command cannot overwrite field 'integer_value'", msg); + Exception ex; + + ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate")); + assertEquals("Flatten command cannot overwrite fields: integer_value", ex.getMessage()); + + ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate_2")); + assertEquals( + "Flatten command cannot overwrite fields: integer_value, double_value", 
ex.getMessage()); } /** From befe55b03bb467767ca6d378284671c5111a9925 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 09:13:45 -0800 Subject: [PATCH 49/81] Spotless Signed-off-by: currantw --- .../java/org/opensearch/sql/expression/ip/IPFunctionsTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java index 2de781cfbb..e3a7121852 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java @@ -13,7 +13,6 @@ import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.type.ExprCoreType.IP; -import lombok.ToString; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; From eb93cb155afb2be67ed8760bd1389cb4b70d2c25 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 09:41:29 -0800 Subject: [PATCH 50/81] Spotless Signed-off-by: currantw --- .../sql/planner/physical/FlattenOperator.java | 20 ++++++++++--------- .../planner/logical/LogicalFlattenTest.java | 16 +++------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 60d8bad284..f187a0611b 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -5,6 +5,8 @@ package org.opensearch.sql.planner.physical; +import static org.opensearch.sql.utils.PathUtils.SEPARATOR_PATTERN; + import java.util.Collections; import java.util.List; import java.util.Map; @@ -17,7 +19,6 @@ import org.opensearch.sql.data.model.ExprValue; import 
org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.utils.PathUtils; /** Flattens the specified field from the input and returns the result. */ @Getter @@ -50,13 +51,14 @@ public ExprValue next() { } /** - * Flattens the {@link ExprTupleValue} at the specified path and returns the update value. If the - * value is null or missing, the unmodified value is returned. + * Flattens the {@link ExprTupleValue} at the specified path within the given root value and + * returns the result. Returns the unmodified root value if it does not contain a value at the + * specified path. */ - private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path) { + private static ExprValue flattenExprValueAtPath(ExprValue rootExprValue, String path) { - Matcher matcher = PathUtils.SEPARATOR_PATTERN.matcher(path); - Map exprValueMap = ExprValueUtils.getTupleValue(exprValue); + Matcher matcher = SEPARATOR_PATTERN.matcher(path); + Map exprValueMap = ExprValueUtils.getTupleValue(rootExprValue); // [A] Flatten nested struct value // ------------------------------- @@ -66,12 +68,12 @@ private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path String remainingPath = path.substring(matcher.end()); if (!exprValueMap.containsKey(currentPathComponent)) { - return exprValue; + return rootExprValue; } ExprValue childExprValue = exprValueMap.get(currentPathComponent); if (childExprValue.isNull() || childExprValue.isMissing()) { - return exprValue; + return rootExprValue; } ExprValue flattenedExprValue = @@ -84,7 +86,7 @@ private static ExprValue flattenExprValueAtPath(ExprValue exprValue, String path // ------------------------------ if (!exprValueMap.containsKey(path)) { - return exprValue; + return rootExprValue; } ExprValue childExprValue = exprValueMap.get(path); diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java 
b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index ecafd31124..f291165559 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -106,19 +106,9 @@ void testStructNestedDeep() { @Test void testInvalidName() { - Exception ex; - - ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid")); - assertEquals( - "can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", ex.getMessage()); - - ex = assertThrows(SemanticCheckException.class, () -> executeFlatten(".invalid")); - assertEquals( - "can't resolve Symbol(namespace=FIELD_NAME, name=.invalid) in type env", ex.getMessage()); - - ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid.")); - assertEquals( - "can't resolve Symbol(namespace=FIELD_NAME, name=invalid.) in type env", ex.getMessage()); + assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid")); + assertThrows(SemanticCheckException.class, () -> executeFlatten(".invalid")); + assertThrows(SemanticCheckException.class, () -> executeFlatten("invalid.")); } @Test From 1f05e85958f84af474d28cfa500a230c03d5775d Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 12:54:51 -0800 Subject: [PATCH 51/81] Additional review comments, including move constants to `ExprValueUtils`. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 19 +++++++------------ ...ataSourceSchemaIdentifierNameResolver.java | 5 ++--- .../sql/data/model/ExprValueUtils.java | 10 ++++++++++ .../sql/expression/ReferenceExpression.java | 6 +++--- .../sql/planner/physical/FlattenOperator.java | 6 ++---- .../org/opensearch/sql/utils/PathUtils.java | 19 ------------------- 6 files changed, 24 insertions(+), 41 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/utils/PathUtils.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 1b4e6b0a55..f8775adb1b 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -21,7 +21,6 @@ import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALY_GRADE; import static org.opensearch.sql.utils.MLCommonsConstants.RCF_SCORE; import static org.opensearch.sql.utils.MLCommonsConstants.TIME_FIELD; -import static org.opensearch.sql.utils.PathUtils.SEPARATOR; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import com.google.common.collect.ImmutableList; @@ -77,6 +76,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.data.model.ExprMissingValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; @@ -511,12 +511,13 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { Map fieldsMap = env.lookupAllTupleFields(FIELD_NAME); final String fieldParentPathPrefix = - fieldName.contains(SEPARATOR) - ? fieldName.substring(0, fieldName.lastIndexOf(SEPARATOR)) + SEPARATOR + fieldName.contains(ExprValueUtils.QUALIFIED_NAME_SEPARATOR) + ? 
fieldName.substring(0, fieldName.lastIndexOf(ExprValueUtils.QUALIFIED_NAME_SEPARATOR)) + + ExprValueUtils.QUALIFIED_NAME_SEPARATOR : ""; // Get entries for paths that are descended from the flattened field. - final String fieldDescendantPathPrefix = fieldName + SEPARATOR; + final String fieldDescendantPathPrefix = fieldName + ExprValueUtils.QUALIFIED_NAME_SEPARATOR; List> fieldDescendantEntries = fieldsMap.entrySet().stream() .filter(e -> e.getKey().startsWith(fieldDescendantPathPrefix)) @@ -525,14 +526,8 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { // Get fields to add from descendant entries. Map addFieldsMap = new HashMap<>(); for (Map.Entry entry : fieldDescendantEntries) { - String path = entry.getKey(); - - // Build the new path. - String newPath = path.substring(fieldDescendantPathPrefix.length()); - if (!fieldParentPathPrefix.isEmpty()) { - newPath = fieldParentPathPrefix + newPath; - } - + String newPath = + fieldParentPathPrefix + entry.getKey().substring(fieldDescendantPathPrefix.length()); addFieldsMap.put(newPath, entry.getValue()); } diff --git a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java index baec575206..99f0453427 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java @@ -7,9 +7,8 @@ package org.opensearch.sql.analysis; -import static org.opensearch.sql.utils.PathUtils.SEPARATOR; - import java.util.List; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.datasource.DataSourceService; public class DataSourceSchemaIdentifierNameResolver { @@ -35,7 +34,7 @@ public DataSourceSchemaIdentifierNameResolver( DataSourceService dataSourceService, List parts) { this.dataSourceService = dataSourceService; List 
remainingParts = captureSchemaName(captureDataSourceName(parts)); - identifierName = String.join(SEPARATOR, remainingParts); + identifierName = String.join(ExprValueUtils.QUALIFIED_NAME_SEPARATOR, remainingParts); } public String getIdentifierName() { diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 890e0ef8d5..36be8dc648 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -16,6 +16,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; import lombok.experimental.UtilityClass; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; @@ -23,11 +24,20 @@ /** The definition of {@link ExprValue} factory. */ @UtilityClass public class ExprValueUtils { + + // Literal constants public static final ExprValue LITERAL_TRUE = ExprBooleanValue.of(true); public static final ExprValue LITERAL_FALSE = ExprBooleanValue.of(false); public static final ExprValue LITERAL_NULL = ExprNullValue.of(); public static final ExprValue LITERAL_MISSING = ExprMissingValue.of(); + /** Qualified name separator string */ + public final String QUALIFIED_NAME_SEPARATOR = "."; + + /** Pattern that matches the qualified name separator string */ + public final Pattern QUALIFIED_NAME_SEPARATOR_PATTERN = + Pattern.compile(QUALIFIED_NAME_SEPARATOR, Pattern.LITERAL); + public static ExprValue booleanValue(Boolean value) { return value ? 
LITERAL_TRUE : LITERAL_FALSE; } diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index 62a8a43c91..c249b426f6 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -5,8 +5,6 @@ package org.opensearch.sql.expression; -import static org.opensearch.sql.utils.PathUtils.SEPARATOR; - import java.util.Arrays; import java.util.List; import lombok.EqualsAndHashCode; @@ -14,6 +12,7 @@ import lombok.RequiredArgsConstructor; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.env.Environment; @@ -106,7 +105,8 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - ExprValue wholePathValue = value.keyValue(String.join(SEPARATOR, paths)); + ExprValue wholePathValue = + value.keyValue(String.join(ExprValueUtils.QUALIFIED_NAME_SEPARATOR, paths)); // For array types only first index currently supported. 
if (value.type().equals(ExprCoreType.ARRAY)) { wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index f187a0611b..e42d8b65bd 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -5,8 +5,6 @@ package org.opensearch.sql.planner.physical; -import static org.opensearch.sql.utils.PathUtils.SEPARATOR_PATTERN; - import java.util.Collections; import java.util.List; import java.util.Map; @@ -53,11 +51,11 @@ public ExprValue next() { /** * Flattens the {@link ExprTupleValue} at the specified path within the given root value and * returns the result. Returns the unmodified root value if it does not contain a value at the - * specified path. + * specified path. rootExprValue is expected to be an {@link ExprTupleValue}. 
*/ private static ExprValue flattenExprValueAtPath(ExprValue rootExprValue, String path) { - Matcher matcher = SEPARATOR_PATTERN.matcher(path); + Matcher matcher = ExprValueUtils.QUALIFIED_NAME_SEPARATOR_PATTERN.matcher(path); Map exprValueMap = ExprValueUtils.getTupleValue(rootExprValue); // [A] Flatten nested struct value diff --git a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java deleted file mode 100644 index 3414cdfcfb..0000000000 --- a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.utils; - -import java.util.regex.Pattern; -import lombok.experimental.UtilityClass; - -@UtilityClass -public class PathUtils { - - /** Path separator string */ - public final String SEPARATOR = "."; - - /** Pattern that matches the path separator string */ - public final Pattern SEPARATOR_PATTERN = Pattern.compile(SEPARATOR, Pattern.LITERAL); -} From db96c5103db054c5ea7b3fb067e8f2f08c5c38f4 Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 14:21:33 -0800 Subject: [PATCH 52/81] Review comments - update tests for exception msg Signed-off-by: currantw --- .../sql/planner/physical/FlattenOperator.java | 5 +++-- .../sql/planner/logical/LogicalFlattenTest.java | 15 +++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index e42d8b65bd..16a4c1dd87 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -88,10 +88,11 @@ private static ExprValue flattenExprValueAtPath(ExprValue rootExprValue, String } ExprValue childExprValue = exprValueMap.get(path); - if 
(!childExprValue.isNull() && !childExprValue.isMissing()) { - exprValueMap.putAll(ExprValueUtils.getTupleValue(childExprValue)); + if (childExprValue.isNull() || childExprValue.isMissing()) { + return rootExprValue; } + exprValueMap.putAll(ExprValueUtils.getTupleValue(childExprValue)); return ExprTupleValue.fromExprValueMap(exprValueMap); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java index f291165559..75a800af00 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalFlattenTest.java @@ -113,14 +113,17 @@ void testInvalidName() { @Test void testInvalidDuplicate() { - Exception ex; + String msg; - ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate")); - assertEquals("Flatten command cannot overwrite fields: integer_value", ex.getMessage()); + msg = + assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate")).getMessage(); + assertTrue(msg.contains("integer_value")); - ex = assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate_2")); - assertEquals( - "Flatten command cannot overwrite fields: integer_value, double_value", ex.getMessage()); + msg = + assertThrows(SemanticCheckException.class, () -> executeFlatten("duplicate_2")) + .getMessage(); + assertTrue(msg.contains("integer_value")); + assertTrue(msg.contains("double_value")); } /** From c1666ee6aefd78790ce204cf0595d6f9e90a662a Mon Sep 17 00:00:00 2001 From: currantw Date: Fri, 7 Feb 2025 14:46:17 -0800 Subject: [PATCH 53/81] Review comments - simplify `FlattenOperator.flattenExprValueAtPath`. 
Signed-off-by: currantw --- .../sql/planner/physical/FlattenOperator.java | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 16a4c1dd87..c529920d3e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -55,44 +55,36 @@ public ExprValue next() { */ private static ExprValue flattenExprValueAtPath(ExprValue rootExprValue, String path) { - Matcher matcher = ExprValueUtils.QUALIFIED_NAME_SEPARATOR_PATTERN.matcher(path); Map exprValueMap = ExprValueUtils.getTupleValue(rootExprValue); - // [A] Flatten nested struct value - // ------------------------------- - - if (matcher.find()) { - String currentPathComponent = path.substring(0, matcher.start()); - String remainingPath = path.substring(matcher.end()); - - if (!exprValueMap.containsKey(currentPathComponent)) { - return rootExprValue; - } - - ExprValue childExprValue = exprValueMap.get(currentPathComponent); - if (childExprValue.isNull() || childExprValue.isMissing()) { - return rootExprValue; - } - - ExprValue flattenedExprValue = - flattenExprValueAtPath(exprValueMap.get(currentPathComponent), remainingPath); - exprValueMap.put(currentPathComponent, flattenedExprValue); - return ExprTupleValue.fromExprValueMap(exprValueMap); - } - - // [B] Flatten child struct value - // ------------------------------ + // Get current path component. + Matcher matcher = ExprValueUtils.QUALIFIED_NAME_SEPARATOR_PATTERN.matcher(path); + boolean fieldIsNested = matcher.find(); + String currentPathComponent = fieldIsNested ? path.substring(0, matcher.start()) : path; - if (!exprValueMap.containsKey(path)) { + // Check for undefined, null, or missing values. 
+ if (!exprValueMap.containsKey(currentPathComponent)) { return rootExprValue; } - ExprValue childExprValue = exprValueMap.get(path); + ExprValue childExprValue = exprValueMap.get(currentPathComponent); if (childExprValue.isNull() || childExprValue.isMissing()) { return rootExprValue; } - exprValueMap.putAll(ExprValueUtils.getTupleValue(childExprValue)); + // Get flattened values and add them to the field map. + Map flattenedExprValueMap; + if (fieldIsNested) { + String remainingPath = path.substring(matcher.end()); + flattenedExprValueMap = + Map.of( + currentPathComponent, + flattenExprValueAtPath(exprValueMap.get(currentPathComponent), remainingPath)); + } else { + flattenedExprValueMap = ExprValueUtils.getTupleValue(childExprValue); + } + + exprValueMap.putAll(flattenedExprValueMap); return ExprTupleValue.fromExprValueMap(exprValueMap); } } From 6e176a3e00819408384d925783e5a246fa1cadb7 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 13:40:23 -0800 Subject: [PATCH 54/81] Change braces in documentation. Signed-off-by: currantw --- .../java/org/opensearch/sql/analysis/Analyzer.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index f8775adb1b..18622e5bdb 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -458,37 +458,37 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { *

Input Data: * *

-   * [
+   * {
    *   struct: {
    *     integer: 0,
    *     nested_struct: { string: "value" }
    *   }
-   * ]
+   * }
    * 
* * Query 1: flatten struct * *
-   * [
+   * {
    *   struct: {
    *     integer: 0,
    *     nested_struct: { string: "value" }
    *   },
    *   integer: 0,
    *   nested_struct: { string: "value" }
-   * ]
+   * }
    * 
* * Query 2: flatten struct.nested_struct * *
-   * [
+   * {
    *   struct: {
    *     integer: 0,
    *     nested_struct: { string: "value" },
    *     string: "value"
    *   }
-   * ]
+   * }
    * 
*/ @Override From ab5a2fef767b8109213a6912a37f22a47be435c7 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 3 Feb 2025 13:23:19 -0800 Subject: [PATCH 55/81] Initial implementation of skeleton classes and methods. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 27 ++++++++-- .../sql/ast/AbstractNodeVisitor.java | 5 ++ .../org/opensearch/sql/ast/dsl/AstDSL.java | 5 ++ .../org/opensearch/sql/ast/tree/Expand.java | 40 ++++++++++++++ .../org/opensearch/sql/executor/Explain.java | 9 ++++ .../sql/planner/DefaultImplementor.java | 7 +++ .../sql/planner/logical/LogicalExpand.java | 30 +++++++++++ .../sql/planner/logical/LogicalPlanDSL.java | 4 ++ .../logical/LogicalPlanNodeVisitor.java | 4 ++ .../sql/planner/physical/ExpandOperator.java | 54 +++++++++++++++++++ .../sql/planner/physical/PhysicalPlanDSL.java | 4 ++ .../physical/PhysicalPlanNodeVisitor.java | 4 ++ .../opensearch/sql/executor/ExplainTest.java | 20 +++++++ .../sql/planner/DefaultImplementorTest.java | 21 ++++++++ .../planner/logical/LogicalExpandTest.java | 16 ++++++ .../planner/physical/ExpandOperatorTest.java | 22 ++++++++ .../physical/PhysicalPlanNodeVisitorTest.java | 4 ++ docs/category.json | 1 + docs/user/ppl/cmd/expand.rst | 5 ++ docs/user/ppl/index.rst | 2 + .../opensearch/sql/ppl/ExpandCommandIT.java | 11 ++++ .../org/opensearch/sql/ppl/ExplainIT.java | 6 +++ .../expectedOutput/ppl/explain_expand.json | 1 + .../OpenSearchExecutionProtector.java | 6 +++ .../OpenSearchExecutionProtectorTest.java | 11 ++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 8 ++- .../opensearch/sql/ppl/parser/AstBuilder.java | 7 +++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 9 ++++ .../sql/ppl/parser/AstBuilderTest.java | 9 ++++ .../ppl/parser/AstExpressionBuilderTest.java | 9 ++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 5 ++ 32 files changed, 361 insertions(+), 6 deletions(-) create mode 100644 
core/src/main/java/org/opensearch/sql/ast/tree/Expand.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java create mode 100644 core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java create mode 100644 core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java create mode 100644 docs/user/ppl/cmd/expand.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 18622e5bdb..7301ce442e 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -38,6 +38,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.DataSourceSchemaName; +import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Argument; @@ -52,6 +53,7 @@ import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; @@ -333,9 +335,11 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { TypeEnvironment newEnv = context.peek(); aggregators.forEach( aggregator -> - newEnv.define(new Symbol(FIELD_NAME, aggregator.getName()), aggregator.type())); + newEnv.define( + new Symbol(Namespace.FIELD_NAME, 
aggregator.getName()), aggregator.type())); groupBys.forEach( - group -> newEnv.define(new Symbol(FIELD_NAME, group.getNameOrAlias()), group.type())); + group -> + newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getNameOrAlias()), group.type())); return new LogicalAggregation(child, aggregators, groupBys); } @@ -360,8 +364,9 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); groupBys.forEach( - group -> newEnv.define(new Symbol(FIELD_NAME, group.toString()), group.type())); - fields.forEach(field -> newEnv.define(new Symbol(FIELD_NAME, field.toString()), field.type())); + group -> newEnv.define(new Symbol(Namespace.FIELD_NAME, group.toString()), group.type())); + fields.forEach( + field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type())); List options = node.getNoOfResults(); Integer noOfResults = (Integer) options.get(0).getValue().getValue(); @@ -427,7 +432,8 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); namedExpressions.forEach( - expr -> newEnv.define(new Symbol(FIELD_NAME, expr.getNameOrAlias()), expr.type())); + expr -> + newEnv.define(new Symbol(Namespace.FIELD_NAME, expr.getNameOrAlias()), expr.type())); List namedParseExpressions = context.getNamedParseExpressions(); return new LogicalProject(child, namedExpressions, namedParseExpressions); } @@ -449,6 +455,17 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } + /** + * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalExpand} corresponding to + * the given expand node. 
+ */ + @Override + public LogicalPlan visitExpand(Expand node, AnalysisContext context) { + + // TODO #3016: Implement expand command + return null; + } + /** * Builds and returns a {@link org.opensearch.sql.planner.logical.LogicalFlatten} corresponding to * the given flatten node, and adds the new fields to the current type environment. diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index adc804bab1..b4fd9712fe 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -44,6 +44,7 @@ import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; @@ -104,6 +105,10 @@ public T visitRelationSubquery(RelationSubquery node, C context) { return visitChildren(node, context); } + public T visitExpand(Expand node, C context) { + return visitChildren(node, context); + } + public T visitTableFunction(TableFunction node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 526a5e8def..62c0ad6a29 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; @@ -105,6 +106,10 @@ public static Eval 
eval(UnresolvedPlan input, Let... projectList) { return new Eval(Arrays.asList(projectList)).attach(input); } + public Expand expand(UnresolvedPlan input, Field field) { + return new Expand(field).attach(input); + } + public Flatten flatten(UnresolvedPlan input, Field field) { return new Flatten(field).attach(input); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java new file mode 100644 index 0000000000..70fa3666b7 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; + +/** AST node representing an expand operation. 
*/ +@Getter +@ToString +@RequiredArgsConstructor +public class Expand extends UnresolvedPlan { + private UnresolvedPlan child; + + @Getter private final Field field; + + @Override + public Expand attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index a5dc3b9c93..81bbdbcf9b 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -22,6 +22,7 @@ import org.opensearch.sql.planner.physical.AggregationOperator; import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; @@ -161,6 +162,14 @@ public ExplainResponseNode visitEval(EvalOperator node, Object context) { ImmutableMap.of("expressions", convertPairListToMap(node.getExpressionList())))); } + @Override + public ExplainResponseNode visitExpand(ExpandOperator node, Object context) { + return explain( + node, + context, + explainNode -> explainNode.setDescription(ImmutableMap.of("expandField", node.getField()))); + } + @Override public ExplainResponseNode visitFlatten(FlattenOperator node, Object context) { return explain( diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index 07ef0fd867..548f7cbe24 100644 --- 
a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -10,6 +10,7 @@ import org.opensearch.sql.planner.logical.LogicalCloseCursor; import org.opensearch.sql.planner.logical.LogicalDedupe; import org.opensearch.sql.planner.logical.LogicalEval; +import org.opensearch.sql.planner.logical.LogicalExpand; import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalFlatten; @@ -31,6 +32,7 @@ import org.opensearch.sql.planner.physical.CursorCloseOperator; import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; @@ -101,6 +103,11 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { return new EvalOperator(visitChild(node, context), node.getExpressions()); } + @Override + public PhysicalPlan visitExpand(LogicalExpand node, C context) { + return new ExpandOperator(visitChild(node, context), node.getFieldRefExp()); + } + @Override public PhysicalPlan visitFlatten(LogicalFlatten node, C context) { return new FlattenOperator(visitChild(node, context), node.getFieldRefExp()); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java new file mode 100644 index 0000000000..4acf9ab968 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java @@ -0,0 +1,30 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.Collections; +import 
lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.ReferenceExpression; + +/** Logical plan that represent the flatten command. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalExpand extends LogicalPlan { + private final ReferenceExpression fieldRefExp; + + public LogicalExpand(LogicalPlan child, ReferenceExpression fieldRefExp) { + super(Collections.singletonList(child)); + this.fieldRefExp = fieldRefExp; + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 1a5b569ef4..ffaad9295e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -97,6 +97,10 @@ public static LogicalPlan eval( return new LogicalEval(input, Arrays.asList(expressions)); } + public LogicalPlan expand(LogicalPlan input, ReferenceExpression fieldRefExp) { + return new LogicalExpand(input, fieldRefExp); + } + public LogicalPlan flatten(LogicalPlan input, ReferenceExpression fieldRefExp) { return new LogicalFlatten(input, fieldRefExp); } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 821f55ab93..b046220c00 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -72,6 +72,10 @@ public R visitEval(LogicalEval plan, C context) { return visitNode(plan, context); } + public R visitExpand(LogicalExpand plan, C context) { + return visitNode(plan, context); + } + public R 
visitFlatten(LogicalFlatten plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java new file mode 100644 index 0000000000..ee524abbfb --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -0,0 +1,54 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.ReferenceExpression; + +/** Flattens the specified field from the input and returns the result. */ +@Getter +@ToString +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class ExpandOperator extends PhysicalPlan { + + private final PhysicalPlan input; + private final ReferenceExpression field; + + private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitExpand(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + + // TODO #3016: Implement expand command + return false; + } + + @Override + public ExprValue next() { + + // TODO #3016: Implement expand command + return ExprValueUtils.nullValue(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 0986469d15..6d04a2a8a4 100644 --- 
a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -60,6 +60,10 @@ public static EvalOperator eval( return new EvalOperator(input, Arrays.asList(expressions)); } + public ExpandOperator expand(PhysicalPlan input, ReferenceExpression fieldRefExp) { + return new ExpandOperator(input, fieldRefExp); + } + public FlattenOperator flatten(PhysicalPlan input, ReferenceExpression fieldRefExp) { return new FlattenOperator(input, fieldRefExp); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 0d619ab8bc..b5c440f98c 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -56,6 +56,10 @@ public R visitEval(EvalOperator node, C context) { return visitNode(node, context); } + public R visitExpand(ExpandOperator node, C context) { + return visitNode(node, context); + } + public R visitFlatten(FlattenOperator node, C context) { return visitNode(node, context); } diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index a47027fb52..c1bb123f8d 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.ast.tree.RareTopN.CommandType.TOP; import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static 
org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -56,6 +57,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.window.WindowDefinition; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.TrendlineOperator; @@ -301,6 +303,24 @@ void can_explain_trendline() { explain.apply(plan)); } + @Test + void can_explain_expand() { + String fieldName = "field_name"; + ReferenceExpression fieldReference = ref(fieldName, ARRAY); + + PhysicalPlan plan = new ExpandOperator(tableScan, fieldReference); + ExplainResponse actual = explain.apply(plan); + + ExplainResponse expected = + new ExplainResponse( + new ExplainResponseNode( + "ExpandOperator", + ImmutableMap.of("expandField", fieldReference), + singletonList(tableScan.explainNode()))); + + assertEquals(expected, actual, "explain expand"); + } + @Test void can_explain_flatten() { String fieldName = "field_name"; diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 7fe3a473d9..f6b6829205 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -14,6 +14,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; @@ -63,6 +64,7 @@ import 
org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.expression.window.ranking.RowNumberFunction; import org.opensearch.sql.planner.logical.LogicalCloseCursor; +import org.opensearch.sql.planner.logical.LogicalExpand; import org.opensearch.sql.planner.logical.LogicalFlatten; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; @@ -72,6 +74,7 @@ import org.opensearch.sql.planner.logical.LogicalTrendline; import org.opensearch.sql.planner.logical.LogicalValues; import org.opensearch.sql.planner.physical.CursorCloseOperator; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; @@ -329,6 +332,24 @@ public void visitTrendline_should_build_TrendlineOperator() { assertSame(physicalChild, implemented.getChild().get(0)); } + @Test + public void visitExpand_should_build_ExpandOperator() { + + // Mock physical and logical plan children. + var logicalChild = mock(LogicalPlan.class); + var physicalChild = mock(PhysicalPlan.class); + when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); + + // Build physical plan from logical plan. 
+ var fieldName = "field_name"; + var logicalPlan = new LogicalExpand(logicalChild, ref(fieldName, ARRAY)); + var implemented = logicalPlan.accept(implementor, null); + + assertInstanceOf(ExpandOperator.class, implemented); + assertEquals(fieldName, ((ExpandOperator) implemented).getField().getAttr()); + assertSame(physicalChild, implemented.getChild().getFirst()); + } + @Test public void visitFlatten_should_build_FlattenOperator() { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java new file mode 100644 index 0000000000..7fd8d5c9fe --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java @@ -0,0 +1,16 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.analysis.AnalyzerTestBase; + +@ExtendWith(MockitoExtension.class) +class LogicalExpandTest extends AnalyzerTestBase { + + // TODO #3016: Test expand command +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java new file mode 100644 index 0000000000..b45d66cb7d --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import lombok.ToString; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ToString 
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +@ExtendWith(MockitoExtension.class) +class ExpandOperatorTest extends PhysicalPlanTestBase { + @Mock private PhysicalPlan inputPlan; + + // TODO #3016: Test expand command +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index bd67c9076d..5d0d4dd468 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.agg; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.dedupe; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.eval; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.expand; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.flatten; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; @@ -130,6 +131,8 @@ public static Stream getPhysicalPlanForTest() { PhysicalPlan eval = eval(plan, Pair.of(ref, ref)); + PhysicalPlan expand = expand(plan, ref); + PhysicalPlan flatten = flatten(plan, ref); PhysicalPlan sort = sort(plan, Pair.of(SortOption.DEFAULT_ASC, ref)); @@ -164,6 +167,7 @@ public static Stream getPhysicalPlanForTest() { Arguments.of(window, "window"), Arguments.of(remove, "remove"), Arguments.of(eval, "eval"), + Arguments.of(expand, "expand"), Arguments.of(flatten, "flatten"), Arguments.of(sort, "sort"), Arguments.of(takeOrdered, "takeOrdered"), diff --git a/docs/category.json b/docs/category.json index 8dda7160a8..7bde4fb4a4 100644 --- a/docs/category.json +++ b/docs/category.json @@ -13,6 +13,7 @@ "user/ppl/cmd/showdatasources.rst", 
"user/ppl/cmd/information_schema.rst", "user/ppl/cmd/eval.rst", + "user/ppl/cmd/expand.rst", "user/ppl/cmd/fields.rst", "user/ppl/cmd/fillnull.rst", "user/ppl/cmd/flatten.rst", diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst new file mode 100644 index 0000000000..78ada722e3 --- /dev/null +++ b/docs/user/ppl/cmd/expand.rst @@ -0,0 +1,5 @@ +============= +flatten +============= + +TODO #3016: Add documentation for expand command \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 7a10e98504..80eca74d84 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -52,6 +52,8 @@ The query start with search command and then flowing a set of command delimited - `eval command `_ + - `expand command `_ + - `fields command `_ - `flatten command `_ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java new file mode 100644 index 0000000000..6de5b3f696 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +public class ExpandCommandIT extends PPLIntegTestCase { + + // TODO #3016: Test expand command +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index c98d879230..6cf32fd1dc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -129,6 +129,12 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { + "| fields ageTrend")); } + @Test + public void testExpand() throws Exception { + + // TODO #3016: Test expand command. 
+ } + @Test public void testFlatten() throws Exception { String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_FLATTEN); diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json new file mode 100644 index 0000000000..57a005492d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json @@ -0,0 +1 @@ +// TODO #3016: Test expand command \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index 0920d01eec..565504aebe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -15,6 +15,7 @@ import org.opensearch.sql.planner.physical.CursorCloseOperator; import org.opensearch.sql.planner.physical.DedupeOperator; import org.opensearch.sql.planner.physical.EvalOperator; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.LimitOperator; @@ -108,6 +109,11 @@ public PhysicalPlan visitEval(EvalOperator node, Object context) { return new EvalOperator(visitInput(node.getInput(), context), node.getExpressionList()); } + @Override + public PhysicalPlan visitExpand(ExpandOperator node, Object context) { + return doProtect(new ExpandOperator(visitInput(node.getInput(), context), node.getField())); + } + @Override public PhysicalPlan visitFlatten(FlattenOperator node, Object context) { return doProtect(new FlattenOperator(visitInput(node.getInput(), context), node.getField())); diff --git 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index 99e958db1b..1452f09091 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -11,6 +11,7 @@ import static org.mockito.Mockito.*; import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -71,6 +72,7 @@ import org.opensearch.sql.opensearch.setting.OpenSearchSettings; import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan; import org.opensearch.sql.planner.physical.CursorCloseOperator; +import org.opensearch.sql.planner.physical.ExpandOperator; import org.opensearch.sql.planner.physical.FlattenOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; @@ -365,6 +367,15 @@ void test_visitOpenSearchEval() { executionProtector.visitEval(evalOperator, null)); } + @Test + void test_visitExpand() { + ExpandOperator expandOperator = + new ExpandOperator(values(emptyList()), ref("field_name", ARRAY)); + + assertEquals( + resourceMonitor(expandOperator), executionProtector.visitExpand(expandOperator, null)); + } + @Test void test_visitFlatten() { FlattenOperator flattenOperator = diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 0265a4ddf2..af555ca76a 100644 --- 
a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -37,6 +37,7 @@ AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; TRENDLINE: 'TRENDLINE'; +EXPAND: 'EXPAND'; FLATTEN: 'FLATTEN'; // COMMAND ASSIST KEYWORDS diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 0b1fc4f130..458d502c07 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -51,6 +51,7 @@ commands | mlCommand | fillnullCommand | trendlineCommand + | expandCommand | flattenCommand ; @@ -159,7 +160,11 @@ trendlineType : SMA ; -// TODO #3030: Extend flatten command to support aliases +expandCommand + : EXPAND fieldExpression + ; + +// TODO #3291: Extend flatten command to support aliases flattenCommand : FLATTEN fieldExpression ; @@ -895,6 +900,7 @@ keywordsCanBeId | DEDUP | SORT | EVAL + | EXPAND | FILLNULL | FLATTEN | HEAD diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index ee4edfc609..49864e637f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; @@ -265,6 +266,12 @@ public UnresolvedPlan visitRareCommand(RareCommandContext ctx) { groupList); } + @Override + public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContext ctx) { + Field fieldExpression = (Field) internalVisitExpression(ctx.fieldExpression()); + return new Expand(fieldExpression); + } + @Override public UnresolvedPlan visitFlattenCommand(OpenSearchPPLParser.FlattenCommandContext 
ctx) { Field fieldExpression = (Field) internalVisitExpression(ctx.fieldExpression()); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index f3e6e7d878..dbce5b7ce1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -35,6 +35,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; @@ -193,6 +194,14 @@ public String visitEval(Eval node, String context) { return StringUtils.format("%s | eval %s", child, expressions); } + @Override + public String visitExpand(Expand node, String context) { + String child = node.getChild().getFirst().accept(this, context); + String field = visitExpression(node.getField()); + + return StringUtils.format("%s | expand %s", child, field); + } + @Override public String visitFlatten(Flatten node, String context) { String child = node.getChild().getFirst().accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 7ace9fc831..0e494318c6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -21,6 +21,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.defaultSortFieldArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.expand; import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static 
org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; @@ -448,6 +449,14 @@ public void testEvalCommand() { eval(relation("t"), let(field("r"), function("abs", field("f"))))); } + @Test + public void testExpandCommand() { + String fieldName = "field_name"; + assertEqual( + StringUtils.format("source=t | expand %s", fieldName), + expand(relation("t"), field(fieldName))); + } + @Test public void testFlattenCommand() { String fieldName = "field_name"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index ea4388b163..b9eb57d624 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.doubleLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.equalTo; import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.expand; import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; @@ -253,6 +254,14 @@ public void testEvalIfFunctionExpr() { intLiteral(0))))); } + @Test + public void testExpandExpr() { + String fieldName = "field_name"; + assertEqual( + StringUtils.format("source=t | expand %s", fieldName), + expand(relation("t"), field(fieldName))); + } + @Test public void testFlattenExpr() { String fieldName = "field_name"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index c2a94e1c5f..1a3ff2d4f0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -96,6 +96,11 @@ public void testTrendlineCommand() { anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); } + @Test + public void testExpandCommand() { + assertEquals("source=t | expand field_name", anonymize("source=t | expand field_name")); + } + @Test public void testFlattenCommand() { assertEquals("source=t | flatten field_name", anonymize("source=t | flatten field_name")); From e7e5a5a56d73970e508492f8e54f6b6ead7b287d Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 3 Feb 2025 16:46:36 -0800 Subject: [PATCH 56/81] Implement some of the `expand` logic Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 8 +- .../sql/planner/physical/ExpandOperator.java | 20 ++- .../planner/logical/LogicalExpandTest.java | 29 +++- .../planner/physical/ExpandOperatorTest.java | 128 +++++++++++++++++- 4 files changed, 176 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 7301ce442e..1922ccf501 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -101,6 +101,7 @@ import org.opensearch.sql.planner.logical.LogicalCloseCursor; import org.opensearch.sql.planner.logical.LogicalDedupe; import org.opensearch.sql.planner.logical.LogicalEval; +import org.opensearch.sql.planner.logical.LogicalExpand; import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalFlatten; @@ -461,9 +462,10 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { */ @Override public LogicalPlan visitExpand(Expand node, AnalysisContext context) { - - // TODO #3016: Implement expand command - return null; + LogicalPlan child = 
node.getChild().getFirst().accept(this, context); + ReferenceExpression fieldExpr = + (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); + return new LogicalExpand(child, fieldExpr); } /** diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index ee524abbfb..3b2964a87b 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -5,6 +5,7 @@ package org.opensearch.sql.planner.physical; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; @@ -13,7 +14,6 @@ import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; /** Flattens the specified field from the input and returns the result. */ @@ -28,6 +28,8 @@ public class ExpandOperator extends PhysicalPlan { private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); + private List expandedRows = List.of(); + @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { return visitor.visitExpand(this, context); @@ -40,15 +42,25 @@ public List getChild() { @Override public boolean hasNext() { + while (expandedRows.isEmpty() && input.hasNext()) { + expandedRows = expandExprValueAtPath(input.next(), field.getAttr()); + } - // TODO #3016: Implement expand command - return false; + return expandedRows.isEmpty(); } @Override public ExprValue next() { + return expandedRows.removeFirst(); + } + + /** + * Expands the {@link org.opensearch.sql.data.model.ExprCollectionValue} at the specified path and + * returns the resulting value. If the value is null or missing, the unmodified value is returned. 
+ */ + private static List expandExprValueAtPath(ExprValue exprValue, String path) { // TODO #3016: Implement expand command - return ExprValueUtils.nullValue(); + return new ArrayList<>(Collections.singletonList(exprValue)); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java index 7fd8d5c9fe..c34816eed7 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java @@ -5,12 +5,39 @@ package org.opensearch.sql.planner.logical; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; + +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.analysis.AnalyzerTestBase; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.DSL; @ExtendWith(MockitoExtension.class) class LogicalExpandTest extends AnalyzerTestBase { - // TODO #3016: Test expand command + private static final String TABLE_NAME = "schema"; + + @Test + void testExpandArray() { + LogicalPlan expected = + LogicalPlanDSL.expand( + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("array_value", ARRAY)); + UnresolvedPlan unresolved = + AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("array_value")); + assertEquals(expected, analyze(unresolved)); + } + + @Test + void testExpandInvalidFieldName() { + UnresolvedPlan unresolved = AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("invalid")); + + String msg = assertThrows(SemanticCheckException.class, () -> analyze(unresolved)).getMessage(); + 
assertEquals("can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", msg); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index b45d66cb7d..c5cd64f032 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -5,12 +5,27 @@ package org.opensearch.sql.planner.physical; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.expand; + +import java.util.List; +import java.util.Map; import lombok.ToString; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; @ToString @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @@ -18,5 +33,116 @@ class ExpandOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; - // TODO #3016: Test expand command + private final ExprValue integerExprValue = ExprValueUtils.integerValue(0); + private final ExprValue doubleExprValue = ExprValueUtils.doubleValue(0.0); + private final ExprValue stringExprValue = ExprValueUtils.stringValue("value"); + + @Test + 
void testArrayEmpty() { + mockInput( + ExprValueUtils.tupleValue( + Map.of("array_empty", ExprValueUtils.collectionValue(List.of())))); + + List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testArrayNull() { + mockInput(ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.nullValue()))); + + List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testArrayMissing() { + mockInput(ExprValueUtils.tupleValue(Map.of("array_missing", ExprValueUtils.missingValue()))); + + List actualRows = execute(expand(inputPlan, DSL.ref("array_missing", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testArrayUnknown() { + List actualRows = execute(expand(inputPlan, DSL.ref("array_unknown", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testArray() { + mockInput( + ExprValueUtils.tupleValue( + Map.of( + "array", + ExprValueUtils.collectionValue(List.of(integerExprValue, doubleExprValue))))); + + List expectedRows = + List.of( + ExprValueUtils.tupleValue(Map.of("array", integerExprValue)), + ExprValueUtils.tupleValue(Map.of("array", doubleExprValue))); + + List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); + assertEquals(expectedRows, actualRows); + } + + @Test + void testArrayNested() { + mockInput( + ExprValueUtils.tupleValue( + Map.of( + "struct", + ExprValueUtils.tupleValue( + Map.of( + "array_nested", + ExprValueUtils.collectionValue(List.of(stringExprValue))))))); + + List expectedRows = + List.of( + ExprValueUtils.tupleValue( + Map.of( + "struct", ExprValueUtils.tupleValue(Map.of("array_nested", stringExprValue))))); + + List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); + assertEquals(expectedRows, actualRows); + } + + @Test + void testAncestorNull() { + mockInput(ExprValueUtils.tupleValue(Map.of("struct", 
ExprValueUtils.nullValue()))); + + List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testAncestorMissing() { + mockInput(ExprValueUtils.tupleValue(Map.of("struct", ExprValueUtils.missingValue()))); + + List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testAncestorUnknown() { + List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); + assertTrue(actualRows.isEmpty()); + } + + @Test + void testInvalidType() { + mockInput(ExprValueUtils.tupleValue(Map.of("integer", integerExprValue))); + + Exception ex = + assertThrows( + ExpressionEvaluationException.class, + () -> execute(expand(inputPlan, DSL.ref("integer", INTEGER)))); + assertEquals("invalid to get collectionValue from value of type INTEGER", ex.getMessage()); + } + + /** Mocks the input plan to return a single row with the given input value. */ + private void mockInput(ExprValue mockInputValue) { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(mockInputValue); + } } From a8f6855db2f9dfe08d7623ff7a0cfe6f86dcf251 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 4 Feb 2025 14:39:54 -0800 Subject: [PATCH 57/81] Add `PathUtils` and unit tests. 
Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 18 +-- .../org/opensearch/sql/utils/PathUtils.java | 123 ++++++++++++++++ .../opensearch/sql/utils/PathUtilsTest.java | 134 ++++++++++++++++++ 3 files changed, 266 insertions(+), 9 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/utils/PathUtils.java create mode 100644 core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index 3b2964a87b..e084299b0d 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -5,16 +5,17 @@ package org.opensearch.sql.planner.physical; -import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedList; import java.util.List; -import java.util.regex.Pattern; +import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.utils.PathUtils; /** Flattens the specified field from the input and returns the result. 
*/ @Getter @@ -26,8 +27,6 @@ public class ExpandOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; - private static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); - private List expandedRows = List.of(); @Override @@ -43,7 +42,7 @@ public List getChild() { @Override public boolean hasNext() { while (expandedRows.isEmpty() && input.hasNext()) { - expandedRows = expandExprValueAtPath(input.next(), field.getAttr()); + expandedRows = expandValue(input.next(), field.getAttr()); } return expandedRows.isEmpty(); @@ -58,9 +57,10 @@ public ExprValue next() { * Expands the {@link org.opensearch.sql.data.model.ExprCollectionValue} at the specified path and * returns the resulting value. If the value is null or missing, the unmodified value is returned. */ - private static List expandExprValueAtPath(ExprValue exprValue, String path) { - - // TODO #3016: Implement expand command - return new ArrayList<>(Collections.singletonList(exprValue)); + private static List expandValue(ExprValue rootExprValue, String path) { + ExprValue targetExprValue = PathUtils.getExprValueAtPath(rootExprValue, path); + return targetExprValue.collectionValue().stream() + .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) + .collect(Collectors.toCollection(LinkedList::new)); } } diff --git a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java new file mode 100644 index 0000000000..d8ba8615d1 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java @@ -0,0 +1,123 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import 
lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.exception.SemanticCheckException; + +@UtilityClass +public class PathUtils { + + private final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); + + /** Returns true if a value exists at the specified path within the given root value. */ + public boolean containsExprValueAtPath(ExprValue root, String path) { + List pathComponents = splitPath(path); + return containsExprValueForPathComponents(root, pathComponents); + } + + /** + * Returns the {@link ExprValue} at the specified path within the given root value. Returns {@code + * null} if the root value does not contain the path - see {@link + * PathUtils#containsExprValueAtPath}. + */ + public ExprValue getExprValueAtPath(ExprValue root, String path) { + List pathComponents = splitPath(path); + + if (!containsExprValueForPathComponents(root, pathComponents)) { + return null; + } + + return getExprValueForPathComponents(root, pathComponents); + } + + /** + * Sets the {@link ExprValue} at the specified path within the given root value and returns the + * result. Throws {@link SemanticCheckException} if the root value does not contain the path - see + * {@link PathUtils#containsExprValueAtPath}. + */ + public ExprValue setExprValueAtPath(ExprValue root, String path, ExprValue newValue) { + List pathComponents = splitPath(path); + + if (!containsExprValueForPathComponents(root, pathComponents)) { + throw new SemanticCheckException(String.format("Field path '%s' does not exist.", path)); + } + + return setExprValueForPathComponents(root, pathComponents, newValue); + } + + /** Helper method for {@link PathUtils#containsExprValueAtPath}. 
*/ + private boolean containsExprValueForPathComponents(ExprValue root, List pathComponents) { + + if (pathComponents.isEmpty()) { + return true; + } + + if (!root.type().equals(STRUCT)) { + return false; + } + + String currentPathComponent = pathComponents.getFirst(); + List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); + + Map exprValueMap = root.tupleValue(); + if (!exprValueMap.containsKey(currentPathComponent)) { + return false; + } + + return containsExprValueForPathComponents( + exprValueMap.get(currentPathComponent), remainingPathComponents); + } + + /** Helper method for {@link PathUtils#getExprValueAtPath}. */ + private ExprValue getExprValueForPathComponents(ExprValue root, List pathComponents) { + + if (pathComponents.isEmpty()) { + return root; + } + + String currentPathComponent = pathComponents.getFirst(); + List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); + + Map exprValueMap = root.tupleValue(); + return getExprValueForPathComponents( + exprValueMap.get(currentPathComponent), remainingPathComponents); + } + + /** Helper method for {@link PathUtils#setExprValueAtPath}. */ + private ExprValue setExprValueForPathComponents( + ExprValue root, List pathComponents, ExprValue newValue) { + + if (pathComponents.isEmpty()) { + return newValue; + } + + String currentPathComponent = pathComponents.getFirst(); + List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); + + Map exprValueMap = new HashMap<>(root.tupleValue()); + exprValueMap.put( + currentPathComponent, + setExprValueForPathComponents( + exprValueMap.get(currentPathComponent), remainingPathComponents, newValue)); + + return ExprTupleValue.fromExprValueMap(exprValueMap); + } + + /** Splits the given path and returns the corresponding components. 
*/ + private List splitPath(String path) { + return Arrays.asList(PATH_SEPARATOR_PATTERN.split(path)); + } +} diff --git a/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java b/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java new file mode 100644 index 0000000000..4923d21e14 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java @@ -0,0 +1,134 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Map; +import lombok.ToString; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.SemanticCheckException; + +@ToString +class PathUtilsTest { + + // Test values + private final ExprValue value = ExprValueUtils.integerValue(0); + private final ExprValue newValue = ExprValueUtils.stringValue("value"); + private final ExprValue nullValue = ExprValueUtils.nullValue(); + private final ExprValue missingValue = ExprValueUtils.missingValue(); + + private final ExprValue struct1Value = ExprValueUtils.tupleValue(Map.of("field", value)); + private final ExprValue struct2Value = + ExprValueUtils.tupleValue( + Map.of("struct1", ExprValueUtils.tupleValue(Map.of("field", value)))); + + private final ExprValue input = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("field", value), + Map.entry("struct1", struct1Value), + Map.entry("struct2", struct2Value), + Map.entry("struct_null", nullValue), + Map.entry("struct_missing", missingValue))); + + @Test + void testContainsExprValueForPathComponents() { + 
assertTrue(PathUtils.containsExprValueAtPath(input, "field")); + assertTrue(PathUtils.containsExprValueAtPath(input, "struct1.field")); + assertTrue(PathUtils.containsExprValueAtPath(input, "struct2.struct1.field")); + + assertFalse(PathUtils.containsExprValueAtPath(input, "field_invalid")); + assertFalse(PathUtils.containsExprValueAtPath(input, "struct_null.field")); + assertFalse(PathUtils.containsExprValueAtPath(input, "struct_missing.field")); + assertFalse(PathUtils.containsExprValueAtPath(input, "field.field")); + } + + @Test + void testGetExprValueForPathComponents() { + assertEquals(value, PathUtils.getExprValueAtPath(input, "field")); + assertEquals(value, PathUtils.getExprValueAtPath(input, "struct1.field")); + assertEquals(value, PathUtils.getExprValueAtPath(input, "struct2.struct1.field")); + + assertNull(PathUtils.getExprValueAtPath(input, "field_invalid")); + assertNull(PathUtils.getExprValueAtPath(input, "struct_null.field")); + assertNull(PathUtils.getExprValueAtPath(input, "struct_missing.field")); + assertNull(PathUtils.getExprValueAtPath(input, "field.field")); + } + + @Test + void testSetExprValueForPathComponents() { + ExprValue expected; + ExprValue actual; + + expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("field", newValue), + Map.entry("struct1", struct1Value), + Map.entry("struct2", struct2Value), + Map.entry("struct_null", nullValue), + Map.entry("struct_missing", missingValue))); + actual = PathUtils.setExprValueAtPath(input, "field", newValue); + assertEquals(expected, actual); + + expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("field", value), + Map.entry("struct1", ExprValueUtils.tupleValue(Map.of("field", newValue))), + Map.entry("struct2", struct2Value), + Map.entry("struct_null", nullValue), + Map.entry("struct_missing", missingValue))); + actual = PathUtils.setExprValueAtPath(input, "struct1.field", newValue); + assertEquals(expected, actual); + + expected = + ExprValueUtils.tupleValue( 
+ Map.ofEntries( + Map.entry("field", value), + Map.entry("struct1", struct1Value), + Map.entry( + "struct2", + ExprValueUtils.tupleValue( + Map.of("struct1", ExprValueUtils.tupleValue(Map.of("field", newValue))))), + Map.entry("struct_null", nullValue), + Map.entry("struct_missing", missingValue))); + assertEquals(expected, PathUtils.setExprValueAtPath(input, "struct2.struct1.field", newValue)); + + Exception ex; + + ex = + assertThrows( + SemanticCheckException.class, + () -> PathUtils.setExprValueAtPath(input, "field_invalid", newValue)); + assertEquals("Field path 'field_invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> PathUtils.setExprValueAtPath(input, "struct_null.field_invalid", newValue)); + assertEquals("Field path 'struct_null.field_invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> PathUtils.setExprValueAtPath(input, "struct_missing.field_invalid", newValue)); + assertEquals("Field path 'struct_missing.field_invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> PathUtils.setExprValueAtPath(input, "field.field_invalid", newValue)); + assertEquals("Field path 'field.field_invalid' does not exist.", ex.getMessage()); + } +} From cf357ea1a22b6ad9621013317ea6dc66390971e1 Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 4 Feb 2025 16:57:12 -0800 Subject: [PATCH 58/81] Update `ExpandOperator` and unit tests. 
Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 15 +++++++-- .../planner/physical/ExpandOperatorTest.java | 33 +++++++++---------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index e084299b0d..dd23df8f75 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -42,10 +42,10 @@ public List getChild() { @Override public boolean hasNext() { while (expandedRows.isEmpty() && input.hasNext()) { - expandedRows = expandValue(input.next(), field.getAttr()); + expandedRows = expandExprValue(input.next(), field.getAttr()); } - return expandedRows.isEmpty(); + return !expandedRows.isEmpty(); } @Override @@ -57,8 +57,17 @@ public ExprValue next() { * Expands the {@link org.opensearch.sql.data.model.ExprCollectionValue} at the specified path and * returns the resulting value. If the value is null or missing, the unmodified value is returned. 
*/ - private static List expandValue(ExprValue rootExprValue, String path) { + private static List expandExprValue(ExprValue rootExprValue, String path) { + + if (!PathUtils.containsExprValueAtPath(rootExprValue, path)) { + return List.of(); + } + ExprValue targetExprValue = PathUtils.getExprValueAtPath(rootExprValue, path); + if (targetExprValue.isMissing() || targetExprValue.isNull()) { + return List.of(); + } + return targetExprValue.collectionValue().stream() .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) .collect(Collectors.toCollection(LinkedList::new)); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index c5cd64f032..31c1f6a94f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -33,33 +33,34 @@ class ExpandOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; - private final ExprValue integerExprValue = ExprValueUtils.integerValue(0); - private final ExprValue doubleExprValue = ExprValueUtils.doubleValue(0.0); - private final ExprValue stringExprValue = ExprValueUtils.stringValue("value"); + // Test constants + private static final Integer integerValue = 0; + private static final Double doubleValue = 0.0; + private static final String stringValue = "value"; @Test void testArrayEmpty() { mockInput( ExprValueUtils.tupleValue( Map.of("array_empty", ExprValueUtils.collectionValue(List.of())))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + assertTrue(actualRows.isEmpty()); } @Test void testArrayNull() { mockInput(ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.nullValue()))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + assertTrue(actualRows.isEmpty()); } @Test void 
testArrayMissing() { mockInput(ExprValueUtils.tupleValue(Map.of("array_missing", ExprValueUtils.missingValue()))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_missing", ARRAY))); + assertTrue(actualRows.isEmpty()); } @@ -73,16 +74,14 @@ void testArrayUnknown() { void testArray() { mockInput( ExprValueUtils.tupleValue( - Map.of( - "array", - ExprValueUtils.collectionValue(List.of(integerExprValue, doubleExprValue))))); + Map.of("array", ExprValueUtils.collectionValue(List.of(integerValue, doubleValue))))); + List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); List expectedRows = List.of( - ExprValueUtils.tupleValue(Map.of("array", integerExprValue)), - ExprValueUtils.tupleValue(Map.of("array", doubleExprValue))); + ExprValueUtils.tupleValue(Map.of("array", integerValue)), + ExprValueUtils.tupleValue(Map.of("array", doubleValue))); - List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); assertEquals(expectedRows, actualRows); } @@ -94,16 +93,14 @@ void testArrayNested() { "struct", ExprValueUtils.tupleValue( Map.of( - "array_nested", - ExprValueUtils.collectionValue(List.of(stringExprValue))))))); + "array_nested", ExprValueUtils.collectionValue(List.of(stringValue))))))); + List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); List expectedRows = List.of( ExprValueUtils.tupleValue( - Map.of( - "struct", ExprValueUtils.tupleValue(Map.of("array_nested", stringExprValue))))); + Map.of("struct", ExprValueUtils.tupleValue(Map.of("array_nested", stringValue))))); - List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); assertEquals(expectedRows, actualRows); } @@ -131,7 +128,7 @@ void testAncestorUnknown() { @Test void testInvalidType() { - mockInput(ExprValueUtils.tupleValue(Map.of("integer", integerExprValue))); + mockInput(ExprValueUtils.tupleValue(Map.of("integer", integerValue))); Exception ex = assertThrows( From c260f6b3e7d89afcee19a285156bee43c4ece62a Mon Sep 17 
00:00:00 2001 From: currantw Date: Tue, 4 Feb 2025 22:47:19 -0800 Subject: [PATCH 59/81] Implement integration tests, update `Expand` logic, rename data set. Signed-off-by: currantw --- doctest/test_docs.py | 5 +++-- .../java/org/opensearch/sql/legacy/SQLIntegTestCase.java | 6 ++++++ .../test/java/org/opensearch/sql/legacy/TestUtils.java | 5 +++++ .../java/org/opensearch/sql/legacy/TestsConstants.java | 1 + .../src/test/java/org/opensearch/sql/ppl/ExplainIT.java | 8 ++++++-- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doctest/test_docs.py b/doctest/test_docs.py index b0a4bc2a09..2364649e71 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -31,6 +31,7 @@ DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" JSON_TEST = "json_test" +EXPAND = "expand" FLATTEN = "flatten" class DocTestConnection(OpenSearchConnection): @@ -126,6 +127,7 @@ def set_up_test_indices(test): load_file("datasources.json", index_name=DATASOURCES) load_file("weblogs.json", index_name=WEBLOGS) load_file("json_test.json", index_name=JSON_TEST) + load_file("expand.json", index_name=CITIES) load_file("flatten.json", index_name=FLATTEN) def load_file(filename, index_name): @@ -154,8 +156,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, FLATTEN], ignore_unavailable=True) - + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, FLATTEN, CITIES], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, tearDown=tear_down, diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 7bd291e6cc..94c04b2754 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -20,6 +20,7 @@ import static org.opensearch.sql.legacy.TestUtils.getDogs2IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getExpandIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getFlattenIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeoIpIndexMapping; @@ -760,6 +761,11 @@ public enum Index { "json", getJsonTestIndexMapping(), "src/test/resources/json_test.json"), + EXPAND( + TestsConstants.TEST_INDEX_EXPAND, + "expand", + getExpandIndexMapping(), + "src/test/resources/expand.json"), FLATTEN( TestsConstants.TEST_INDEX_FLATTEN, "flatten", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index e3b8d21793..c0e75f873e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -260,6 +260,11 @@ public static String getJsonTestIndexMapping() { return getMappingFile(mappingFile); } + public static String getExpandIndexMapping() { + String mappingFile = "expand_mapping.json"; + return getMappingFile(mappingFile); + } + public static String getAliasIndexMapping() { String mappingFile = "alias_index_mapping.json"; return getMappingFile(mappingFile); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 0739628a41..17bd830811 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -59,6 +59,7 @@ public class 
TestsConstants { public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test"; + public static final String TEST_INDEX_EXPAND = TEST_INDEX + "_expand"; public static final String TEST_INDEX_ALIAS = TEST_INDEX + "_alias"; public static final String TEST_INDEX_FLATTEN = TEST_INDEX + "_flatten"; public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 6cf32fd1dc..4111285e63 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; @@ -21,6 +22,7 @@ public class ExplainIT extends PPLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.ACCOUNT); + loadIndex(Index.EXPAND); loadIndex(Index.FLATTEN); } @@ -131,8 +133,10 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testExpand() throws Exception { - - // TODO #3016: Test expand command. + String query = StringUtils.format("source=%s | expand team", TEST_INDEX_EXPAND); + String actual = explainQueryToString(query); + String expected = loadFromFile("expectedOutput/ppl/explain_expand.json"); + assertJsonEquals(expected, actual); } @Test From 0d3c33c202d8ff478de91857999485329fa79a5b Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 09:56:33 -0800 Subject: [PATCH 60/81] Implement integration tests, update `Expand` logic, rename data set. 
Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 30 +-- .../planner/physical/ExpandOperatorTest.java | 174 ++++++++++++------ doctest/test_data/expand.json | 4 + doctest/test_mapping/expand.json | 34 ++++ .../opensearch/sql/ppl/ExpandCommandIT.java | 90 ++++++++- integ-test/src/test/resources/expand.json | 12 ++ .../expectedOutput/ppl/explain_expand.json | 33 +++- .../indexDefinitions/expand_mapping.json | 19 ++ 8 files changed, 326 insertions(+), 70 deletions(-) create mode 100644 doctest/test_data/expand.json create mode 100644 doctest/test_mapping/expand.json create mode 100644 integ-test/src/test/resources/expand.json create mode 100644 integ-test/src/test/resources/indexDefinitions/expand_mapping.json diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index dd23df8f75..37f4764199 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -5,6 +5,8 @@ package org.opensearch.sql.planner.physical; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; + import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -14,6 +16,7 @@ import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.utils.PathUtils; @@ -42,7 +45,7 @@ public List getChild() { @Override public boolean hasNext() { while (expandedRows.isEmpty() && input.hasNext()) { - expandedRows = expandExprValue(input.next(), field.getAttr()); + expandedRows = expandExprValueAtPath(input.next(), field.getAttr()); } return !expandedRows.isEmpty(); @@ -53,23 +56,28 @@ public ExprValue next() { return expandedRows.removeFirst(); } - /** - * 
Expands the {@link org.opensearch.sql.data.model.ExprCollectionValue} at the specified path and - * returns the resulting value. If the value is null or missing, the unmodified value is returned. - */ - private static List expandExprValue(ExprValue rootExprValue, String path) { + /** Expands the {@link ExprValue} at the specified path and returns the resulting value. */ + private static List expandExprValueAtPath(ExprValue rootExprValue, String path) { if (!PathUtils.containsExprValueAtPath(rootExprValue, path)) { - return List.of(); + return new LinkedList<>(Collections.singletonList(rootExprValue)); } ExprValue targetExprValue = PathUtils.getExprValueAtPath(rootExprValue, path); - if (targetExprValue.isMissing() || targetExprValue.isNull()) { - return List.of(); - } + List expandedExprValues = expandExprValue(targetExprValue); - return targetExprValue.collectionValue().stream() + return expandedExprValues.stream() .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) .collect(Collectors.toCollection(LinkedList::new)); } + + /** Expands the given {@link ExprValue} and returns the result. */ + private static List expandExprValue(ExprValue exprValue) { + if (exprValue.type().equals(ARRAY)) { + List values = exprValue.collectionValue(); + return values.isEmpty() ? 
List.of(ExprValueUtils.nullValue()) : values; + } + + return List.of(exprValue); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index 31c1f6a94f..a2de30fcdc 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -6,11 +6,9 @@ package org.opensearch.sql.planner.physical; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; -import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.expand; import java.util.List; @@ -24,119 +22,181 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; -import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.DSL; @ToString @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @ExtendWith(MockitoExtension.class) class ExpandOperatorTest extends PhysicalPlanTestBase { - @Mock private PhysicalPlan inputPlan; // Test constants private static final Integer integerValue = 0; private static final Double doubleValue = 0.0; private static final String stringValue = "value"; + private static final ExprValue nullExprValue = ExprValueUtils.nullValue(); + private static final ExprValue missingExprValue = ExprValueUtils.missingValue(); + + // Test variables + @Mock private PhysicalPlan inputPlan; + private ExprValue inputRow; + private List actualRows; + private List expectedRows; + @Test - void testArrayEmpty() { - mockInput( + void 
testArray() { + inputRow = ExprValueUtils.tupleValue( - Map.of("array_empty", ExprValueUtils.collectionValue(List.of())))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + Map.of("array", ExprValueUtils.collectionValue(List.of(integerValue, doubleValue)))); + mockInput(inputRow); - assertTrue(actualRows.isEmpty()); + actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); + expectedRows = + List.of( + ExprValueUtils.tupleValue(Map.of("array", integerValue)), + ExprValueUtils.tupleValue(Map.of("array", doubleValue))); + + assertEquals(expectedRows, actualRows); } @Test - void testArrayNull() { - mockInput(ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.nullValue()))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + void testArrayEmpty() { + ExprValue inputRow = + ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.collectionValue(List.of()))); + mockInput(inputRow); - assertTrue(actualRows.isEmpty()); + actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); + expectedRows = List.of(ExprValueUtils.tupleValue(Map.of("array_empty", nullExprValue))); + + assertEquals(expectedRows, actualRows); } @Test - void testArrayMissing() { - mockInput(ExprValueUtils.tupleValue(Map.of("array_missing", ExprValueUtils.missingValue()))); - List actualRows = execute(expand(inputPlan, DSL.ref("array_missing", ARRAY))); + void testArrayNested() { + ExprValue inputRow = + ExprValueUtils.tupleValue( + Map.of( + "struct", + ExprValueUtils.tupleValue( + Map.of("array", ExprValueUtils.collectionValue(List.of(stringValue)))))); + mockInput(inputRow); - assertTrue(actualRows.isEmpty()); + actualRows = execute(expand(inputPlan, DSL.ref("struct.array", ARRAY))); + expectedRows = + List.of( + ExprValueUtils.tupleValue( + Map.of("struct", ExprValueUtils.tupleValue(Map.of("array", stringValue))))); + + assertEquals(expectedRows, actualRows); } @Test - void testArrayUnknown() { - 
List actualRows = execute(expand(inputPlan, DSL.ref("array_unknown", ARRAY))); - assertTrue(actualRows.isEmpty()); + void testScalar() { + ExprValue inputValue = ExprValueUtils.tupleValue(Map.of("scalar", stringValue)); + mockInput(inputValue); + + actualRows = execute(expand(inputPlan, DSL.ref("scalar", ARRAY))); + expectedRows = List.of(inputValue); + + assertEquals(expectedRows, actualRows); } @Test - void testArray() { - mockInput( - ExprValueUtils.tupleValue( - Map.of("array", ExprValueUtils.collectionValue(List.of(integerValue, doubleValue))))); - List actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); + void testScalarNull() { + ExprValue inputRow = ExprValueUtils.tupleValue(Map.of("scalar_null", nullExprValue)); + mockInput(inputRow); - List expectedRows = - List.of( - ExprValueUtils.tupleValue(Map.of("array", integerValue)), - ExprValueUtils.tupleValue(Map.of("array", doubleValue))); + actualRows = execute(expand(inputPlan, DSL.ref("scalar_null", ARRAY))); + expectedRows = List.of(inputRow); assertEquals(expectedRows, actualRows); } @Test - void testArrayNested() { - mockInput( + void testScalarMissing() { + + /** With {@link org.opensearch.sql.data.model.ExprMissingValue} */ + inputRow = ExprValueUtils.tupleValue(Map.of()); + mockInput(inputRow); + + actualRows = execute(expand(inputPlan, DSL.ref("scalar_missing", ARRAY))); + expectedRows = List.of(inputRow); + + assertEquals(expectedRows, actualRows); + + /** Without {@link org.opensearch.sql.data.model.ExprMissingValue} */ + inputRow = ExprValueUtils.tupleValue(Map.of("scalar_missing", missingExprValue)); + mockInput(inputRow); + + actualRows = execute(expand(inputPlan, DSL.ref("scalar_missing", ARRAY))); + expectedRows = List.of(inputRow); + + assertEquals(expectedRows, actualRows); + } + + @Test + void testScalarNested() { + ExprValue rowInput = ExprValueUtils.tupleValue( - Map.of( - "struct", - ExprValueUtils.tupleValue( - Map.of( - "array_nested", 
ExprValueUtils.collectionValue(List.of(stringValue))))))); - List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); + Map.of("struct", ExprValueUtils.tupleValue(Map.of("scalar", stringValue)))); + mockInput(rowInput); - List expectedRows = + actualRows = execute(expand(inputPlan, DSL.ref("struct.scalar", ARRAY))); + expectedRows = List.of( ExprValueUtils.tupleValue( - Map.of("struct", ExprValueUtils.tupleValue(Map.of("array_nested", stringValue))))); + Map.of("struct", ExprValueUtils.tupleValue(Map.of("scalar", stringValue))))); + + assertEquals(expectedRows, actualRows); + } + + @Test + void testPathUnknown() { + actualRows = execute(expand(inputPlan, DSL.ref("unknown", ARRAY))); + expectedRows = List.of(); assertEquals(expectedRows, actualRows); } @Test void testAncestorNull() { - mockInput(ExprValueUtils.tupleValue(Map.of("struct", ExprValueUtils.nullValue()))); + ExprValue rowInput = ExprValueUtils.tupleValue(Map.of("struct_null", nullExprValue)); + mockInput(rowInput); - List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); - assertTrue(actualRows.isEmpty()); + actualRows = execute(expand(inputPlan, DSL.ref("struct_null.unreachable", ARRAY))); + expectedRows = List.of(rowInput); + + assertEquals(expectedRows, actualRows); } @Test void testAncestorMissing() { - mockInput(ExprValueUtils.tupleValue(Map.of("struct", ExprValueUtils.missingValue()))); - List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); - assertTrue(actualRows.isEmpty()); + /** With {@link org.opensearch.sql.data.model.ExprMissingValue} */ + inputRow = ExprValueUtils.tupleValue(Map.of()); + mockInput(inputRow); + + actualRows = execute(expand(inputPlan, DSL.ref("struct_missing.unreachable", ARRAY))); + expectedRows = List.of(inputRow); + + assertEquals(expectedRows, actualRows); + + /** Without {@link org.opensearch.sql.data.model.ExprMissingValue} */ + inputRow = 
ExprValueUtils.tupleValue(Map.of("struct_missing", missingExprValue)); + mockInput(inputRow); + + actualRows = execute(expand(inputPlan, DSL.ref("struct_missing.unreachable", ARRAY))); + expectedRows = List.of(inputRow); + + assertEquals(expectedRows, actualRows); } @Test void testAncestorUnknown() { - List actualRows = execute(expand(inputPlan, DSL.ref("struct.array_nested", ARRAY))); + actualRows = execute(expand(inputPlan, DSL.ref("unknown.unreachable", ARRAY))); assertTrue(actualRows.isEmpty()); } - @Test - void testInvalidType() { - mockInput(ExprValueUtils.tupleValue(Map.of("integer", integerValue))); - - Exception ex = - assertThrows( - ExpressionEvaluationException.class, - () -> execute(expand(inputPlan, DSL.ref("integer", INTEGER)))); - assertEquals("invalid to get collectionValue from value of type INTEGER", ex.getMessage()); - } - /** Mocks the input plan to return a single row with the given input value. */ private void mockInput(ExprValue mockInputValue) { when(inputPlan.hasNext()).thenReturn(true, false); diff --git a/doctest/test_data/expand.json b/doctest/test_data/expand.json new file mode 100644 index 0000000000..eb4cf6c2a2 --- /dev/null +++ b/doctest/test_data/expand.json @@ -0,0 +1,4 @@ +{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} +{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} +{"name": "Null Location", "location": null} +{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} \ No newline at end of file diff --git a/doctest/test_mapping/expand.json b/doctest/test_mapping/expand.json new file mode 100644 index 0000000000..e85047c8a8 --- /dev/null +++ b/doctest/test_mapping/expand.json @@ -0,0 +1,34 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + 
"location": { + "type": "object", + "properties": { + "state": { + "type": "keyword" + }, + "province": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "coordinates": { + "type": "object", + "properties": { + "latitude": { + "type": "double" + }, + "longitude": { + "type": "double" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index 6de5b3f696..4022df6ffc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -5,7 +5,95 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.utils.StringUtils; + public class ExpandCommandIT extends PPLIntegTestCase { - // TODO #3016: Test expand command + @Override + public void init() throws IOException { + loadIndex(Index.EXPAND); + } + + @Test + public void testBasic() throws IOException { + String query = + StringUtils.format("source=%s | expand team | fields city, team.name", TEST_INDEX_EXPAND); + JSONObject result = executeQuery(query); + + verifySchema(result, schema("city", "string"), schema("team.name", "string")); + verifyDataRows( + result, + rows("Seattle", "Seattle Seahawks"), + rows("Seattle", "Seattle Kraken"), + rows("Vancouver", "Vancouver Canucks"), + rows("Vancouver", "BC Lions"), + rows("San Antonio", "San Antonio Spurs"), + rows("Empty Sports Team", null), + rows("Null Sports Team", null), + 
rows("Missing Sports Team", null)); + } + + @Test + public void testNested() throws IOException { + String query = + StringUtils.format( + "source=%s | where city = 'San Antonio' | expand team.title | fields team.name," + + " team.title", + TEST_INDEX_EXPAND); + JSONObject result = executeQuery(query); + + verifySchema(result, schema("team.name", "string"), schema("team.title", "integer")); + verifyDataRows( + result, + rows("San Antonio Spurs", 1999), + rows("San Antonio Spurs", 2003), + rows("San Antonio Spurs", 2005), + rows("San Antonio Spurs", 2007), + rows("San Antonio Spurs", 2014)); + } + + @Test + public void testMultiple() throws IOException { + String query = + StringUtils.format( + "source=%s | expand team | expand team.title | fields team.name, team.title", + TEST_INDEX_EXPAND); + JSONObject result = executeQuery(query); + + verifySchema(result, schema("team.name", "string"), schema("team.title", "integer")); + verifyDataRows( + result, + rows("Seattle Seahawks", 2014), + rows("Seattle Kraken", null), + rows("Vancouver Canucks", null), + rows("BC Lions", 1964), + rows("BC Lions", 1985), + rows("BC Lions", 1994), + rows("BC Lions", 2000), + rows("BC Lions", 2006), + rows("BC Lions", 2011), + rows("San Antonio Spurs", 1999), + rows("San Antonio Spurs", 2003), + rows("San Antonio Spurs", 2005), + rows("San Antonio Spurs", 2007), + rows("San Antonio Spurs", 2014), + rows(null, null), + rows(null, null), + rows(null, null)); + } + + @Test + public void testExpandFlatten() throws IOException { + + // TODO #3016: Test once flatten merged. 
+ } } diff --git a/integ-test/src/test/resources/expand.json b/integ-test/src/test/resources/expand.json new file mode 100644 index 0000000000..1996e99e0d --- /dev/null +++ b/integ-test/src/test/resources/expand.json @@ -0,0 +1,12 @@ +{"index":{"_id":"1"}} +{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": []}]} +{"index":{"_id":"2"}} +{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} +{"index":{"_id":"3"}} +{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} +{"index":{"_id":"4"}} +{"city": "Empty Sports Team", "team": []} +{"index":{"_id":"5"}} +{"city": "Null Sports Team", "team": null} +{"index":{"_id":"6"}} +{"city": "Missing Sports Team"} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json index 57a005492d..7935375af9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json @@ -1 +1,32 @@ -// TODO #3016: Test expand command \ No newline at end of file +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[city, team]" + }, + "children": [ + { + "name": "ExpandOperator", + "description": { + "expandField": { + "attr": "team", + "rawPath": "team", + "paths": [ + "team" + ], + "type": "STRUCT" + } + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_expand, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } + ] + } +} diff --git 
a/integ-test/src/test/resources/indexDefinitions/expand_mapping.json b/integ-test/src/test/resources/indexDefinitions/expand_mapping.json new file mode 100644 index 0000000000..76d6e8a8bf --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/expand_mapping.json @@ -0,0 +1,19 @@ +{ + "mappings": { + "properties": { + "city": { + "type": "keyword" + }, + "team": { + "properties": { + "name": { + "type":"keyword" + }, + "title": { + "type": "integer" + } + } + } + } + } +} \ No newline at end of file From 209326dc34ce25c5921edfe46a0f8c470d96fec3 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 12:40:52 -0800 Subject: [PATCH 61/81] Add `expand.rst` documentation and further updates to tests/implementation. Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 17 +--- .../planner/physical/ExpandOperatorTest.java | 24 ++--- docs/user/dql/metadata.rst | 3 +- docs/user/ppl/cmd/expand.rst | 90 ++++++++++++++++++- doctest/test_data/expand.json | 9 +- doctest/test_docs.py | 2 +- doctest/test_mapping/expand.json | 27 ++---- .../opensearch/sql/ppl/ExpandCommandIT.java | 6 +- integ-test/src/test/resources/expand.json | 8 +- 9 files changed, 123 insertions(+), 63 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index 37f4764199..ca620dffff 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -16,7 +16,6 @@ import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.utils.PathUtils; @@ -64,20 +63,12 @@ private static List expandExprValueAtPath(ExprValue rootExprValue, St } ExprValue targetExprValue = 
PathUtils.getExprValueAtPath(rootExprValue, path); - List expandedExprValues = expandExprValue(targetExprValue); + if (!targetExprValue.type().equals(ARRAY)) { + return new LinkedList<>(Collections.singletonList(rootExprValue)); + } - return expandedExprValues.stream() + return targetExprValue.collectionValue().stream() .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) .collect(Collectors.toCollection(LinkedList::new)); } - - /** Expands the given {@link ExprValue} and returns the result. */ - private static List expandExprValue(ExprValue exprValue) { - if (exprValue.type().equals(ARRAY)) { - List values = exprValue.collectionValue(); - return values.isEmpty() ? List.of(ExprValueUtils.nullValue()) : values; - } - - return List.of(exprValue); - } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index a2de30fcdc..0c8c2094ea 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -61,19 +61,19 @@ void testArray() { @Test void testArrayEmpty() { - ExprValue inputRow = + inputRow = ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.collectionValue(List.of()))); mockInput(inputRow); actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); - expectedRows = List.of(ExprValueUtils.tupleValue(Map.of("array_empty", nullExprValue))); + expectedRows = List.of(); assertEquals(expectedRows, actualRows); } @Test void testArrayNested() { - ExprValue inputRow = + inputRow = ExprValueUtils.tupleValue( Map.of( "struct", @@ -92,18 +92,18 @@ void testArrayNested() { @Test void testScalar() { - ExprValue inputValue = ExprValueUtils.tupleValue(Map.of("scalar", stringValue)); - mockInput(inputValue); + inputRow = ExprValueUtils.tupleValue(Map.of("scalar", stringValue)); + mockInput(inputRow); actualRows 
= execute(expand(inputPlan, DSL.ref("scalar", ARRAY))); - expectedRows = List.of(inputValue); + expectedRows = List.of(inputRow); assertEquals(expectedRows, actualRows); } @Test void testScalarNull() { - ExprValue inputRow = ExprValueUtils.tupleValue(Map.of("scalar_null", nullExprValue)); + inputRow = ExprValueUtils.tupleValue(Map.of("scalar_null", nullExprValue)); mockInput(inputRow); actualRows = execute(expand(inputPlan, DSL.ref("scalar_null", ARRAY))); @@ -136,10 +136,10 @@ void testScalarMissing() { @Test void testScalarNested() { - ExprValue rowInput = + inputRow = ExprValueUtils.tupleValue( Map.of("struct", ExprValueUtils.tupleValue(Map.of("scalar", stringValue)))); - mockInput(rowInput); + mockInput(inputRow); actualRows = execute(expand(inputPlan, DSL.ref("struct.scalar", ARRAY))); expectedRows = @@ -160,11 +160,11 @@ void testPathUnknown() { @Test void testAncestorNull() { - ExprValue rowInput = ExprValueUtils.tupleValue(Map.of("struct_null", nullExprValue)); - mockInput(rowInput); + inputRow = ExprValueUtils.tupleValue(Map.of("struct_null", nullExprValue)); + mockInput(inputRow); actualRows = execute(expand(inputPlan, DSL.ref("struct_null.unreachable", ARRAY))); - expectedRows = List.of(rowInput); + expectedRows = List.of(inputRow); assertEquals(expectedRows, actualRows); } diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index d617ce946b..c0dbf77b68 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 12/12 + fetched rows / total rows = 13/13 +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | 
|----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -44,6 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | expand | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | flatten | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 78ada722e3..3066b3a246 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -1,5 +1,91 @@ ============= -flatten +expand ============= -TODO #3016: Add documentation for expand command \ No newline at end of file +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +============ + +The ``expand`` command expands a field that contains an array of values to produce a separate row for each value in the +array. If the field does not contain an array, the row is not modified. + +Syntax +============ + +``expand <field>`` + +* ``field``: reference to the field to expand.
+ +Example 1: Expand a field +========================= + +PPL query:: + + os> source=expand | expand team | fields city, team.name + fetched rows / total rows = 7/7 + +--------------+-------------------+ + | city | team.name | + |--------------+-------------------| + | Seattle | Seattle Seahawks | + | Seattle | Seattle Kraken | + | Vancouver | Vancouver Canucks | + | Vancouver | BC Lions | + | San Antonio | San Antonio Spurs | + | Null Team | null | + | Missing Team | null | + +--------------+-------------------+ + +Example 2: Expand a nested field +================================= + +PPL query:: + + os> source=expand | where city = 'San Antonio' | expand team.title | fields team.name, team.title + fetched rows / total rows = 5/5 + +-------------------+------------+ + | team.name | team.title | + |-------------------+------------| + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + +-------------------+------------+ + +Example 3: Expand multiple fields +================================== + +PPL query:: + + os> source=expand | expand team | expand team.title | fields team.name, team.title + fetched rows / total rows = 16/16 + +-------------------+------------+ + | team.name | team.title | + |-------------------+------------| + | Seattle Seahawks | 2014 | + | Seattle Kraken | null | + | Vancouver Canucks | null | + | BC Lions | 1964 | + | BC Lions | 1985 | + | BC Lions | 1994 | + | BC Lions | 2000 | + | BC Lions | 2006 | + | BC Lions | 2011 | + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + | null | null | + | null | null | + +-------------------+------------+ + +Example 4: Expand and flatten +============================= + +TODO #3016: Test once flatten merged. 
\ No newline at end of file diff --git a/doctest/test_data/expand.json b/doctest/test_data/expand.json index eb4cf6c2a2..2861aa0a2a 100644 --- a/doctest/test_data/expand.json +++ b/doctest/test_data/expand.json @@ -1,4 +1,5 @@ -{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} -{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} -{"name": "Null Location", "location": null} -{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} \ No newline at end of file +{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} +{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} +{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} +{"city": "Null Team", "team": null} +{"city": "Missing Team"} \ No newline at end of file diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 2364649e71..f9b4c2a570 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -156,7 +156,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, FLATTEN, CITIES], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, EXPAND, FLATTEN], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, tearDown=tear_down, diff --git a/doctest/test_mapping/expand.json b/doctest/test_mapping/expand.json index e85047c8a8..76d6e8a8bf 100644 --- 
a/doctest/test_mapping/expand.json +++ b/doctest/test_mapping/expand.json @@ -1,31 +1,16 @@ { "mappings": { "properties": { - "name": { + "city": { "type": "keyword" }, - "location": { - "type": "object", + "team": { "properties": { - "state": { - "type": "keyword" + "name": { + "type":"keyword" }, - "province": { - "type": "keyword" - }, - "country": { - "type": "keyword" - }, - "coordinates": { - "type": "object", - "properties": { - "latitude": { - "type": "double" - }, - "longitude": { - "type": "double" - } - } + "title": { + "type": "integer" } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index 4022df6ffc..b2c51387de 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -37,9 +37,8 @@ public void testBasic() throws IOException { rows("Vancouver", "Vancouver Canucks"), rows("Vancouver", "BC Lions"), rows("San Antonio", "San Antonio Spurs"), - rows("Empty Sports Team", null), - rows("Null Sports Team", null), - rows("Missing Sports Team", null)); + rows("Null Team", null), + rows("Missing Team", null)); } @Test @@ -87,7 +86,6 @@ public void testMultiple() throws IOException { rows("San Antonio Spurs", 2007), rows("San Antonio Spurs", 2014), rows(null, null), - rows(null, null), rows(null, null)); } diff --git a/integ-test/src/test/resources/expand.json b/integ-test/src/test/resources/expand.json index 1996e99e0d..cc343c1689 100644 --- a/integ-test/src/test/resources/expand.json +++ b/integ-test/src/test/resources/expand.json @@ -1,12 +1,10 @@ {"index":{"_id":"1"}} -{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": []}]} +{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} {"index":{"_id":"2"}} {"city": "Vancouver", 
"team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} {"index":{"_id":"3"}} {"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} {"index":{"_id":"4"}} -{"city": "Empty Sports Team", "team": []} +{"city": "Null Team", "team": null} {"index":{"_id":"5"}} -{"city": "Null Sports Team", "team": null} -{"index":{"_id":"6"}} -{"city": "Missing Sports Team"} +{"city": "Missing Team"} From dd9a024b6919be5ed9e99fe2ea43054b0a001933 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 12:41:16 -0800 Subject: [PATCH 62/81] Unrelated typo fix Signed-off-by: currantw --- docs/user/ppl/functions/condition.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index 9ce130072e..deadca53f8 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -103,7 +103,7 @@ Description Usage: nullif(field1, field2) return null if two parameters are same, otherwise return field1. -Argument type: all the supported data type, (NOTE : if two parameters has different type, if two parameters has different type, you will fail semantic check) +Argument type: all supported data types (NOTE: if the two parameters has different types, you will fail semantic check) Return type: any From e31df37779f5ff2d5cde9dd7c3407cb7ff530101 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 5 Feb 2025 17:53:57 -0800 Subject: [PATCH 63/81] Cleanup, modify to return `null` for an empty array. 
Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 5 +---- .../org/opensearch/sql/ast/tree/Expand.java | 4 ++-- .../sql/planner/physical/ExpandOperator.java | 8 ++++++- .../org/opensearch/sql/utils/PathUtils.java | 5 +++-- .../planner/logical/LogicalExpandTest.java | 18 +++++++++++---- .../planner/physical/ExpandOperatorTest.java | 3 ++- .../opensearch/sql/utils/PathUtilsTest.java | 22 +++++++++---------- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 4 +++- 8 files changed, 43 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 1922ccf501..f5d3cc87fa 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -456,10 +456,7 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } - /** - * Builds and returns a {@link org.opensearch.sql.planner.logical.logicalExpand} corresponding to - * the given expand node. - */ + /** Builds and returns a {@link LogicalExpand} corresponding to the given expand node. */ @Override public LogicalPlan visitExpand(Expand node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java index 70fa3666b7..77aaac885d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -13,13 +13,13 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Field; -/** AST node representing an expand operation. */ +/** AST node representing an {@code expand } operation. 
*/ @Getter @ToString @RequiredArgsConstructor public class Expand extends UnresolvedPlan { - private UnresolvedPlan child; + private UnresolvedPlan child; @Getter private final Field field; @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index ca620dffff..c7a3f59373 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -16,6 +16,7 @@ import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.utils.PathUtils; @@ -67,7 +68,12 @@ private static List expandExprValueAtPath(ExprValue rootExprValue, St return new LinkedList<>(Collections.singletonList(rootExprValue)); } - return targetExprValue.collectionValue().stream() + List expandedExprValues = targetExprValue.collectionValue(); + if (expandedExprValues.isEmpty()) { + expandedExprValues = List.of(ExprValueUtils.nullValue()); + } + + return expandedExprValues.stream() .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) .collect(Collectors.toCollection(LinkedList::new)); } diff --git a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java index d8ba8615d1..95ed46cd37 100644 --- a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java @@ -17,6 +17,7 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.SemanticCheckException; +/** Utility methods for handling {@link ExprValue} paths. 
*/ @UtilityClass public class PathUtils { @@ -34,8 +35,8 @@ public boolean containsExprValueAtPath(ExprValue root, String path) { * PathUtils#containsExprValueAtPath}. */ public ExprValue getExprValueAtPath(ExprValue root, String path) { - List pathComponents = splitPath(path); + List pathComponents = splitPath(path); if (!containsExprValueForPathComponents(root, pathComponents)) { return null; } @@ -49,8 +50,8 @@ public ExprValue getExprValueAtPath(ExprValue root, String path) { * {@link PathUtils#containsExprValueAtPath}. */ public ExprValue setExprValueAtPath(ExprValue root, String path, ExprValue newValue) { - List pathComponents = splitPath(path); + List pathComponents = splitPath(path); if (!containsExprValueForPathComponents(root, pathComponents)) { throw new SemanticCheckException(String.format("Field path '%s' does not exist.", path)); } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java index c34816eed7..427844cc0b 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -23,20 +24,29 @@ class LogicalExpandTest extends AnalyzerTestBase { private static final String TABLE_NAME = "schema"; + @Test + void testExpandScalar() { + LogicalPlan expected = + LogicalPlanDSL.expand( + LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("integer_value", INTEGER)); + LogicalPlan actual = + analyze(AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("integer_value"))); + 
assertEquals(expected, actual); + } + @Test void testExpandArray() { LogicalPlan expected = LogicalPlanDSL.expand( LogicalPlanDSL.relation(TABLE_NAME, table), DSL.ref("array_value", ARRAY)); - UnresolvedPlan unresolved = - AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("array_value")); - assertEquals(expected, analyze(unresolved)); + LogicalPlan actual = + analyze(AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("array_value"))); + assertEquals(expected, actual); } @Test void testExpandInvalidFieldName() { UnresolvedPlan unresolved = AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("invalid")); - String msg = assertThrows(SemanticCheckException.class, () -> analyze(unresolved)).getMessage(); assertEquals("can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", msg); } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index 0c8c2094ea..c8e54aea97 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -66,7 +66,8 @@ void testArrayEmpty() { mockInput(inputRow); actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); - expectedRows = List.of(); + expectedRows = + List.of(ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.nullValue()))); assertEquals(expectedRows, actualRows); } diff --git a/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java b/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java index 4923d21e14..f83601ff2d 100644 --- a/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java @@ -42,31 +42,31 @@ class PathUtilsTest { Map.entry("struct_missing", missingValue))); @Test - void testContainsExprValueForPathComponents() { + void 
testContainsExprValueForPath() { assertTrue(PathUtils.containsExprValueAtPath(input, "field")); assertTrue(PathUtils.containsExprValueAtPath(input, "struct1.field")); assertTrue(PathUtils.containsExprValueAtPath(input, "struct2.struct1.field")); assertFalse(PathUtils.containsExprValueAtPath(input, "field_invalid")); - assertFalse(PathUtils.containsExprValueAtPath(input, "struct_null.field")); - assertFalse(PathUtils.containsExprValueAtPath(input, "struct_missing.field")); - assertFalse(PathUtils.containsExprValueAtPath(input, "field.field")); + assertFalse(PathUtils.containsExprValueAtPath(input, "struct_null.field_invalid")); + assertFalse(PathUtils.containsExprValueAtPath(input, "struct_missing.field_invalid")); + assertFalse(PathUtils.containsExprValueAtPath(input, "struct_invalid.field_invalid")); } @Test - void testGetExprValueForPathComponents() { + void testGetExprValueForPath() { assertEquals(value, PathUtils.getExprValueAtPath(input, "field")); assertEquals(value, PathUtils.getExprValueAtPath(input, "struct1.field")); assertEquals(value, PathUtils.getExprValueAtPath(input, "struct2.struct1.field")); assertNull(PathUtils.getExprValueAtPath(input, "field_invalid")); - assertNull(PathUtils.getExprValueAtPath(input, "struct_null.field")); - assertNull(PathUtils.getExprValueAtPath(input, "struct_missing.field")); - assertNull(PathUtils.getExprValueAtPath(input, "field.field")); + assertNull(PathUtils.getExprValueAtPath(input, "struct_null.field_invalid")); + assertNull(PathUtils.getExprValueAtPath(input, "struct_missing.field_invalid")); + assertNull(PathUtils.getExprValueAtPath(input, "struct_invalid.field_invalid")); } @Test - void testSetExprValueForPathComponents() { + void testSetExprValueForPath() { ExprValue expected; ExprValue actual; @@ -128,7 +128,7 @@ void testSetExprValueForPathComponents() { ex = assertThrows( SemanticCheckException.class, - () -> PathUtils.setExprValueAtPath(input, "field.field_invalid", newValue)); - assertEquals("Field path 
'field.field_invalid' does not exist.", ex.getMessage()); + () -> PathUtils.setExprValueAtPath(input, "struct_invalid.field_invalid", newValue)); + assertEquals("Field path 'struct_invalid.field_invalid' does not exist.", ex.getMessage()); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1a3ff2d4f0..d6248fe52e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -98,7 +98,9 @@ public void testTrendlineCommand() { @Test public void testExpandCommand() { - assertEquals("source=t | expand field_name", anonymize("source=t | expand field_name")); + String expected = "source=t | expand field_name"; + String actual = anonymize("source=t | expand field_name"); + assertEquals(expected, actual); } @Test From 274719ddce7462c6c65e0373f2df0ea7f894f15e Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 15:13:59 -0800 Subject: [PATCH 64/81] Fix `test_docs.py` typo, order alphabetically. 
Signed-off-by: currantw --- doctest/test_docs.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doctest/test_docs.py b/doctest/test_docs.py index f9b4c2a570..e3e4b8ba7c 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -116,19 +116,19 @@ def bash_transform(s): def set_up_test_indices(test): set_up(test) - load_file("accounts.json", index_name=ACCOUNTS) - load_file("people.json", index_name=PEOPLE) load_file("account2.json", index_name=ACCOUNT2) - load_file("nyc_taxi.json", index_name=NYC_TAXI) - load_file("books.json", index_name=BOOKS) + load_file("accounts.json", index_name=ACCOUNTS) load_file("apache.json", index_name=APACHE) - load_file("wildcard.json", index_name=WILDCARD) - load_file("nested_objects.json", index_name=NESTED) + load_file("books.json", index_name=BOOKS) load_file("datasources.json", index_name=DATASOURCES) - load_file("weblogs.json", index_name=WEBLOGS) - load_file("json_test.json", index_name=JSON_TEST) - load_file("expand.json", index_name=CITIES) + load_file("expand.json", index_name=EXPAND) load_file("flatten.json", index_name=FLATTEN) + load_file("json_test.json", index_name=JSON_TEST) + load_file("nested_objects.json", index_name=NESTED) + load_file("nyc_taxi.json", index_name=NYC_TAXI) + load_file("people.json", index_name=PEOPLE) + load_file("weblogs.json", index_name=WEBLOGS) + load_file("wildcard.json", index_name=WILDCARD) def load_file(filename, index_name): # Create index with the mapping if mapping file exists From 459ca24ecb35b7940b2229a4edbf9b8631e08c17 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 15:31:00 -0800 Subject: [PATCH 65/81] Minor cleanup, mostly alphabetizing constants. 
Signed-off-by: currantw --- .../sql/planner/DefaultImplementorTest.java | 28 +++----- .../logical/LogicalPlanNodeVisitorTest.java | 36 +++++----- .../physical/PhysicalPlanNodeVisitorTest.java | 22 +++--- docs/category.json | 32 ++++----- doctest/test_docs.py | 16 ++++- .../opensearch/sql/legacy/TestsConstants.java | 68 +++++++++---------- 6 files changed, 105 insertions(+), 97 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index f6b6829205..e00e14c70c 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -98,7 +98,7 @@ class DefaultImplementorTest { private final DefaultImplementor implementor = new DefaultImplementor<>(); @Test - public void visit_should_return_default_physical_operator() { + void visit_should_return_default_physical_operator() { String indexName = "test"; NamedExpression include = named("age", ref("age", INTEGER)); ReferenceExpression exclude = ref("name", STRING); @@ -192,7 +192,7 @@ public void visit_should_return_default_physical_operator() { } @Test - public void visitRelation_should_throw_an_exception() { + void visitRelation_should_throw_an_exception() { assertThrows( UnsupportedOperationException.class, () -> new LogicalRelation("test", table).accept(implementor, null)); @@ -200,7 +200,7 @@ public void visitRelation_should_throw_an_exception() { @SuppressWarnings({"rawtypes", "unchecked"}) @Test - public void visitWindowOperator_should_return_PhysicalWindowOperator() { + void visitWindowOperator_should_return_PhysicalWindowOperator() { NamedExpression windowFunction = named(new RowNumberFunction()); WindowDefinition windowDefinition = new WindowDefinition( @@ -242,7 +242,7 @@ void visitLogicalCursor_deserializes_it() { } @Test - public void visitTableScanBuilder_should_build_TableScanOperator() { + 
void visitTableScanBuilder_should_build_TableScanOperator() { TableScanOperator tableScanOperator = mock(TableScanOperator.class); TableScanBuilder tableScanBuilder = new TableScanBuilder() { @@ -255,7 +255,7 @@ public TableScanOperator build() { } @Test - public void visitTableWriteBuilder_should_build_TableWriteOperator() { + void visitTableWriteBuilder_should_build_TableWriteOperator() { LogicalPlan child = values(); TableWriteOperator tableWriteOperator = mock(TableWriteOperator.class); TableWriteBuilder logicalPlan = @@ -269,7 +269,7 @@ public TableWriteOperator build(PhysicalPlan child) { } @Test - public void visitCloseCursor_should_build_CursorCloseOperator() { + void visitCloseCursor_should_build_CursorCloseOperator() { var logicalChild = mock(LogicalPlan.class); var physicalChild = mock(PhysicalPlan.class); when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); @@ -280,7 +280,7 @@ public void visitCloseCursor_should_build_CursorCloseOperator() { } @Test - public void visitPaginate_should_remove_it_from_tree() { + void visitPaginate_should_remove_it_from_tree() { var logicalPlanTree = new LogicalPaginate( 42, @@ -292,7 +292,7 @@ public void visitPaginate_should_remove_it_from_tree() { } @Test - public void visitLimit_support_return_takeOrdered() { + void visitLimit_support_return_takeOrdered() { // replace SortOperator + LimitOperator with TakeOrderedOperator Pair sort = ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("a", INTEGER)); @@ -318,7 +318,7 @@ public void visitLimit_support_return_takeOrdered() { } @Test - public void visitTrendline_should_build_TrendlineOperator() { + void visitTrendline_should_build_TrendlineOperator() { var logicalChild = mock(LogicalPlan.class); var physicalChild = mock(PhysicalPlan.class); when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); @@ -333,14 +333,11 @@ public void visitTrendline_should_build_TrendlineOperator() { } @Test - public void visitExpand_should_build_ExpandOperator() 
{ - - // Mock physical and logical plan children. + void visitExpand_should_build_ExpandOperator() { var logicalChild = mock(LogicalPlan.class); var physicalChild = mock(PhysicalPlan.class); when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); - // Build physical plan from logical plan. var fieldName = "field_name"; var logicalPlan = new LogicalExpand(logicalChild, ref(fieldName, ARRAY)); var implemented = logicalPlan.accept(implementor, null); @@ -351,14 +348,11 @@ public void visitExpand_should_build_ExpandOperator() { } @Test - public void visitFlatten_should_build_FlattenOperator() { - - // Mock physical and logical plan children. + void visitFlatten_should_build_FlattenOperator() { var logicalChild = mock(LogicalPlan.class); var physicalChild = mock(PhysicalPlan.class); when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); - // Build physical plan from logical plan. var fieldName = "field_name"; var logicalPlan = new LogicalFlatten(logicalChild, ref(fieldName, STRUCT)); var implemented = logicalPlan.accept(implementor, null); diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index b4e852f4b1..ac91458ff6 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -156,30 +156,30 @@ public TableWriteOperator build(PhysicalPlan child) { LogicalFlatten flatten = new LogicalFlatten(relation, ref("testField", STRUCT)); return Stream.of( - relation, - tableScanBuilder, - write, - tableWriteBuilder, - filter, + ad, aggregation, - rename, - project, - remove, - eval, - sort, + closeCursor, + cursor, dedup, - window, - rareTopN, + eval, + filter, + flatten, highlight, - mlCommons, - ad, ml, - paginate, + mlCommons, nested, - cursor, - closeCursor, + paginate, + 
project, + rareTopN, + relation, + remove, + rename, + sort, + tableScanBuilder, + tableWriteBuilder, trendline, - flatten) + window, + write) .map(Arguments::of); } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index 5d0d4dd468..837cf5fba4 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -160,24 +160,24 @@ public static Stream getPhysicalPlanForTest() { Pair.of(AstDSL.computation(1, AstDSL.field("field"), "alias", SMA), DOUBLE))); return Stream.of( - Arguments.of(filter, "filter"), Arguments.of(aggregation, "aggregation"), - Arguments.of(rename, "rename"), - Arguments.of(project, "project"), - Arguments.of(window, "window"), - Arguments.of(remove, "remove"), + Arguments.of(cursorClose, "cursorClose"), + Arguments.of(dedupe, "dedupe"), Arguments.of(eval, "eval"), Arguments.of(expand, "expand"), + Arguments.of(filter, "filter"), Arguments.of(flatten, "flatten"), + Arguments.of(limit, "limit"), + Arguments.of(nested, "nested"), + Arguments.of(project, "project"), + Arguments.of(rareTopN, "rareTopN"), + Arguments.of(remove, "remove"), + Arguments.of(rename, "rename"), Arguments.of(sort, "sort"), Arguments.of(takeOrdered, "takeOrdered"), - Arguments.of(dedupe, "dedupe"), + Arguments.of(trendline, "trendline"), Arguments.of(values, "values"), - Arguments.of(rareTopN, "rareTopN"), - Arguments.of(limit, "limit"), - Arguments.of(nested, "nested"), - Arguments.of(cursorClose, "cursorClose"), - Arguments.of(trendline, "trendline")); + Arguments.of(window, "window")); } @ParameterizedTest(name = "{1}") diff --git a/docs/category.json b/docs/category.json index 7bde4fb4a4..372e7c135f 100644 --- a/docs/category.json +++ b/docs/category.json @@ -1,17 +1,15 @@ { "bash": [ + 
"user/admin/settings.rst", + "user/optimization/optimization.rst", + "user/ppl/admin/settings.rst", "user/ppl/interfaces/endpoint.rst", "user/ppl/interfaces/protocol.rst", - "user/ppl/admin/settings.rst", - "user/optimization/optimization.rst", - "user/admin/settings.rst" ], "ppl_cli": [ "user/ppl/cmd/ad.rst", "user/ppl/cmd/dedup.rst", "user/ppl/cmd/describe.rst", - "user/ppl/cmd/showdatasources.rst", - "user/ppl/cmd/information_schema.rst", "user/ppl/cmd/eval.rst", "user/ppl/cmd/expand.rst", "user/ppl/cmd/fields.rst", @@ -19,19 +17,19 @@ "user/ppl/cmd/flatten.rst", "user/ppl/cmd/grok.rst", "user/ppl/cmd/head.rst", + "user/ppl/cmd/information_schema.rst", "user/ppl/cmd/parse.rst", "user/ppl/cmd/patterns.rst", "user/ppl/cmd/rare.rst", "user/ppl/cmd/rename.rst", "user/ppl/cmd/search.rst", + "user/ppl/cmd/showdatasources.rst", "user/ppl/cmd/sort.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/syntax.rst", - "user/ppl/cmd/trendline.rst", "user/ppl/cmd/top.rst", + "user/ppl/cmd/trendline.rst", "user/ppl/cmd/where.rst", - "user/ppl/general/identifiers.rst", - "user/ppl/general/datatypes.rst", "user/ppl/functions/condition.rst", "user/ppl/functions/datetime.rst", "user/ppl/functions/expressions.rst", @@ -39,20 +37,22 @@ "user/ppl/functions/json.rst", "user/ppl/functions/math.rst", "user/ppl/functions/relevance.rst", - "user/ppl/functions/string.rst" + "user/ppl/functions/string.rst", + "user/ppl/general/datatypes.rst", + "user/ppl/general/identifiers.rst", ], "sql_cli": [ + "user/beyond/partiql.rst", + "user/dql/aggregations.rst", + "user/dql/basics.rst", + "user/dql/complex.rst", "user/dql/expressions.rst", + "user/dql/functions.rst", + "user/dql/metadata.rst", + "user/dql/window.rst", "user/general/comments.rst", "user/general/datatypes.rst", "user/general/identifiers.rst", "user/general/values.rst", - "user/dql/basics.rst", - "user/dql/functions.rst", - "user/dql/window.rst", - "user/beyond/partiql.rst", - "user/dql/aggregations.rst", - "user/dql/complex.rst", - 
"user/dql/metadata.rst" ] } diff --git a/doctest/test_docs.py b/doctest/test_docs.py index e3e4b8ba7c..3086c37fda 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -156,7 +156,21 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST, EXPAND, FLATTEN], ignore_unavailable=True) + index = [ + ACCOUNT2, + ACCOUNTS, + APACHE, + BOOKS, + EMPLOYEES, + EXPAND, + FLATTEN, + JSON_TEST, + NESTED, + NYC_TAXI, + PEOPLE, + WEBLOGS, + WILDCARD] + test_data_client.indices.delete(index=index, ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, tearDown=tear_down, diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 17bd830811..037cbc95e6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -13,56 +13,56 @@ public class TestsConstants { public static final String TEST_INDEX = "opensearch-sql_test_index"; - public static final String TEST_INDEX_ONLINE = TEST_INDEX + "_online"; public static final String TEST_INDEX_ACCOUNT = TEST_INDEX + "_account"; - public static final String TEST_INDEX_PHRASE = TEST_INDEX + "_phrase"; + public static final String TEST_INDEX_ALIAS = TEST_INDEX + "_alias"; + public static final String TEST_INDEX_BANK = TEST_INDEX + "_bank"; + public static final String TEST_INDEX_BANK_CSV_SANITIZE = TEST_INDEX_BANK + "_csv_sanitize"; + public static final String TEST_INDEX_BANK_RAW_SANITIZE = TEST_INDEX_BANK + "_raw_sanitize"; + public static final String TEST_INDEX_BANK_TWO = TEST_INDEX_BANK + "_two"; + public static final String TEST_INDEX_BANK_WITH_NULL_VALUES = + TEST_INDEX_BANK + "_with_null_values"; + public static final String TEST_INDEX_BEER = 
TEST_INDEX + "_beer"; + public static final String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; + public static final String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; + public static final String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; + public static final String TEST_INDEX_DATE = TEST_INDEX + "_date"; + public static final String TEST_INDEX_DATE_FORMATS = TEST_INDEX + "_date_formats"; + public static final String TEST_INDEX_DATE_TIME = TEST_INDEX + "_datetime"; + public static final String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; public static final String TEST_INDEX_DOG = TEST_INDEX + "_dog"; public static final String TEST_INDEX_DOG2 = TEST_INDEX + "_dog2"; public static final String TEST_INDEX_DOG3 = TEST_INDEX + "_dog3"; public static final String TEST_INDEX_DOGSUBQUERY = TEST_INDEX + "_subquery"; - public static final String TEST_INDEX_PEOPLE = TEST_INDEX + "_people"; - public static final String TEST_INDEX_PEOPLE2 = TEST_INDEX + "_people2"; + public static final String TEST_INDEX_EMPLOYEE_NESTED = TEST_INDEX + "_employee_nested"; + public static final String TEST_INDEX_EXPAND = TEST_INDEX + "_expand"; + public static final String TEST_INDEX_FLATTEN = TEST_INDEX + "_flatten"; public static final String TEST_INDEX_GAME_OF_THRONES = TEST_INDEX + "_game_of_thrones"; - public static final String TEST_INDEX_SYSTEM = TEST_INDEX + "_system"; - public static final String TEST_INDEX_ODBC = TEST_INDEX + "_odbc"; + public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; + public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; + public static final String TEST_INDEX_JOIN_TYPE = TEST_INDEX + "_join_type"; + public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test"; public static final String TEST_INDEX_LOCATION = TEST_INDEX + "_location"; public static final String TEST_INDEX_LOCATION2 = TEST_INDEX + "_location2"; + public static final String 
TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; + public static final String TEST_INDEX_NESTED_SIMPLE = TEST_INDEX + "_nested_simple"; public static final String TEST_INDEX_NESTED_TYPE = TEST_INDEX + "_nested_type"; public static final String TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS = TEST_INDEX + "_nested_type_without_arrays"; - public static final String TEST_INDEX_NESTED_SIMPLE = TEST_INDEX + "_nested_simple"; + public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public static final String TEST_INDEX_NESTED_WITH_QUOTES = TEST_INDEX + "_nested_type_with_quotes"; - public static final String TEST_INDEX_EMPLOYEE_NESTED = TEST_INDEX + "_employee_nested"; - public static final String TEST_INDEX_JOIN_TYPE = TEST_INDEX + "_join_type"; - public static final String TEST_INDEX_UNEXPANDED_OBJECT = TEST_INDEX + "_unexpanded_object"; - public static final String TEST_INDEX_BANK = TEST_INDEX + "_bank"; - public static final String TEST_INDEX_BANK_TWO = TEST_INDEX_BANK + "_two"; - public static final String TEST_INDEX_BANK_WITH_NULL_VALUES = - TEST_INDEX_BANK + "_with_null_values"; - public static final String TEST_INDEX_BANK_CSV_SANITIZE = TEST_INDEX_BANK + "_csv_sanitize"; - public static final String TEST_INDEX_BANK_RAW_SANITIZE = TEST_INDEX_BANK + "_raw_sanitize"; + public static final String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; + public static final String TEST_INDEX_ODBC = TEST_INDEX + "_odbc"; + public static final String TEST_INDEX_ONLINE = TEST_INDEX + "_online"; public static final String TEST_INDEX_ORDER = TEST_INDEX + "_order"; - public static final String TEST_INDEX_WEBLOGS = TEST_INDEX + "_weblogs"; - public static final String TEST_INDEX_DATE = TEST_INDEX + "_date"; - public static final String TEST_INDEX_DATE_TIME = TEST_INDEX + "_datetime"; - public static final String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; + public static final String TEST_INDEX_PEOPLE = TEST_INDEX + 
"_people"; + public static final String TEST_INDEX_PEOPLE2 = TEST_INDEX + "_people2"; + public static final String TEST_INDEX_PHRASE = TEST_INDEX + "_phrase"; public static final String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; - public static final String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; - public static final String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; - public static final String TEST_INDEX_BEER = TEST_INDEX + "_beer"; - public static final String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; - public static final String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; - public static final String TEST_INDEX_DATE_FORMATS = TEST_INDEX + "_date_formats"; + public static final String TEST_INDEX_SYSTEM = TEST_INDEX + "_system"; + public static final String TEST_INDEX_UNEXPANDED_OBJECT = TEST_INDEX + "_unexpanded_object"; + public static final String TEST_INDEX_WEBLOGS = TEST_INDEX + "_weblogs"; public static final String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; - public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; - public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; - public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; - public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test"; - public static final String TEST_INDEX_EXPAND = TEST_INDEX + "_expand"; - public static final String TEST_INDEX_ALIAS = TEST_INDEX + "_alias"; - public static final String TEST_INDEX_FLATTEN = TEST_INDEX + "_flatten"; - public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; public static final String DATASOURCES = ".ql-datasources"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; From eef907fd2fba50281d4573c20889f596021667ae Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 20:54:16 -0800 Subject: [PATCH 66/81] Add new doc and integration tests 
Signed-off-by: currantw --- docs/category.json | 6 +++--- docs/user/ppl/cmd/expand.rst | 20 ++++++++++++++++--- docs/user/ppl/cmd/flatten.rst | 19 +++++++++++++++--- .../opensearch/sql/ppl/ExpandCommandIT.java | 16 ++++++++++++++- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/docs/category.json b/docs/category.json index 372e7c135f..82697a7a62 100644 --- a/docs/category.json +++ b/docs/category.json @@ -4,7 +4,7 @@ "user/optimization/optimization.rst", "user/ppl/admin/settings.rst", "user/ppl/interfaces/endpoint.rst", - "user/ppl/interfaces/protocol.rst", + "user/ppl/interfaces/protocol.rst" ], "ppl_cli": [ "user/ppl/cmd/ad.rst", @@ -39,7 +39,7 @@ "user/ppl/functions/relevance.rst", "user/ppl/functions/string.rst", "user/ppl/general/datatypes.rst", - "user/ppl/general/identifiers.rst", + "user/ppl/general/identifiers.rst" ], "sql_cli": [ "user/beyond/partiql.rst", @@ -53,6 +53,6 @@ "user/general/comments.rst", "user/general/datatypes.rst", "user/general/identifiers.rst", - "user/general/values.rst", + "user/general/values.rst" ] } diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 3066b3a246..44f847f0e5 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -85,7 +85,21 @@ PPL query:: | null | null | +-------------------+------------+ -Example 4: Expand and flatten -============================= +Example 4: Expand and flatten a field +===================================== -TODO #3016: Test once flatten merged. 
\ No newline at end of file +PPL query:: + + os> source=expand | expand team | flatten team | fields name, title + fetched rows / total rows = 7/7 + +-------------------+---------------------------------+ + | name | title | + |-------------------+---------------------------------| + | Seattle Seahawks | 2014 | + | Seattle Kraken | null | + | Vancouver Canucks | null | + | BC Lions | [1964,1985,1994,2000,2006,2011] | + | San Antonio Spurs | [1999,2003,2005,2007,2014] | + | null | null | + | null | null | + +-------------------+---------------------------------+ diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index c81e571b01..e786cd401b 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -69,7 +69,20 @@ PPL query:: | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -Example 4: Flatten and expand -============================= +Example 4: Flatten and expand an object field +============================================= + +PPL query:: + + os> source=expand | where city = 'San Antonio' | flatten team | expand title | fields name, title + fetched rows / total rows = 5/5 + +-------------------+-------+ + | name | title | + |-------------------+-------| + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + +-------------------+-------+ -TODO #3016: Add test once flatten merged. 
\ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index b2c51387de..0530dfcd8d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.List; import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.sql.common.utils.StringUtils; @@ -91,7 +92,20 @@ public void testMultiple() throws IOException { @Test public void testExpandFlatten() throws IOException { + String query = + StringUtils.format( + "source=%s | expand team | flatten team | fields name, title", TEST_INDEX_EXPAND); + JSONObject result = executeQuery(query); - // TODO #3016: Test once flatten merged. + verifySchema(result, schema("name", "string"), schema("title", "integer")); + verifyDataRows( + result, + rows("Seattle Seahawks", 2014), + rows("Seattle Kraken", null), + rows("Vancouver Canucks", null), + rows("BC Lions", List.of(1964, 1985, 1994, 2000, 2006, 2011)), + rows("San Antonio Spurs", List.of(1999, 2003, 2005, 2007, 2014)), + rows(null, null), + rows(null, null)); } } From 77afac73bfb196f0500c998fb74a3257a971fe43 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 20:56:13 -0800 Subject: [PATCH 67/81] Fix missing test coverage Signed-off-by: currantw --- .../sql/planner/logical/LogicalPlanNodeVisitorTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index ac91458ff6..0dd9425225 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ 
b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -9,6 +9,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.mock; import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.named; @@ -154,6 +155,7 @@ public TableWriteOperator build(PhysicalPlan child) { ExprCoreType.DOUBLE))); LogicalFlatten flatten = new LogicalFlatten(relation, ref("testField", STRUCT)); + LogicalExpand expand = new LogicalExpand(relation, ref("testField", ARRAY)); return Stream.of( ad, @@ -164,6 +166,7 @@ public TableWriteOperator build(PhysicalPlan child) { eval, filter, flatten, + expand, highlight, ml, mlCommons, From 49078e922b6ffbbb52071394a7ded1d143dbcd7f Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 10 Feb 2025 23:08:53 -0800 Subject: [PATCH 68/81] Move `PathUtils` to `ExprValueUtils` and update tests. 
Signed-off-by: currantw --- .../sql/data/model/ExprIntervalValue.java | 6 + .../sql/data/model/ExprIpValue.java | 6 + .../sql/data/model/ExprTupleValue.java | 2 +- .../sql/data/model/ExprValueUtils.java | 145 +++++++++++ .../sql/planner/physical/ExpandOperator.java | 7 +- .../org/opensearch/sql/utils/PathUtils.java | 124 --------- .../sql/data/model/ExprValueUtilsTest.java | 240 ++++++++++++++---- .../opensearch/sql/utils/PathUtilsTest.java | 134 ---------- 8 files changed, 357 insertions(+), 307 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/utils/PathUtils.java delete mode 100644 core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprIntervalValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprIntervalValue.java index 754520d7c8..c04575d34a 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprIntervalValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprIntervalValue.java @@ -7,6 +7,7 @@ import java.time.temporal.TemporalAmount; import java.time.temporal.TemporalUnit; +import java.util.Objects; import lombok.RequiredArgsConstructor; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -56,4 +57,9 @@ public TemporalUnit unit() { .findAny() .orElse(interval.getUnits().get(0)); } + + @Override + public int hashCode() { + return Objects.hashCode(interval); + } } diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java index 8bdbec4bb5..63894cc09d 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java @@ -6,6 +6,7 @@ package org.opensearch.sql.data.model; import inet.ipaddr.IPAddress; +import java.util.Objects; import org.opensearch.sql.data.type.ExprCoreType; import 
org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.utils.IPUtils; @@ -47,4 +48,9 @@ public String toString() { public IPAddress ipValue() { return value; } + + @Override + public int hashCode() { + return Objects.hashCode(value); + } } diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java index b7abe0d256..0e88682636 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java @@ -20,7 +20,7 @@ @RequiredArgsConstructor public class ExprTupleValue extends AbstractExprValue { - private final LinkedHashMap valueMap; + private final Map valueMap; public static ExprTupleValue fromExprValueMap(Map map) { LinkedHashMap linkedHashMap = new LinkedHashMap<>(map); diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 36be8dc648..82cfe3234b 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -5,6 +5,8 @@ package org.opensearch.sql.data.model; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + import inet.ipaddr.IPAddress; import java.time.Instant; import java.time.LocalDate; @@ -13,6 +15,8 @@ import java.time.ZoneOffset; import java.time.temporal.TemporalAmount; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -20,6 +24,7 @@ import lombok.experimental.UtilityClass; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.exception.SemanticCheckException; /** The definition of {@link ExprValue} factory. 
*/ @UtilityClass @@ -38,6 +43,8 @@ public class ExprValueUtils { public final Pattern QUALIFIED_NAME_SEPARATOR_PATTERN = Pattern.compile(QUALIFIED_NAME_SEPARATOR, Pattern.LITERAL); + private final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); + public static ExprValue booleanValue(Boolean value) { return value ? LITERAL_TRUE : LITERAL_FALSE; } @@ -210,4 +217,142 @@ public static IPAddress getIpValue(ExprValue exprValue) { public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } + + /** + * Returns true if a nested {@link ExprValue} with the specified qualified name exists within the + * given root value. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. + * 'nested_struct.integer_field' + */ + public boolean containsNestedExprValue(ExprValue rootExprValue, String qualifiedName) { + List components = splitQualifiedName(qualifiedName); + return containsNestedExprValueForComponents(rootExprValue, components); + } + + /** + * Returns the nested {@link ExprValue} with the specified qualified name within the given root + * value. Returns {@link ExprNullValue} if the root value does not contain a nested value with the + * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. 
+ * 'nested_struct.integer_field' + */ + public ExprValue getNestedExprValue(ExprValue rootExprValue, String qualifiedName) { + + List components = splitQualifiedName(qualifiedName); + if (!containsNestedExprValueForComponents(rootExprValue, components)) { + return nullValue(); + } + + return getNestedExprValueForComponents(rootExprValue, components); + } + + /** + * Sets the {@link ExprValue} with the specified qualified name within the given root value and + * returns the result. Throws {@link SemanticCheckException} if the root value does not contain a + * value with the qualified name - see {@link ExprValueUtils#containsNestedExprValue}. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. + * 'nested_struct.integer_field' + */ + public ExprValue setNestedExprValue( + ExprValue rootExprValue, String qualifiedName, ExprValue newExprValue) { + + List components = splitQualifiedName(qualifiedName); + if (!containsNestedExprValueForComponents(rootExprValue, components)) { + throw new SemanticCheckException( + String.format("Field with qualified name '%s' does not exist.", qualifiedName)); + } + + return setNestedExprValueForComponents(rootExprValue, components, newExprValue); + } + + /** + * Returns true if a nested {@link ExprValue} exists within the given root value, at the location + * specified by the qualified name components. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param components list of qualified name components - e.g. 
['nested_struct','integer_field'] + */ + private boolean containsNestedExprValueForComponents( + ExprValue rootExprValue, List components) { + + if (components.isEmpty()) { + return true; + } + + if (!rootExprValue.type().equals(STRUCT)) { + return false; + } + + String currentComponent = components.getFirst(); + List remainingComponents = components.subList(1, components.size()); + + Map exprValueMap = rootExprValue.tupleValue(); + if (!exprValueMap.containsKey(currentComponent)) { + return false; + } + + return containsNestedExprValueForComponents( + exprValueMap.get(currentComponent), remainingComponents); + } + + /** + * Returns the nested {@link ExprValue} within the given root value, at the location specified by + * the qualified name components. Requires that the root value contain a nested value with the + * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param components list of qualified name components - e.g. ['nested_struct','integer_field'] + */ + private ExprValue getNestedExprValueForComponents( + ExprValue rootExprValue, List components) { + + if (components.isEmpty()) { + return rootExprValue; + } + + String currentComponent = components.getFirst(); + List remainingQualifiedNameComponents = components.subList(1, components.size()); + + Map exprValueMap = rootExprValue.tupleValue(); + return getNestedExprValueForComponents( + exprValueMap.get(currentComponent), remainingQualifiedNameComponents); + } + + /** + * Sets the nested {@link ExprValue} within the given root value, at the location specified by the + * qualified name components, and returns the result. Requires that the root value contain a + * nested value with the qualified name - see {@link ExprValueUtils#containsNestedExprValue}. + * + * @param rootExprValue root value - expected to be an {@link ExprTupleValue} + * @param components list of qualified name components - e.g. 
['nested_struct','integer_field'] + */ + private ExprValue setNestedExprValueForComponents( + ExprValue rootExprValue, List components, ExprValue newExprValue) { + + if (components.isEmpty()) { + return newExprValue; + } + + String currentComponent = components.getFirst(); + List remainingComponents = components.subList(1, components.size()); + + Map exprValueMap = new HashMap<>(rootExprValue.tupleValue()); + exprValueMap.put( + currentComponent, + setNestedExprValueForComponents( + exprValueMap.get(currentComponent), remainingComponents, newExprValue)); + + return ExprTupleValue.fromExprValueMap(exprValueMap); + } + + /** Splits the given qualified name into components and returns the result.. */ + private List splitQualifiedName(String qualifiedName) { + return Arrays.asList(PATH_SEPARATOR_PATTERN.split(qualifiedName)); + } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index c7a3f59373..fc5e6c10dd 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -18,7 +18,6 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.utils.PathUtils; /** Flattens the specified field from the input and returns the result. */ @Getter @@ -59,11 +58,11 @@ public ExprValue next() { /** Expands the {@link ExprValue} at the specified path and returns the resulting value. 
*/ private static List expandExprValueAtPath(ExprValue rootExprValue, String path) { - if (!PathUtils.containsExprValueAtPath(rootExprValue, path)) { + if (!ExprValueUtils.containsNestedExprValue(rootExprValue, path)) { return new LinkedList<>(Collections.singletonList(rootExprValue)); } - ExprValue targetExprValue = PathUtils.getExprValueAtPath(rootExprValue, path); + ExprValue targetExprValue = ExprValueUtils.getNestedExprValue(rootExprValue, path); if (!targetExprValue.type().equals(ARRAY)) { return new LinkedList<>(Collections.singletonList(rootExprValue)); } @@ -74,7 +73,7 @@ private static List expandExprValueAtPath(ExprValue rootExprValue, St } return expandedExprValues.stream() - .map(v -> PathUtils.setExprValueAtPath(rootExprValue, path, v)) + .map(v -> ExprValueUtils.setNestedExprValue(rootExprValue, path, v)) .collect(Collectors.toCollection(LinkedList::new)); } } diff --git a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java b/core/src/main/java/org/opensearch/sql/utils/PathUtils.java deleted file mode 100644 index 95ed46cd37..0000000000 --- a/core/src/main/java/org/opensearch/sql/utils/PathUtils.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.utils; - -import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Pattern; -import lombok.experimental.UtilityClass; -import org.opensearch.sql.data.model.ExprTupleValue; -import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.exception.SemanticCheckException; - -/** Utility methods for handling {@link ExprValue} paths. */ -@UtilityClass -public class PathUtils { - - private final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); - - /** Returns true if a value exists at the specified path within the given root value. 
*/ - public boolean containsExprValueAtPath(ExprValue root, String path) { - List pathComponents = splitPath(path); - return containsExprValueForPathComponents(root, pathComponents); - } - - /** - * Returns the {@link ExprValue} at the specified path within the given root value. Returns {@code - * null} if the root value does not contain the path - see {@link - * PathUtils#containsExprValueAtPath}. - */ - public ExprValue getExprValueAtPath(ExprValue root, String path) { - - List pathComponents = splitPath(path); - if (!containsExprValueForPathComponents(root, pathComponents)) { - return null; - } - - return getExprValueForPathComponents(root, pathComponents); - } - - /** - * Sets the {@link ExprValue} at the specified path within the given root value and returns the - * result. Throws {@link SemanticCheckException} if the root value does not contain the path - see - * {@link PathUtils#containsExprValueAtPath}. - */ - public ExprValue setExprValueAtPath(ExprValue root, String path, ExprValue newValue) { - - List pathComponents = splitPath(path); - if (!containsExprValueForPathComponents(root, pathComponents)) { - throw new SemanticCheckException(String.format("Field path '%s' does not exist.", path)); - } - - return setExprValueForPathComponents(root, pathComponents, newValue); - } - - /** Helper method for {@link PathUtils#containsExprValueAtPath}. 
*/ - private boolean containsExprValueForPathComponents(ExprValue root, List pathComponents) { - - if (pathComponents.isEmpty()) { - return true; - } - - if (!root.type().equals(STRUCT)) { - return false; - } - - String currentPathComponent = pathComponents.getFirst(); - List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); - - Map exprValueMap = root.tupleValue(); - if (!exprValueMap.containsKey(currentPathComponent)) { - return false; - } - - return containsExprValueForPathComponents( - exprValueMap.get(currentPathComponent), remainingPathComponents); - } - - /** Helper method for {@link PathUtils#getExprValueAtPath}. */ - private ExprValue getExprValueForPathComponents(ExprValue root, List pathComponents) { - - if (pathComponents.isEmpty()) { - return root; - } - - String currentPathComponent = pathComponents.getFirst(); - List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); - - Map exprValueMap = root.tupleValue(); - return getExprValueForPathComponents( - exprValueMap.get(currentPathComponent), remainingPathComponents); - } - - /** Helper method for {@link PathUtils#setExprValueAtPath}. */ - private ExprValue setExprValueForPathComponents( - ExprValue root, List pathComponents, ExprValue newValue) { - - if (pathComponents.isEmpty()) { - return newValue; - } - - String currentPathComponent = pathComponents.getFirst(); - List remainingPathComponents = pathComponents.subList(1, pathComponents.size()); - - Map exprValueMap = new HashMap<>(root.tupleValue()); - exprValueMap.put( - currentPathComponent, - setExprValueForPathComponents( - exprValueMap.get(currentPathComponent), remainingPathComponents, newValue)); - - return ExprTupleValue.fromExprValueMap(exprValueMap); - } - - /** Splits the given path and returns the corresponding components. 
*/ - private List splitPath(String path) { - return Arrays.asList(PATH_SEPARATOR_PATTERN.split(path)); - } -} diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index 48db530a94..8fca55f7ea 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -7,8 +7,10 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; @@ -32,12 +34,9 @@ import java.time.ZonedDateTime; import java.util.AbstractMap; import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; -import java.util.stream.Collectors; import java.util.stream.Stream; import org.hamcrest.Matchers; import org.junit.jupiter.api.DisplayName; @@ -47,39 +46,77 @@ import org.junit.jupiter.params.provider.MethodSource; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.storage.bindingtuple.BindingTuple; import org.opensearch.sql.utils.IPUtils; @DisplayName("Test Expression Value Utils") public class ExprValueUtilsTest { - private static final LinkedHashMap testTuple = new LinkedHashMap<>(); - static { - testTuple.put("1", new ExprIntegerValue(1)); - } 
+ // Test values + private static final ExprValue byteExprValue = new ExprByteValue((byte) 1); + private static final ExprValue shortExprValue = new ExprShortValue((short) 1); + private static final ExprValue integerExprValue = new ExprIntegerValue(1); + private static final ExprValue longExprValue = new ExprLongValue(1L); + private static final ExprValue floatExprValue = new ExprFloatValue(1.0f); + private static final ExprValue doubleExprValue = new ExprDoubleValue(1.0d); + + private static final ExprValue nullExprValue = ExprNullValue.of(); + private static final ExprValue missingExprValue = ExprMissingValue.of(); + + private static final ExprValue ipExprValue = new ExprIpValue("1.2.3.4"); + private static final ExprValue stringExprValue = new ExprStringValue("value"); + private static final ExprValue booleanExprValue = ExprBooleanValue.of(true); + private static final ExprValue dateExprValue = new ExprDateValue("2012-08-07"); + private static final ExprValue timeExprValue = new ExprTimeValue("18:00:00"); + private static final ExprValue timestampExprValue = new ExprTimestampValue("2012-08-07 18:00:00"); + private static final ExprValue intervalExprValue = + new ExprIntervalValue(Duration.ofSeconds(100L)); + + private static final ExprValue tupleExprValue = + ExprTupleValue.fromExprValueMap(Map.of("integer", integerExprValue)); + private static final ExprValue collectionExprValue = + new ExprCollectionValue(ImmutableList.of(integerExprValue)); + + private final ExprValue tupleWithNestedExprValue = + ExprTupleValue.fromExprValueMap( + Map.of("tuple", ExprValueUtils.tupleValue(Map.of("integer", integerExprValue)))); + + private final ExprValue rootExprValue = + ExprTupleValue.fromExprValueMap( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("tuple", tupleExprValue), + Map.entry("tuple_with_nested", tupleWithNestedExprValue), + Map.entry("null", ExprNullValue.of()), + Map.entry("missing", ExprMissingValue.of()))); private static final List 
numberValues = - Stream.of((byte) 1, (short) 1, 1, 1L, 1f, 1D) - .map(ExprValueUtils::fromObjectValue) - .collect(Collectors.toList()); + List.of( + byteExprValue, + shortExprValue, + integerExprValue, + longExprValue, + floatExprValue, + doubleExprValue); private static final List nonNumberValues = - Arrays.asList( - new ExprIpValue("1.2.3.4"), - new ExprStringValue("1"), - ExprBooleanValue.of(true), - new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))), - new ExprTupleValue(testTuple), - new ExprDateValue("2012-08-07"), - new ExprTimeValue("18:00:00"), - new ExprTimestampValue("2012-08-07 18:00:00"), - new ExprIntervalValue(Duration.ofSeconds(100))); + List.of( + ipExprValue, + stringExprValue, + booleanExprValue, + collectionExprValue, + tupleExprValue, + dateExprValue, + timeExprValue, + timestampExprValue, + intervalExprValue); private static final List allValues = Lists.newArrayList(Iterables.concat(numberValues, nonNumberValues)); private static final List> numberValueExtractor = - Arrays.asList( + List.of( ExprValueUtils::getByteValue, ExprValueUtils::getShortValue, ExprValueUtils::getIntegerValue, @@ -87,14 +124,14 @@ public class ExprValueUtilsTest { ExprValueUtils::getFloatValue, ExprValueUtils::getDoubleValue); private static final List> nonNumberValueExtractor = - Arrays.asList( + List.of( ExprValueUtils::getIpValue, ExprValueUtils::getStringValue, ExprValueUtils::getBooleanValue, ExprValueUtils::getCollectionValue, ExprValueUtils::getTupleValue); private static final List> dateAndTimeValueExtractor = - Arrays.asList( + List.of( ExprValue::dateValue, ExprValue::timeValue, ExprValue::timestampValue, @@ -105,7 +142,7 @@ public class ExprValueUtilsTest { numberValueExtractor, nonNumberValueExtractor, dateAndTimeValueExtractor)); private static final List numberTypes = - Arrays.asList( + List.of( ExprCoreType.BYTE, ExprCoreType.SHORT, ExprCoreType.INTEGER, @@ -113,31 +150,31 @@ public class ExprValueUtilsTest { ExprCoreType.FLOAT, 
ExprCoreType.DOUBLE); private static final List nonNumberTypes = - Arrays.asList(IP, STRING, BOOLEAN, ARRAY, STRUCT); + List.of(IP, STRING, BOOLEAN, ARRAY, STRUCT); private static final List dateAndTimeTypes = - Arrays.asList(DATE, TIME, TIMESTAMP, INTERVAL); + List.of(DATE, TIME, TIMESTAMP, INTERVAL); private static final List allTypes = Lists.newArrayList(Iterables.concat(numberTypes, nonNumberTypes, dateAndTimeTypes)); private static Stream getValueTestArgumentStream() { List expectedValues = - Arrays.asList( + List.of( (byte) 1, (short) 1, 1, 1L, - 1f, - 1D, + 1.0f, + 1.0d, IPUtils.toAddress("1.2.3.4"), - "1", + "value", true, - Arrays.asList(integerValue(1)), - ImmutableMap.of("1", integerValue(1)), + List.of(integerValue(1)), + ImmutableMap.of("integer", integerValue(1)), LocalDate.parse("2012-08-07"), LocalTime.parse("18:00:00"), ZonedDateTime.of(LocalDateTime.parse("2012-08-07T18:00:00"), ZoneOffset.UTC) .toInstant(), - Duration.ofSeconds(100)); + Duration.ofSeconds(100L)); Stream.Builder builder = Stream.builder(); for (int i = 0; i < expectedValues.size(); i++) { builder.add(Arguments.of(allValues.get(i), allValueExtractor.get(i), expectedValues.get(i))); @@ -247,21 +284,136 @@ public void constructDateAndTimeValue() { @Test public void hashCodeTest() { - assertEquals(new ExprByteValue(1).hashCode(), new ExprByteValue(1).hashCode()); - assertEquals(new ExprShortValue(1).hashCode(), new ExprShortValue(1).hashCode()); - assertEquals(new ExprIntegerValue(1).hashCode(), new ExprIntegerValue(1).hashCode()); - assertEquals(new ExprStringValue("1").hashCode(), new ExprStringValue("1").hashCode()); + assertEquals(byteExprValue.hashCode(), new ExprByteValue((byte) 1).hashCode()); + assertEquals(shortExprValue.hashCode(), new ExprShortValue((short) 1).hashCode()); + assertEquals(integerExprValue.hashCode(), new ExprIntegerValue(1).hashCode()); + assertEquals(longExprValue.hashCode(), new ExprLongValue(1L).hashCode()); + assertEquals(floatExprValue.hashCode(), new 
ExprFloatValue(1.0f).hashCode()); + assertEquals(doubleExprValue.hashCode(), new ExprDoubleValue(1.0d).hashCode()); + + assertEquals(nullExprValue.hashCode(), ExprNullValue.of().hashCode()); + assertEquals(missingExprValue.hashCode(), ExprMissingValue.of().hashCode()); + assertEquals(ipExprValue.hashCode(), new ExprIpValue("1.2.3.4").hashCode()); + assertEquals(stringExprValue.hashCode(), new ExprStringValue("value").hashCode()); + assertEquals(booleanExprValue.hashCode(), ExprBooleanValue.of(true).hashCode()); + assertEquals(dateExprValue.hashCode(), new ExprDateValue("2012-08-07").hashCode()); + assertEquals(timeExprValue.hashCode(), new ExprTimeValue("18:00:00").hashCode()); + assertEquals( + timestampExprValue.hashCode(), new ExprTimestampValue("2012-08-07 18:00:00").hashCode()); + assertEquals( + intervalExprValue.hashCode(), new ExprIntervalValue(Duration.ofSeconds(100L)).hashCode()); + assertEquals( + tupleExprValue.hashCode(), + ExprTupleValue.fromExprValueMap(Map.of("integer", integerExprValue)).hashCode()); + assertEquals( + collectionExprValue.hashCode(), + new ExprCollectionValue(ImmutableList.of(integerExprValue)).hashCode()); + } + + @Test + void testContainsNestedExprValue() { + assertTrue(ExprValueUtils.containsNestedExprValue(rootExprValue, "integer")); + assertTrue(ExprValueUtils.containsNestedExprValue(rootExprValue, "tuple.integer")); + assertTrue( + ExprValueUtils.containsNestedExprValue(rootExprValue, "tuple_with_nested.tuple.integer")); + + assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "invalid")); + assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "null.invalid")); + assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "missing.invalid")); + assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "invalid.invalid")); + } + + @Test + void testGetNestedExprValue() { + assertEquals(integerExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "integer")); assertEquals( - new 
ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))).hashCode(), - new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))).hashCode()); + integerExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "tuple.integer")); assertEquals( - new ExprTupleValue(testTuple).hashCode(), new ExprTupleValue(testTuple).hashCode()); + integerExprValue, + ExprValueUtils.getNestedExprValue(rootExprValue, "tuple_with_nested.tuple.integer")); + + assertEquals(nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "invalid")); + assertEquals(nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "null.invalid")); assertEquals( - new ExprDateValue("2012-08-07").hashCode(), new ExprDateValue("2012-08-07").hashCode()); + nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "missing.invalid")); assertEquals( - new ExprTimeValue("18:00:00").hashCode(), new ExprTimeValue("18:00:00").hashCode()); + nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "invalid.invalid")); + } + + @Test + void testSetNestedExprValue() { + ExprValue expected; + ExprValue actual; + + expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", stringExprValue), + Map.entry("tuple", tupleExprValue), + Map.entry("tuple_with_nested", tupleWithNestedExprValue), + Map.entry("null", nullExprValue), + Map.entry("missing", missingExprValue))); + actual = ExprValueUtils.setNestedExprValue(rootExprValue, "integer", stringExprValue); + assertEquals(expected, actual); + + expected = + ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("tuple", ExprValueUtils.tupleValue(Map.of("integer", stringExprValue))), + Map.entry("tuple_with_nested", tupleWithNestedExprValue), + Map.entry("null", nullExprValue), + Map.entry("missing", missingExprValue))); + actual = ExprValueUtils.setNestedExprValue(rootExprValue, "tuple.integer", stringExprValue); + assertEquals(expected, actual); + + expected = + 
ExprValueUtils.tupleValue( + Map.ofEntries( + Map.entry("integer", integerExprValue), + Map.entry("tuple", tupleExprValue), + Map.entry( + "tuple_with_nested", + ExprValueUtils.tupleValue( + Map.of( + "tuple", + ExprValueUtils.tupleValue(Map.of("integer", stringExprValue))))), + Map.entry("null", nullExprValue), + Map.entry("missing", missingExprValue))); assertEquals( - new ExprTimestampValue("2012-08-07 18:00:00").hashCode(), - new ExprTimestampValue("2012-08-07 18:00:00").hashCode()); + expected, + ExprValueUtils.setNestedExprValue( + rootExprValue, "tuple_with_nested.tuple.integer", stringExprValue)); + + Exception ex; + + ex = + assertThrows( + SemanticCheckException.class, + () -> ExprValueUtils.setNestedExprValue(rootExprValue, "invalid", stringExprValue)); + assertEquals("Field with qualified name 'invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> + ExprValueUtils.setNestedExprValue(rootExprValue, "null.invalid", stringExprValue)); + assertEquals("Field with qualified name 'null.invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> + ExprValueUtils.setNestedExprValue( + rootExprValue, "missing.invalid", stringExprValue)); + assertEquals("Field with qualified name 'missing.invalid' does not exist.", ex.getMessage()); + + ex = + assertThrows( + SemanticCheckException.class, + () -> + ExprValueUtils.setNestedExprValue( + rootExprValue, "invalid.invalid", stringExprValue)); + assertEquals("Field with qualified name 'invalid.invalid' does not exist.", ex.getMessage()); } } diff --git a/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java b/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java deleted file mode 100644 index f83601ff2d..0000000000 --- a/core/src/test/java/org/opensearch/sql/utils/PathUtilsTest.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ 
- -package org.opensearch.sql.utils; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Map; -import lombok.ToString; -import org.junit.jupiter.api.Test; -import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.model.ExprValueUtils; -import org.opensearch.sql.exception.SemanticCheckException; - -@ToString -class PathUtilsTest { - - // Test values - private final ExprValue value = ExprValueUtils.integerValue(0); - private final ExprValue newValue = ExprValueUtils.stringValue("value"); - private final ExprValue nullValue = ExprValueUtils.nullValue(); - private final ExprValue missingValue = ExprValueUtils.missingValue(); - - private final ExprValue struct1Value = ExprValueUtils.tupleValue(Map.of("field", value)); - private final ExprValue struct2Value = - ExprValueUtils.tupleValue( - Map.of("struct1", ExprValueUtils.tupleValue(Map.of("field", value)))); - - private final ExprValue input = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("field", value), - Map.entry("struct1", struct1Value), - Map.entry("struct2", struct2Value), - Map.entry("struct_null", nullValue), - Map.entry("struct_missing", missingValue))); - - @Test - void testContainsExprValueForPath() { - assertTrue(PathUtils.containsExprValueAtPath(input, "field")); - assertTrue(PathUtils.containsExprValueAtPath(input, "struct1.field")); - assertTrue(PathUtils.containsExprValueAtPath(input, "struct2.struct1.field")); - - assertFalse(PathUtils.containsExprValueAtPath(input, "field_invalid")); - assertFalse(PathUtils.containsExprValueAtPath(input, "struct_null.field_invalid")); - assertFalse(PathUtils.containsExprValueAtPath(input, "struct_missing.field_invalid")); - 
assertFalse(PathUtils.containsExprValueAtPath(input, "struct_invalid.field_invalid")); - } - - @Test - void testGetExprValueForPath() { - assertEquals(value, PathUtils.getExprValueAtPath(input, "field")); - assertEquals(value, PathUtils.getExprValueAtPath(input, "struct1.field")); - assertEquals(value, PathUtils.getExprValueAtPath(input, "struct2.struct1.field")); - - assertNull(PathUtils.getExprValueAtPath(input, "field_invalid")); - assertNull(PathUtils.getExprValueAtPath(input, "struct_null.field_invalid")); - assertNull(PathUtils.getExprValueAtPath(input, "struct_missing.field_invalid")); - assertNull(PathUtils.getExprValueAtPath(input, "struct_invalid.field_invalid")); - } - - @Test - void testSetExprValueForPath() { - ExprValue expected; - ExprValue actual; - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("field", newValue), - Map.entry("struct1", struct1Value), - Map.entry("struct2", struct2Value), - Map.entry("struct_null", nullValue), - Map.entry("struct_missing", missingValue))); - actual = PathUtils.setExprValueAtPath(input, "field", newValue); - assertEquals(expected, actual); - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("field", value), - Map.entry("struct1", ExprValueUtils.tupleValue(Map.of("field", newValue))), - Map.entry("struct2", struct2Value), - Map.entry("struct_null", nullValue), - Map.entry("struct_missing", missingValue))); - actual = PathUtils.setExprValueAtPath(input, "struct1.field", newValue); - assertEquals(expected, actual); - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("field", value), - Map.entry("struct1", struct1Value), - Map.entry( - "struct2", - ExprValueUtils.tupleValue( - Map.of("struct1", ExprValueUtils.tupleValue(Map.of("field", newValue))))), - Map.entry("struct_null", nullValue), - Map.entry("struct_missing", missingValue))); - assertEquals(expected, PathUtils.setExprValueAtPath(input, "struct2.struct1.field", newValue)); - - Exception ex; - - ex 
= - assertThrows( - SemanticCheckException.class, - () -> PathUtils.setExprValueAtPath(input, "field_invalid", newValue)); - assertEquals("Field path 'field_invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> PathUtils.setExprValueAtPath(input, "struct_null.field_invalid", newValue)); - assertEquals("Field path 'struct_null.field_invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> PathUtils.setExprValueAtPath(input, "struct_missing.field_invalid", newValue)); - assertEquals("Field path 'struct_missing.field_invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> PathUtils.setExprValueAtPath(input, "struct_invalid.field_invalid", newValue)); - assertEquals("Field path 'struct_invalid.field_invalid' does not exist.", ex.getMessage()); - } -} From ff0d5ae14549e8a4b9d037b945c74fcc559ae34d Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 11 Feb 2025 10:12:31 -0800 Subject: [PATCH 69/81] Use `ExprValueUtils` to simplify `FlattenOperator` Signed-off-by: currantw --- .../sql/data/model/ExprValueUtils.java | 20 +++++--- .../sql/planner/physical/FlattenOperator.java | 51 ++++++++++--------- .../sql/data/model/ExprValueUtilsTest.java | 18 +++++++ 3 files changed, 58 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 82cfe3234b..00d8da5a1a 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -43,8 +43,6 @@ public class ExprValueUtils { public final Pattern QUALIFIED_NAME_SEPARATOR_PATTERN = Pattern.compile(QUALIFIED_NAME_SEPARATOR, Pattern.LITERAL); - private final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile(".", Pattern.LITERAL); - public static ExprValue 
booleanValue(Boolean value) { return value ? LITERAL_TRUE : LITERAL_FALSE; } @@ -218,6 +216,19 @@ public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } + /** + * Splits the given qualified name into components and returns the result. Throws {@link + * SemanticCheckException} if the qualified name is not valid. + */ + public List splitQualifiedName(String qualifiedName) { + return Arrays.asList(QUALIFIED_NAME_SEPARATOR_PATTERN.split(qualifiedName)); + } + + /** Joins the given components into a qualified name and returns the result. */ + public String joinQualifiedName(List components) { + return String.join(QUALIFIED_NAME_SEPARATOR, components); + } + /** * Returns true if a nested {@link ExprValue} with the specified qualified name exists within the * given root value. @@ -350,9 +361,4 @@ private ExprValue setNestedExprValueForComponents( return ExprTupleValue.fromExprValueMap(exprValueMap); } - - /** Splits the given qualified name into components and returns the result.. 
*/ - private List splitQualifiedName(String qualifiedName) { - return Arrays.asList(PATH_SEPARATOR_PATTERN.split(qualifiedName)); - } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index c529920d3e..213a897e9e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -8,7 +8,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -45,41 +44,45 @@ public boolean hasNext() { @Override public ExprValue next() { - return flattenExprValueAtPath(input.next(), field.getAttr()); + ExprValue rootExprValue = input.next(); + String qualifiedName = field.getAttr(); + + if (!ExprValueUtils.containsNestedExprValue(rootExprValue, qualifiedName)) { + return rootExprValue; + } + + ExprValue flattenExprValue = ExprValueUtils.getNestedExprValue(rootExprValue, qualifiedName); + if (flattenExprValue.isNull() || flattenExprValue.isMissing()) { + return rootExprValue; + } + + return flattenNestedExprValue(rootExprValue, qualifiedName); } /** - * Flattens the {@link ExprTupleValue} at the specified path within the given root value and - * returns the result. Returns the unmodified root value if it does not contain a value at the - * specified path. rootExprValue is expected to be an {@link ExprTupleValue}. + * Flattens the nested {@link ExprTupleValue} with the specified qualified name within the given + * root value and returns the result. Requires that the root value contain a nested value with the + * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. 
*/ - private static ExprValue flattenExprValueAtPath(ExprValue rootExprValue, String path) { + private static ExprValue flattenNestedExprValue(ExprValue rootExprValue, String qualifiedName) { Map exprValueMap = ExprValueUtils.getTupleValue(rootExprValue); - // Get current path component. - Matcher matcher = ExprValueUtils.QUALIFIED_NAME_SEPARATOR_PATTERN.matcher(path); - boolean fieldIsNested = matcher.find(); - String currentPathComponent = fieldIsNested ? path.substring(0, matcher.start()) : path; - - // Check for undefined, null, or missing values. - if (!exprValueMap.containsKey(currentPathComponent)) { - return rootExprValue; - } - - ExprValue childExprValue = exprValueMap.get(currentPathComponent); - if (childExprValue.isNull() || childExprValue.isMissing()) { - return rootExprValue; - } + List qualifiedNameComponents = ExprValueUtils.splitQualifiedName(qualifiedName); + String currentQualifiedNameComponent = qualifiedNameComponents.getFirst(); + ExprValue childExprValue = exprValueMap.get(currentQualifiedNameComponent); // Get flattened values and add them to the field map. 
Map flattenedExprValueMap; - if (fieldIsNested) { - String remainingPath = path.substring(matcher.end()); + if (qualifiedNameComponents.size() > 1) { + String remainingQualifiedName = + ExprValueUtils.joinQualifiedName( + qualifiedNameComponents.subList(1, qualifiedNameComponents.size())); + flattenedExprValueMap = Map.of( - currentPathComponent, - flattenExprValueAtPath(exprValueMap.get(currentPathComponent), remainingPath)); + currentQualifiedNameComponent, + flattenNestedExprValue(childExprValue, remainingQualifiedName)); } else { flattenedExprValueMap = ExprValueUtils.getTupleValue(childExprValue); } diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index 8fca55f7ea..ad579f9266 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -310,6 +310,24 @@ public void hashCodeTest() { new ExprCollectionValue(ImmutableList.of(integerExprValue)).hashCode()); } + @Test + void testSplitQualifiedName() { + assertEquals(List.of("integer"), ExprValueUtils.splitQualifiedName("integer")); + assertEquals(List.of("tuple", "integer"), ExprValueUtils.splitQualifiedName("tuple.integer")); + assertEquals( + List.of("tuple_with_nested", "tuple", "integer"), + ExprValueUtils.splitQualifiedName("tuple_with_nested.tuple.integer")); + } + + @Test + void testJoinQualifiedName() { + assertEquals("integer", ExprValueUtils.joinQualifiedName(List.of("integer"))); + assertEquals("tuple.integer", ExprValueUtils.joinQualifiedName(List.of("tuple", "integer"))); + assertEquals( + "tuple_with_nested.tuple.integer", + ExprValueUtils.joinQualifiedName(List.of("tuple_with_nested", "tuple", "integer"))); + } + @Test void testContainsNestedExprValue() { assertTrue(ExprValueUtils.containsNestedExprValue(rootExprValue, "integer")); From 790104c1185e24ccd17cd610a032c8ebee34f3a9 Mon Sep 
17 00:00:00 2001 From: currantw Date: Tue, 11 Feb 2025 22:40:12 -0800 Subject: [PATCH 70/81] Simplify and make consistent flatten and expand operators. Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 66 +++++++++++++----- .../sql/planner/physical/FlattenOperator.java | 67 ++++++++++--------- .../planner/physical/ExpandOperatorTest.java | 13 ---- 3 files changed, 85 insertions(+), 61 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index fc5e6c10dd..100a1dfc70 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -8,13 +8,15 @@ import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.ReferenceExpression; @@ -29,7 +31,7 @@ public class ExpandOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; - private List expandedRows = List.of(); + private LinkedList expandedRows = new LinkedList<>(); @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { @@ -44,7 +46,7 @@ public List getChild() { @Override public boolean hasNext() { while (expandedRows.isEmpty() && input.hasNext()) { - expandedRows = expandExprValueAtPath(input.next(), field.getAttr()); + expandedRows = new LinkedList<>(expandNestedExprValue(input.next(), field.getAttr())); } return 
!expandedRows.isEmpty(); @@ -55,25 +57,57 @@ public ExprValue next() { return expandedRows.removeFirst(); } - /** Expands the {@link ExprValue} at the specified path and returns the resulting value. */ - private static List expandExprValueAtPath(ExprValue rootExprValue, String path) { + /** + * Expands the nested {@link org.opensearch.sql.data.model.ExprCollectionValueValue} value with + * the specified qualified name within the given root value, and returns the results. If the root + * value does not contain a nested value with the qualified name, if the nested value is null or + * missing, or if the nested value in not an {@link + * org.opensearch.sql.data.model.ExprCollectionValueValue}, returns the unmodified root value. + * Raises {@link org.opensearch.sql.exception.SemanticCheckException} if the root value is not an + * {@link org.opensearch.sql.data.model.ExprTupleValue}. + */ + private static List expandNestedExprValue( + ExprValue rootExprValue, String qualifiedName) { + + // Get current field name. + List components = ExprValueUtils.splitQualifiedName(qualifiedName); + String fieldName = components.getFirst(); + + // Check if the child value is undefined. + Map fieldsMap = rootExprValue.tupleValue(); + if (!fieldsMap.containsKey(fieldName)) { + return List.of(rootExprValue); + } - if (!ExprValueUtils.containsNestedExprValue(rootExprValue, path)) { - return new LinkedList<>(Collections.singletonList(rootExprValue)); + // Check if the child value is null or missing. + ExprValue childExprValue = fieldsMap.get(fieldName); + if (childExprValue.isNull() || childExprValue.isMissing()) { + return List.of(rootExprValue); } - ExprValue targetExprValue = ExprValueUtils.getNestedExprValue(rootExprValue, path); - if (!targetExprValue.type().equals(ARRAY)) { - return new LinkedList<>(Collections.singletonList(rootExprValue)); + // Expand the child value. 
+ List expandedChildExprValues; + if (components.size() == 1) { + expandedChildExprValues = + new LinkedList<>( + childExprValue.type().equals(ARRAY) + ? childExprValue.collectionValue() + : List.of(childExprValue)); + } else { + String remainingQualifiedName = + ExprValueUtils.joinQualifiedName(components.subList(1, components.size())); + expandedChildExprValues = expandNestedExprValue(childExprValue, remainingQualifiedName); } - List expandedExprValues = targetExprValue.collectionValue(); - if (expandedExprValues.isEmpty()) { - expandedExprValues = List.of(ExprValueUtils.nullValue()); + // Build expanded values. + List expandedExprValues = new LinkedList<>(); + + for (ExprValue expandedChildExprValue : expandedChildExprValues) { + Map newFieldsMap = new HashMap<>(fieldsMap); + newFieldsMap.put(fieldName, expandedChildExprValue); + expandedExprValues.add(ExprTupleValue.fromExprValueMap(newFieldsMap)); } - return expandedExprValues.stream() - .map(v -> ExprValueUtils.setNestedExprValue(rootExprValue, path, v)) - .collect(Collectors.toCollection(LinkedList::new)); + return expandedExprValues; } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 213a897e9e..4373708687 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -6,12 +6,14 @@ package org.opensearch.sql.planner.physical; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; +import org.apache.commons.math3.analysis.function.Exp; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -44,50 +46,51 @@ public 
boolean hasNext() { @Override public ExprValue next() { - ExprValue rootExprValue = input.next(); - String qualifiedName = field.getAttr(); - - if (!ExprValueUtils.containsNestedExprValue(rootExprValue, qualifiedName)) { - return rootExprValue; - } - - ExprValue flattenExprValue = ExprValueUtils.getNestedExprValue(rootExprValue, qualifiedName); - if (flattenExprValue.isNull() || flattenExprValue.isMissing()) { - return rootExprValue; - } - - return flattenNestedExprValue(rootExprValue, qualifiedName); + return flattenNestedExprValue(input.next(), field.getAttr()); } /** * Flattens the nested {@link ExprTupleValue} with the specified qualified name within the given - * root value and returns the result. Requires that the root value contain a nested value with the - * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. + * root value, and returns the result. If the root value does not contain a nested value with the + * qualified name, or if the nested value is null or missing, returns the unmodified root value. + * Raises {@link org.opensearch.sql.exception.SemanticCheckException} if the root value or nested + * value is not an {@link ExprTupleValue}. */ private static ExprValue flattenNestedExprValue(ExprValue rootExprValue, String qualifiedName) { - Map exprValueMap = ExprValueUtils.getTupleValue(rootExprValue); + // Get current field name. + List components = ExprValueUtils.splitQualifiedName(qualifiedName); + String fieldName = components.getFirst(); - List qualifiedNameComponents = ExprValueUtils.splitQualifiedName(qualifiedName); - String currentQualifiedNameComponent = qualifiedNameComponents.getFirst(); - ExprValue childExprValue = exprValueMap.get(currentQualifiedNameComponent); + // Check if the child value is undefined. + Map fieldsMap = rootExprValue.tupleValue(); + if (!fieldsMap.containsKey(fieldName)) { + return rootExprValue; + } - // Get flattened values and add them to the field map. 
- Map flattenedExprValueMap; - if (qualifiedNameComponents.size() > 1) { - String remainingQualifiedName = - ExprValueUtils.joinQualifiedName( - qualifiedNameComponents.subList(1, qualifiedNameComponents.size())); + // Check if the child value is null or missing. + ExprValue childExprValue = fieldsMap.get(fieldName); + if (childExprValue.isNull() || childExprValue.isMissing()) { + return rootExprValue; + } - flattenedExprValueMap = - Map.of( - currentQualifiedNameComponent, - flattenNestedExprValue(childExprValue, remainingQualifiedName)); + // Flatten the child value. + Map flattenedChildFieldMap; + + if (components.size() == 1) { + flattenedChildFieldMap = childExprValue.tupleValue(); } else { - flattenedExprValueMap = ExprValueUtils.getTupleValue(childExprValue); + String remainingQualifiedName = + ExprValueUtils.joinQualifiedName(components.subList(1, components.size())); + ExprValue flattenedChildExprValue = + flattenNestedExprValue(childExprValue, remainingQualifiedName); + flattenedChildFieldMap = Map.of(fieldName, flattenedChildExprValue); } - exprValueMap.putAll(flattenedExprValueMap); - return ExprTupleValue.fromExprValueMap(exprValueMap); + // Build flattened value. 
+ Map newFieldsMap = new HashMap<>(fieldsMap); + newFieldsMap.putAll(flattenedChildFieldMap); + + return ExprTupleValue.fromExprValueMap(newFieldsMap); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index c8e54aea97..29ccb9b809 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -59,19 +59,6 @@ void testArray() { assertEquals(expectedRows, actualRows); } - @Test - void testArrayEmpty() { - inputRow = - ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.collectionValue(List.of()))); - mockInput(inputRow); - - actualRows = execute(expand(inputPlan, DSL.ref("array_empty", ARRAY))); - expectedRows = - List.of(ExprValueUtils.tupleValue(Map.of("array_empty", ExprValueUtils.nullValue()))); - - assertEquals(expectedRows, actualRows); - } - @Test void testArrayNested() { inputRow = From ec47f8f30b9ecdc81ef67eee08a08efd3ea86daa Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 11 Feb 2025 23:01:47 -0800 Subject: [PATCH 71/81] Update `ExprValueUtils` and unit tests. 
Signed-off-by: currantw --- .../sql/data/model/ExprValueUtils.java | 144 +--------- .../sql/planner/physical/FlattenOperator.java | 3 +- .../sql/data/model/ExprValueUtilsTest.java | 252 ++++-------------- 3 files changed, 52 insertions(+), 347 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 00d8da5a1a..35f33ad267 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -5,8 +5,6 @@ package org.opensearch.sql.data.model; -import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; - import inet.ipaddr.IPAddress; import java.time.Instant; import java.time.LocalDate; @@ -16,7 +14,6 @@ import java.time.temporal.TemporalAmount; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -24,7 +21,6 @@ import lombok.experimental.UtilityClass; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; -import org.opensearch.sql.exception.SemanticCheckException; /** The definition of {@link ExprValue} factory. */ @UtilityClass @@ -216,149 +212,13 @@ public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } - /** - * Splits the given qualified name into components and returns the result. Throws {@link - * SemanticCheckException} if the qualified name is not valid. - */ + /** Splits the given qualified name into components and returns the result. */ public List splitQualifiedName(String qualifiedName) { - return Arrays.asList(QUALIFIED_NAME_SEPARATOR_PATTERN.split(qualifiedName)); + return Arrays.asList(QUALIFIED_NAME_SEPARATOR_PATTERN.split(qualifiedName, -1)); } /** Joins the given components into a qualified name and returns the result. 
*/ public String joinQualifiedName(List components) { return String.join(QUALIFIED_NAME_SEPARATOR, components); } - - /** - * Returns true if a nested {@link ExprValue} with the specified qualified name exists within the - * given root value. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. - * 'nested_struct.integer_field' - */ - public boolean containsNestedExprValue(ExprValue rootExprValue, String qualifiedName) { - List components = splitQualifiedName(qualifiedName); - return containsNestedExprValueForComponents(rootExprValue, components); - } - - /** - * Returns the nested {@link ExprValue} with the specified qualified name within the given root - * value. Returns {@link ExprNullValue} if the root value does not contain a nested value with the - * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. - * 'nested_struct.integer_field' - */ - public ExprValue getNestedExprValue(ExprValue rootExprValue, String qualifiedName) { - - List components = splitQualifiedName(qualifiedName); - if (!containsNestedExprValueForComponents(rootExprValue, components)) { - return nullValue(); - } - - return getNestedExprValueForComponents(rootExprValue, components); - } - - /** - * Sets the {@link ExprValue} with the specified qualified name within the given root value and - * returns the result. Throws {@link SemanticCheckException} if the root value does not contain a - * value with the qualified name - see {@link ExprValueUtils#containsNestedExprValue}. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param qualifiedName qualified name for the nested {@link ExprValue} - e.g. 
- * 'nested_struct.integer_field' - */ - public ExprValue setNestedExprValue( - ExprValue rootExprValue, String qualifiedName, ExprValue newExprValue) { - - List components = splitQualifiedName(qualifiedName); - if (!containsNestedExprValueForComponents(rootExprValue, components)) { - throw new SemanticCheckException( - String.format("Field with qualified name '%s' does not exist.", qualifiedName)); - } - - return setNestedExprValueForComponents(rootExprValue, components, newExprValue); - } - - /** - * Returns true if a nested {@link ExprValue} exists within the given root value, at the location - * specified by the qualified name components. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param components list of qualified name components - e.g. ['nested_struct','integer_field'] - */ - private boolean containsNestedExprValueForComponents( - ExprValue rootExprValue, List components) { - - if (components.isEmpty()) { - return true; - } - - if (!rootExprValue.type().equals(STRUCT)) { - return false; - } - - String currentComponent = components.getFirst(); - List remainingComponents = components.subList(1, components.size()); - - Map exprValueMap = rootExprValue.tupleValue(); - if (!exprValueMap.containsKey(currentComponent)) { - return false; - } - - return containsNestedExprValueForComponents( - exprValueMap.get(currentComponent), remainingComponents); - } - - /** - * Returns the nested {@link ExprValue} within the given root value, at the location specified by - * the qualified name components. Requires that the root value contain a nested value with the - * qualified name - see {@link ExprValueUtils#containsNestedExprValue}. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param components list of qualified name components - e.g. 
['nested_struct','integer_field'] - */ - private ExprValue getNestedExprValueForComponents( - ExprValue rootExprValue, List components) { - - if (components.isEmpty()) { - return rootExprValue; - } - - String currentComponent = components.getFirst(); - List remainingQualifiedNameComponents = components.subList(1, components.size()); - - Map exprValueMap = rootExprValue.tupleValue(); - return getNestedExprValueForComponents( - exprValueMap.get(currentComponent), remainingQualifiedNameComponents); - } - - /** - * Sets the nested {@link ExprValue} within the given root value, at the location specified by the - * qualified name components, and returns the result. Requires that the root value contain a - * nested value with the qualified name - see {@link ExprValueUtils#containsNestedExprValue}. - * - * @param rootExprValue root value - expected to be an {@link ExprTupleValue} - * @param components list of qualified name components - e.g. ['nested_struct','integer_field'] - */ - private ExprValue setNestedExprValueForComponents( - ExprValue rootExprValue, List components, ExprValue newExprValue) { - - if (components.isEmpty()) { - return newExprValue; - } - - String currentComponent = components.getFirst(); - List remainingComponents = components.subList(1, components.size()); - - Map exprValueMap = new HashMap<>(rootExprValue.tupleValue()); - exprValueMap.put( - currentComponent, - setNestedExprValueForComponents( - exprValueMap.get(currentComponent), remainingComponents, newExprValue)); - - return ExprTupleValue.fromExprValueMap(exprValueMap); - } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 4373708687..44ab2279b6 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -13,7 +13,6 @@ import lombok.Getter; import 
lombok.RequiredArgsConstructor; import lombok.ToString; -import org.apache.commons.math3.analysis.function.Exp; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -88,7 +87,7 @@ private static ExprValue flattenNestedExprValue(ExprValue rootExprValue, String } // Build flattened value. - Map newFieldsMap = new HashMap<>(fieldsMap); + Map newFieldsMap = new HashMap<>(fieldsMap); newFieldsMap.putAll(flattenedChildFieldMap); return ExprTupleValue.fromExprValueMap(newFieldsMap); diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index ad579f9266..c0ac80ab2b 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -7,10 +7,8 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; @@ -34,9 +32,12 @@ import java.time.ZonedDateTime; import java.util.AbstractMap; import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.hamcrest.Matchers; import org.junit.jupiter.api.DisplayName; @@ -46,77 +47,39 @@ import org.junit.jupiter.params.provider.MethodSource; import 
org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; -import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.storage.bindingtuple.BindingTuple; import org.opensearch.sql.utils.IPUtils; @DisplayName("Test Expression Value Utils") public class ExprValueUtilsTest { + private static final LinkedHashMap testTuple = new LinkedHashMap<>(); - // Test values - private static final ExprValue byteExprValue = new ExprByteValue((byte) 1); - private static final ExprValue shortExprValue = new ExprShortValue((short) 1); - private static final ExprValue integerExprValue = new ExprIntegerValue(1); - private static final ExprValue longExprValue = new ExprLongValue(1L); - private static final ExprValue floatExprValue = new ExprFloatValue(1.0f); - private static final ExprValue doubleExprValue = new ExprDoubleValue(1.0d); - - private static final ExprValue nullExprValue = ExprNullValue.of(); - private static final ExprValue missingExprValue = ExprMissingValue.of(); - - private static final ExprValue ipExprValue = new ExprIpValue("1.2.3.4"); - private static final ExprValue stringExprValue = new ExprStringValue("value"); - private static final ExprValue booleanExprValue = ExprBooleanValue.of(true); - private static final ExprValue dateExprValue = new ExprDateValue("2012-08-07"); - private static final ExprValue timeExprValue = new ExprTimeValue("18:00:00"); - private static final ExprValue timestampExprValue = new ExprTimestampValue("2012-08-07 18:00:00"); - private static final ExprValue intervalExprValue = - new ExprIntervalValue(Duration.ofSeconds(100L)); - - private static final ExprValue tupleExprValue = - ExprTupleValue.fromExprValueMap(Map.of("integer", integerExprValue)); - private static final ExprValue collectionExprValue = - new ExprCollectionValue(ImmutableList.of(integerExprValue)); - - private final ExprValue tupleWithNestedExprValue = - ExprTupleValue.fromExprValueMap( - 
Map.of("tuple", ExprValueUtils.tupleValue(Map.of("integer", integerExprValue)))); - - private final ExprValue rootExprValue = - ExprTupleValue.fromExprValueMap( - Map.ofEntries( - Map.entry("integer", integerExprValue), - Map.entry("tuple", tupleExprValue), - Map.entry("tuple_with_nested", tupleWithNestedExprValue), - Map.entry("null", ExprNullValue.of()), - Map.entry("missing", ExprMissingValue.of()))); + static { + testTuple.put("1", new ExprIntegerValue(1)); + } private static final List numberValues = - List.of( - byteExprValue, - shortExprValue, - integerExprValue, - longExprValue, - floatExprValue, - doubleExprValue); + Stream.of((byte) 1, (short) 1, 1, 1L, 1f, 1D) + .map(ExprValueUtils::fromObjectValue) + .collect(Collectors.toList()); private static final List nonNumberValues = - List.of( - ipExprValue, - stringExprValue, - booleanExprValue, - collectionExprValue, - tupleExprValue, - dateExprValue, - timeExprValue, - timestampExprValue, - intervalExprValue); + Arrays.asList( + new ExprIpValue("1.2.3.4"), + new ExprStringValue("1"), + ExprBooleanValue.of(true), + new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))), + new ExprTupleValue(testTuple), + new ExprDateValue("2012-08-07"), + new ExprTimeValue("18:00:00"), + new ExprTimestampValue("2012-08-07 18:00:00"), + new ExprIntervalValue(Duration.ofSeconds(100))); private static final List allValues = Lists.newArrayList(Iterables.concat(numberValues, nonNumberValues)); private static final List> numberValueExtractor = - List.of( + Arrays.asList( ExprValueUtils::getByteValue, ExprValueUtils::getShortValue, ExprValueUtils::getIntegerValue, @@ -124,14 +87,14 @@ public class ExprValueUtilsTest { ExprValueUtils::getFloatValue, ExprValueUtils::getDoubleValue); private static final List> nonNumberValueExtractor = - List.of( + Arrays.asList( ExprValueUtils::getIpValue, ExprValueUtils::getStringValue, ExprValueUtils::getBooleanValue, ExprValueUtils::getCollectionValue, ExprValueUtils::getTupleValue); 
private static final List> dateAndTimeValueExtractor = - List.of( + Arrays.asList( ExprValue::dateValue, ExprValue::timeValue, ExprValue::timestampValue, @@ -142,7 +105,7 @@ public class ExprValueUtilsTest { numberValueExtractor, nonNumberValueExtractor, dateAndTimeValueExtractor)); private static final List numberTypes = - List.of( + Arrays.asList( ExprCoreType.BYTE, ExprCoreType.SHORT, ExprCoreType.INTEGER, @@ -150,31 +113,31 @@ public class ExprValueUtilsTest { ExprCoreType.FLOAT, ExprCoreType.DOUBLE); private static final List nonNumberTypes = - List.of(IP, STRING, BOOLEAN, ARRAY, STRUCT); + Arrays.asList(IP, STRING, BOOLEAN, ARRAY, STRUCT); private static final List dateAndTimeTypes = - List.of(DATE, TIME, TIMESTAMP, INTERVAL); + Arrays.asList(DATE, TIME, TIMESTAMP, INTERVAL); private static final List allTypes = Lists.newArrayList(Iterables.concat(numberTypes, nonNumberTypes, dateAndTimeTypes)); private static Stream getValueTestArgumentStream() { List expectedValues = - List.of( + Arrays.asList( (byte) 1, (short) 1, 1, 1L, - 1.0f, - 1.0d, + 1f, + 1D, IPUtils.toAddress("1.2.3.4"), - "value", + "1", true, - List.of(integerValue(1)), - ImmutableMap.of("integer", integerValue(1)), + Arrays.asList(integerValue(1)), + ImmutableMap.of("1", integerValue(1)), LocalDate.parse("2012-08-07"), LocalTime.parse("18:00:00"), ZonedDateTime.of(LocalDateTime.parse("2012-08-07T18:00:00"), ZoneOffset.UTC) .toInstant(), - Duration.ofSeconds(100L)); + Duration.ofSeconds(100)); Stream.Builder builder = Stream.builder(); for (int i = 0; i < expectedValues.size(); i++) { builder.add(Arguments.of(allValues.get(i), allValueExtractor.get(i), expectedValues.get(i))); @@ -284,154 +247,37 @@ public void constructDateAndTimeValue() { @Test public void hashCodeTest() { - assertEquals(byteExprValue.hashCode(), new ExprByteValue((byte) 1).hashCode()); - assertEquals(shortExprValue.hashCode(), new ExprShortValue((short) 1).hashCode()); - assertEquals(integerExprValue.hashCode(), new 
ExprIntegerValue(1).hashCode()); - assertEquals(longExprValue.hashCode(), new ExprLongValue(1L).hashCode()); - assertEquals(floatExprValue.hashCode(), new ExprFloatValue(1.0f).hashCode()); - assertEquals(doubleExprValue.hashCode(), new ExprDoubleValue(1.0d).hashCode()); - - assertEquals(nullExprValue.hashCode(), ExprNullValue.of().hashCode()); - assertEquals(missingExprValue.hashCode(), ExprMissingValue.of().hashCode()); - assertEquals(ipExprValue.hashCode(), new ExprIpValue("1.2.3.4").hashCode()); - assertEquals(stringExprValue.hashCode(), new ExprStringValue("value").hashCode()); - assertEquals(booleanExprValue.hashCode(), ExprBooleanValue.of(true).hashCode()); - assertEquals(dateExprValue.hashCode(), new ExprDateValue("2012-08-07").hashCode()); - assertEquals(timeExprValue.hashCode(), new ExprTimeValue("18:00:00").hashCode()); + assertEquals(new ExprByteValue(1).hashCode(), new ExprByteValue(1).hashCode()); + assertEquals(new ExprShortValue(1).hashCode(), new ExprShortValue(1).hashCode()); + assertEquals(new ExprIntegerValue(1).hashCode(), new ExprIntegerValue(1).hashCode()); + assertEquals(new ExprStringValue("1").hashCode(), new ExprStringValue("1").hashCode()); + assertEquals( + new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))).hashCode(), + new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))).hashCode()); assertEquals( - timestampExprValue.hashCode(), new ExprTimestampValue("2012-08-07 18:00:00").hashCode()); + new ExprTupleValue(testTuple).hashCode(), new ExprTupleValue(testTuple).hashCode()); assertEquals( - intervalExprValue.hashCode(), new ExprIntervalValue(Duration.ofSeconds(100L)).hashCode()); + new ExprDateValue("2012-08-07").hashCode(), new ExprDateValue("2012-08-07").hashCode()); assertEquals( - tupleExprValue.hashCode(), - ExprTupleValue.fromExprValueMap(Map.of("integer", integerExprValue)).hashCode()); + new ExprTimeValue("18:00:00").hashCode(), new ExprTimeValue("18:00:00").hashCode()); assertEquals( - 
collectionExprValue.hashCode(), - new ExprCollectionValue(ImmutableList.of(integerExprValue)).hashCode()); + new ExprTimestampValue("2012-08-07 18:00:00").hashCode(), + new ExprTimestampValue("2012-08-07 18:00:00").hashCode()); } @Test void testSplitQualifiedName() { assertEquals(List.of("integer"), ExprValueUtils.splitQualifiedName("integer")); assertEquals(List.of("tuple", "integer"), ExprValueUtils.splitQualifiedName("tuple.integer")); - assertEquals( - List.of("tuple_with_nested", "tuple", "integer"), - ExprValueUtils.splitQualifiedName("tuple_with_nested.tuple.integer")); + assertEquals(List.of("", "integer"), ExprValueUtils.splitQualifiedName(".integer")); + assertEquals(List.of("integer", ""), ExprValueUtils.splitQualifiedName("integer.")); } @Test void testJoinQualifiedName() { assertEquals("integer", ExprValueUtils.joinQualifiedName(List.of("integer"))); assertEquals("tuple.integer", ExprValueUtils.joinQualifiedName(List.of("tuple", "integer"))); - assertEquals( - "tuple_with_nested.tuple.integer", - ExprValueUtils.joinQualifiedName(List.of("tuple_with_nested", "tuple", "integer"))); - } - - @Test - void testContainsNestedExprValue() { - assertTrue(ExprValueUtils.containsNestedExprValue(rootExprValue, "integer")); - assertTrue(ExprValueUtils.containsNestedExprValue(rootExprValue, "tuple.integer")); - assertTrue( - ExprValueUtils.containsNestedExprValue(rootExprValue, "tuple_with_nested.tuple.integer")); - - assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "invalid")); - assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "null.invalid")); - assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "missing.invalid")); - assertFalse(ExprValueUtils.containsNestedExprValue(rootExprValue, "invalid.invalid")); - } - - @Test - void testGetNestedExprValue() { - assertEquals(integerExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "integer")); - assertEquals( - integerExprValue, 
ExprValueUtils.getNestedExprValue(rootExprValue, "tuple.integer")); - assertEquals( - integerExprValue, - ExprValueUtils.getNestedExprValue(rootExprValue, "tuple_with_nested.tuple.integer")); - - assertEquals(nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "invalid")); - assertEquals(nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "null.invalid")); - assertEquals( - nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "missing.invalid")); - assertEquals( - nullExprValue, ExprValueUtils.getNestedExprValue(rootExprValue, "invalid.invalid")); - } - - @Test - void testSetNestedExprValue() { - ExprValue expected; - ExprValue actual; - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("integer", stringExprValue), - Map.entry("tuple", tupleExprValue), - Map.entry("tuple_with_nested", tupleWithNestedExprValue), - Map.entry("null", nullExprValue), - Map.entry("missing", missingExprValue))); - actual = ExprValueUtils.setNestedExprValue(rootExprValue, "integer", stringExprValue); - assertEquals(expected, actual); - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("integer", integerExprValue), - Map.entry("tuple", ExprValueUtils.tupleValue(Map.of("integer", stringExprValue))), - Map.entry("tuple_with_nested", tupleWithNestedExprValue), - Map.entry("null", nullExprValue), - Map.entry("missing", missingExprValue))); - actual = ExprValueUtils.setNestedExprValue(rootExprValue, "tuple.integer", stringExprValue); - assertEquals(expected, actual); - - expected = - ExprValueUtils.tupleValue( - Map.ofEntries( - Map.entry("integer", integerExprValue), - Map.entry("tuple", tupleExprValue), - Map.entry( - "tuple_with_nested", - ExprValueUtils.tupleValue( - Map.of( - "tuple", - ExprValueUtils.tupleValue(Map.of("integer", stringExprValue))))), - Map.entry("null", nullExprValue), - Map.entry("missing", missingExprValue))); - assertEquals( - expected, - ExprValueUtils.setNestedExprValue( - rootExprValue, 
"tuple_with_nested.tuple.integer", stringExprValue)); - - Exception ex; - - ex = - assertThrows( - SemanticCheckException.class, - () -> ExprValueUtils.setNestedExprValue(rootExprValue, "invalid", stringExprValue)); - assertEquals("Field with qualified name 'invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> - ExprValueUtils.setNestedExprValue(rootExprValue, "null.invalid", stringExprValue)); - assertEquals("Field with qualified name 'null.invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> - ExprValueUtils.setNestedExprValue( - rootExprValue, "missing.invalid", stringExprValue)); - assertEquals("Field with qualified name 'missing.invalid' does not exist.", ex.getMessage()); - - ex = - assertThrows( - SemanticCheckException.class, - () -> - ExprValueUtils.setNestedExprValue( - rootExprValue, "invalid.invalid", stringExprValue)); - assertEquals("Field with qualified name 'invalid.invalid' does not exist.", ex.getMessage()); + assertEquals(".integer", ExprValueUtils.joinQualifiedName(List.of("", "integer"))); + assertEquals("integer.", ExprValueUtils.joinQualifiedName(List.of("integer", ""))); } } From 101b653352c12980730744fc17c2ffe773fc8b6e Mon Sep 17 00:00:00 2001 From: currantw Date: Tue, 11 Feb 2025 23:55:44 -0800 Subject: [PATCH 72/81] Make constants private Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 31 +++++++++---------- ...ataSourceSchemaIdentifierNameResolver.java | 2 +- .../sql/data/model/ExprValueUtils.java | 8 +++-- .../sql/expression/ReferenceExpression.java | 3 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index f5d3cc87fa..bb0c4dec22 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ 
b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -30,11 +30,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; + +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.DataSourceSchemaName; @@ -513,7 +516,7 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { ReferenceExpression fieldExpr = (ReferenceExpression) expressionAnalyzer.analyze(node.getField(), context); - String fieldName = fieldExpr.getAttr(); + String qualifiedName = fieldExpr.getAttr(); // [A] Determine fields to add // --------------------------- @@ -526,25 +529,21 @@ public LogicalPlan visitFlatten(Flatten node, AnalysisContext context) { TypeEnvironment env = context.peek(); Map fieldsMap = env.lookupAllTupleFields(FIELD_NAME); - final String fieldParentPathPrefix = - fieldName.contains(ExprValueUtils.QUALIFIED_NAME_SEPARATOR) - ? fieldName.substring(0, fieldName.lastIndexOf(ExprValueUtils.QUALIFIED_NAME_SEPARATOR)) - + ExprValueUtils.QUALIFIED_NAME_SEPARATOR - : ""; - - // Get entries for paths that are descended from the flattened field. - final String fieldDescendantPathPrefix = fieldName + ExprValueUtils.QUALIFIED_NAME_SEPARATOR; - List> fieldDescendantEntries = - fieldsMap.entrySet().stream() - .filter(e -> e.getKey().startsWith(fieldDescendantPathPrefix)) + List descendantQualifiedNames = + fieldsMap.keySet().stream() + .filter(name -> name.startsWith(qualifiedName) && !name.equals(qualifiedName)) .toList(); // Get fields to add from descendant entries. 
+ int numQualifiedNameComponents = ExprValueUtils.splitQualifiedName(qualifiedName).size(); + Map addFieldsMap = new HashMap<>(); - for (Map.Entry entry : fieldDescendantEntries) { - String newPath = - fieldParentPathPrefix + entry.getKey().substring(fieldDescendantPathPrefix.length()); - addFieldsMap.put(newPath, entry.getValue()); + for (String name : descendantQualifiedNames) { + List components = new LinkedList<>(ExprValueUtils.splitQualifiedName(name)); + components.remove(numQualifiedNameComponents - 1); + + String newName = ExprValueUtils.joinQualifiedName(components); + addFieldsMap.put(newName, fieldsMap.get(name)); } // [B] Add new fields to type environment diff --git a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java index 99f0453427..d1f1323b8f 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java @@ -34,7 +34,7 @@ public DataSourceSchemaIdentifierNameResolver( DataSourceService dataSourceService, List parts) { this.dataSourceService = dataSourceService; List remainingParts = captureSchemaName(captureDataSourceName(parts)); - identifierName = String.join(ExprValueUtils.QUALIFIED_NAME_SEPARATOR, remainingParts); + identifierName = ExprValueUtils.joinQualifiedName(remainingParts); } public String getIdentifierName() { diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 35f33ad267..cc293e0708 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -33,10 +33,10 @@ public class ExprValueUtils { public static final ExprValue LITERAL_MISSING = ExprMissingValue.of(); /** 
Qualified name separator string */ - public final String QUALIFIED_NAME_SEPARATOR = "."; + private final String QUALIFIED_NAME_SEPARATOR = "."; /** Pattern that matches the qualified name separator string */ - public final Pattern QUALIFIED_NAME_SEPARATOR_PATTERN = + private final Pattern QUALIFIED_NAME_SEPARATOR_PATTERN = Pattern.compile(QUALIFIED_NAME_SEPARATOR, Pattern.LITERAL); public static ExprValue booleanValue(Boolean value) { @@ -212,7 +212,9 @@ public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } - /** Splits the given qualified name into components and returns the result. */ + /** + * Splits the given qualified name into components and returns the result as a modifiable list. + */ public List splitQualifiedName(String qualifiedName) { return Arrays.asList(QUALIFIED_NAME_SEPARATOR_PATTERN.split(qualifiedName, -1)); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index c249b426f6..4eb9869e0e 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -105,8 +105,7 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - ExprValue wholePathValue = - value.keyValue(String.join(ExprValueUtils.QUALIFIED_NAME_SEPARATOR, paths)); + ExprValue wholePathValue = value.keyValue(ExprValueUtils.joinQualifiedName(paths)); // For array types only first index currently supported. 
if (value.type().equals(ExprCoreType.ARRAY)) { wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); From ec0f44fb158e22a8052c1385f769b55f4b8a9dec Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 08:47:05 -0800 Subject: [PATCH 73/81] Spotless Signed-off-by: currantw --- core/src/main/java/org/opensearch/sql/analysis/Analyzer.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index bb0c4dec22..bb689e5b5b 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -36,8 +36,6 @@ import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; - -import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.DataSourceSchemaName; From da7738a10fb4cf1d32565cef8cbdbafd178dc645 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 09:38:05 -0800 Subject: [PATCH 74/81] Add hashCode unit tests Signed-off-by: currantw --- .../sql/data/model/ExprIntervalValueTest.java | 53 ++++++++++----- .../sql/data/model/ExprIpValueTest.java | 66 ++++++++++++------- 2 files changed, 78 insertions(+), 41 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprIntervalValueTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprIntervalValueTest.java index 36785d383e..a9d8f440d4 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprIntervalValueTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprIntervalValueTest.java @@ -6,8 +6,8 @@ package org.opensearch.sql.data.model; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; 
-import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.INTERVAL; import java.time.Duration; @@ -15,31 +15,32 @@ import org.junit.jupiter.api.Test; import org.opensearch.sql.exception.ExpressionEvaluationException; -public class ExprIntervalValueTest { +class ExprIntervalValueTest { + @Test - public void equals_to_self() { - ExprValue interval = ExprValueUtils.intervalValue(Duration.ofNanos(1000)); - assertEquals(interval.intervalValue(), Duration.ofNanos(1000)); + void equals_to_self() { + ExprValue interval = ExprValueUtils.intervalValue(Duration.ofNanos(1000L)); + assertEquals(interval.intervalValue(), Duration.ofNanos(1000L)); } @Test - public void equal() { - ExprValue v1 = new ExprIntervalValue(Duration.ofMinutes(1)); - ExprValue v2 = ExprValueUtils.intervalValue(Duration.ofSeconds(60)); - assertTrue(v1.equals(v2)); + void equal() { + ExprValue v1 = new ExprIntervalValue(Duration.ofMinutes(1L)); + ExprValue v2 = ExprValueUtils.intervalValue(Duration.ofSeconds(60L)); + assertEquals(v1, v2); } @Test - public void compare() { + void compare() { ExprIntervalValue v1 = new ExprIntervalValue(Period.ofDays(1)); ExprIntervalValue v2 = new ExprIntervalValue(Period.ofDays(2)); - assertEquals(v1.compare(v2), -1); + assertEquals(-1, v1.compare(v2)); } @Test - public void invalid_compare() { + void invalid_compare() { ExprIntervalValue v1 = new ExprIntervalValue(Period.ofYears(1)); - ExprIntervalValue v2 = new ExprIntervalValue(Duration.ofHours(1)); + ExprIntervalValue v2 = new ExprIntervalValue(Duration.ofHours(1L)); assertThrows( ExpressionEvaluationException.class, () -> v1.compare(v2), @@ -47,7 +48,7 @@ public void invalid_compare() { } @Test - public void invalid_get_value() { + void invalid_get_value() { ExprDateValue value = new ExprDateValue("2020-08-20"); assertThrows( ExpressionEvaluationException.class, @@ -56,14 +57,32 @@ public void invalid_get_value() { } @Test - public void value() { + void value() 
{ ExprValue value = new ExprIntervalValue(Period.ofWeeks(1)); assertEquals(value.value(), Period.ofWeeks(1)); } @Test - public void type() { + void type() { ExprValue interval = new ExprIntervalValue(Period.ofYears(1)); - assertEquals(interval.type(), INTERVAL); + assertEquals(INTERVAL, interval.type()); + } + + @Test + void testHashCode() { + Duration oneMinute = Duration.ofMinutes(1L); + Duration sixtySeconds = Duration.ofSeconds(60L); + Duration twentyFourHours = Duration.ofHours(24L); + Period oneDay = Period.ofDays(1); + Period oneMonth = Period.ofMonths(1); + + assertEquals(oneMinute.hashCode(), ExprValueUtils.intervalValue(oneMinute).hashCode()); + assertEquals(oneMinute.hashCode(), ExprValueUtils.intervalValue(sixtySeconds).hashCode()); + assertNotEquals(oneMinute.hashCode(), ExprValueUtils.intervalValue(twentyFourHours).hashCode()); + assertNotEquals(oneMinute.hashCode(), ExprValueUtils.intervalValue(oneMonth).hashCode()); + + assertEquals(oneDay.hashCode(), ExprValueUtils.intervalValue(oneDay).hashCode()); + assertNotEquals(oneDay.hashCode(), ExprValueUtils.intervalValue(twentyFourHours).hashCode()); + assertNotEquals(oneDay.hashCode(), ExprValueUtils.intervalValue(oneMonth).hashCode()); } } diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java index b0ef598a5a..224afc1abf 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java @@ -17,7 +17,7 @@ import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.utils.IPUtils; -public class ExprIpValueTest { +class ExprIpValueTest { private static final String ipv4String = "1.2.3.4"; private static final String ipv6String = "2001:db7::ff00:42:8329"; @@ -53,7 +53,7 @@ public class ExprIpValueTest { "2001:0db7:0000:0000:0000:ff00:0042:8330"); @Test - public void testInvalid() { + void 
testInvalid() { assertThrows( SemanticCheckException.class, () -> ExprValueUtils.ipValue(ipInvalidString), @@ -61,33 +61,33 @@ public void testInvalid() { } @Test - public void testValue() { - ipv4EqualStrings.forEach((s) -> assertEquals(ipv4String, ExprValueUtils.ipValue(s).value())); - ipv6EqualStrings.forEach((s) -> assertEquals(ipv6String, ExprValueUtils.ipValue(s).value())); + void testValue() { + ipv4EqualStrings.forEach(s -> assertEquals(ipv4String, ExprValueUtils.ipValue(s).value())); + ipv6EqualStrings.forEach(s -> assertEquals(ipv6String, ExprValueUtils.ipValue(s).value())); } @Test - public void testType() { + void testType() { assertEquals(ExprCoreType.IP, exprIpv4Value.type()); assertEquals(ExprCoreType.IP, exprIpv6Value.type()); } @Test - public void testCompare() { + void testCompare() { // Compare to IP address. ipv4LesserStrings.forEach( - (s) -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); + s -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); ipv4EqualStrings.forEach( - (s) -> assertEquals(0, exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)))); + s -> assertEquals(0, exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)))); ipv4GreaterStrings.forEach( - (s) -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); + s -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); ipv6LesserStrings.forEach( - (s) -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); + s -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); ipv6EqualStrings.forEach( - (s) -> assertEquals(0, exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)))); + s -> assertEquals(0, exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)))); ipv6GreaterStrings.forEach( - (s) -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); + s -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); // Compare to null/missing value. 
assertThrows( @@ -107,32 +107,50 @@ public void testCompare() { } @Test - public void testEquals() { + void testEquals() { assertEquals(exprIpv4Value, exprIpv4Value); assertNotEquals(exprIpv4Value, new Object()); assertNotEquals(exprIpv4Value, ExprValueUtils.LITERAL_NULL); assertNotEquals(exprIpv4Value, ExprValueUtils.LITERAL_MISSING); - ipv4EqualStrings.forEach((s) -> assertEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); - ipv6EqualStrings.forEach((s) -> assertEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); + ipv4EqualStrings.forEach(s -> assertEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); + ipv6EqualStrings.forEach(s -> assertEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); - ipv4LesserStrings.forEach((s) -> assertNotEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); - ipv6GreaterStrings.forEach((s) -> assertNotEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); + ipv4LesserStrings.forEach(s -> assertNotEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); + ipv6GreaterStrings.forEach(s -> assertNotEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); } @Test - public void testToString() { + void testToString() { ipv4EqualStrings.forEach( - (s) -> + s -> assertEquals(String.format("IP %s", ipv4String), ExprValueUtils.ipValue(s).toString())); ipv6EqualStrings.forEach( - (s) -> + s -> assertEquals(String.format("IP %s", ipv6String), ExprValueUtils.ipValue(s).toString())); } @Test - public void testIpValue() { - ipv4EqualStrings.forEach((s) -> assertEquals(IPUtils.toAddress(s), exprIpv4Value.ipValue())); - ipv6EqualStrings.forEach((s) -> assertEquals(IPUtils.toAddress(s), exprIpv6Value.ipValue())); + void testIpValue() { + ipv4EqualStrings.forEach(s -> assertEquals(IPUtils.toAddress(s), exprIpv4Value.ipValue())); + ipv6EqualStrings.forEach(s -> assertEquals(IPUtils.toAddress(s), exprIpv6Value.ipValue())); + } + + @Test + void testHashCode() { + assertEquals(exprIpv4Value.hashCode(), exprIpv4Value.hashCode()); + assertNotEquals(exprIpv4Value.hashCode(), new 
Object().hashCode()); + assertNotEquals(exprIpv4Value.hashCode(), ExprValueUtils.LITERAL_NULL.hashCode()); + assertNotEquals(exprIpv4Value.hashCode(), ExprValueUtils.LITERAL_MISSING.hashCode()); + + ipv4EqualStrings.forEach( + s -> assertEquals(exprIpv4Value.hashCode(), ExprValueUtils.ipValue(s).hashCode())); + ipv6EqualStrings.forEach( + s -> assertEquals(exprIpv6Value.hashCode(), ExprValueUtils.ipValue(s).hashCode())); + + ipv4LesserStrings.forEach( + s -> assertNotEquals(exprIpv4Value.hashCode(), ExprValueUtils.ipValue(s).hashCode())); + ipv6GreaterStrings.forEach( + s -> assertNotEquals(exprIpv6Value.hashCode(), ExprValueUtils.ipValue(s).hashCode())); } } From 4284f6ffc1cc2427e7090b3a4fc16adf59a9c9b7 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 09:47:58 -0800 Subject: [PATCH 75/81] Trivial documentation cleanup. Signed-off-by: currantw --- .../java/org/opensearch/sql/data/model/ExprValueUtils.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index cc293e0708..aa3bd15aa2 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -212,9 +212,7 @@ public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } - /** - * Splits the given qualified name into components and returns the result as a modifiable list. - */ + /** Splits the given qualified name into components and returns the result. 
*/ public List splitQualifiedName(String qualifiedName) { return Arrays.asList(QUALIFIED_NAME_SEPARATOR_PATTERN.split(qualifiedName, -1)); } From 1e518812772f1a6407df7fc3f87fecb43de7df2d Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 10:43:28 -0800 Subject: [PATCH 76/81] Fix doctest Signed-off-by: currantw --- docs/user/ppl/cmd/flatten.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index e786cd401b..8971788ed5 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -63,8 +63,8 @@ PPL query:: +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | name | location | |------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Seattle | {'state': 'Washington', 'country': 'United States', 'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'latitude': 47.6061, 'longitude': -122.3328} | - | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'latitude': 49.2827, 'longitude': -123.1207} | + | Seattle | {'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'country': 'United States', 'state': 'Washington', 'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | {'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'country': 'Canada', 'province': 'British Columbia', 'latitude': 49.2827, 'longitude': -123.1207} | | Null Location | null | | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | 
+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ From c4f732e42f98673095a1912d5da41198b9167751 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 14:30:26 -0800 Subject: [PATCH 77/81] General cleanup, combine flatten and expand datasets. Signed-off-by: currantw --- .../org/opensearch/sql/analysis/Analyzer.java | 95 ++++++++++++------- .../sql/data/model/ExprTupleValue.java | 2 +- .../sql/planner/physical/ExpandOperator.java | 22 +++-- .../sql/planner/physical/FlattenOperator.java | 4 +- .../planner/logical/LogicalExpandTest.java | 3 +- docs/user/dql/metadata.rst | 5 +- docs/user/ppl/cmd/expand.rst | 88 ++++++++--------- docs/user/ppl/cmd/flatten.rst | 55 ++++++----- doctest/test_data/expand.json | 5 - doctest/test_data/expand_flatten.json | 5 + doctest/test_data/flatten.json | 4 - doctest/test_docs.py | 8 +- doctest/test_mapping/expand.json | 19 ---- .../test_mapping/expand_flatten.json | 12 ++- .../sql/legacy/SQLIntegTestCase.java | 18 ++-- .../org/opensearch/sql/legacy/TestUtils.java | 9 +- .../opensearch/sql/legacy/TestsConstants.java | 3 +- .../opensearch/sql/ppl/ExpandCommandIT.java | 30 +++--- .../org/opensearch/sql/ppl/ExplainIT.java | 10 +- .../opensearch/sql/ppl/FlattenCommandIT.java | 73 ++++++++++---- integ-test/src/test/resources/expand.json | 10 -- .../src/test/resources/expand_flatten.json | 10 ++ integ-test/src/test/resources/flatten.json | 8 -- .../expand_flatten_mapping.json | 14 ++- .../indexDefinitions/expand_mapping.json | 19 ---- 25 files changed, 281 insertions(+), 250 deletions(-) delete mode 100644 doctest/test_data/expand.json create mode 100644 doctest/test_data/expand_flatten.json delete mode 100644 doctest/test_data/flatten.json delete mode 100644 doctest/test_mapping/expand.json rename integ-test/src/test/resources/indexDefinitions/flatten_mapping.json => 
doctest/test_mapping/expand_flatten.json (75%) delete mode 100644 integ-test/src/test/resources/expand.json create mode 100644 integ-test/src/test/resources/expand_flatten.json delete mode 100644 integ-test/src/test/resources/flatten.json rename doctest/test_mapping/flatten.json => integ-test/src/test/resources/indexDefinitions/expand_flatten_mapping.json (75%) delete mode 100644 integ-test/src/test/resources/indexDefinitions/expand_mapping.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index bb689e5b5b..ae7ee12d61 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -39,7 +39,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.DataSourceSchemaName; -import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Argument; @@ -337,11 +336,9 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { TypeEnvironment newEnv = context.peek(); aggregators.forEach( aggregator -> - newEnv.define( - new Symbol(Namespace.FIELD_NAME, aggregator.getName()), aggregator.type())); + newEnv.define(new Symbol(FIELD_NAME, aggregator.getName()), aggregator.type())); groupBys.forEach( - group -> - newEnv.define(new Symbol(Namespace.FIELD_NAME, group.getNameOrAlias()), group.type())); + group -> newEnv.define(new Symbol(FIELD_NAME, group.getNameOrAlias()), group.type())); return new LogicalAggregation(child, aggregators, groupBys); } @@ -366,9 +363,8 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); groupBys.forEach( - group -> newEnv.define(new Symbol(Namespace.FIELD_NAME, 
group.toString()), group.type())); - fields.forEach( - field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type())); + group -> newEnv.define(new Symbol(FIELD_NAME, group.toString()), group.type())); + fields.forEach(field -> newEnv.define(new Symbol(FIELD_NAME, field.toString()), field.type())); List options = node.getNoOfResults(); Integer noOfResults = (Integer) options.get(0).getValue().getValue(); @@ -434,8 +430,7 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) { context.push(); TypeEnvironment newEnv = context.peek(); namedExpressions.forEach( - expr -> - newEnv.define(new Symbol(Namespace.FIELD_NAME, expr.getNameOrAlias()), expr.type())); + expr -> newEnv.define(new Symbol(FIELD_NAME, expr.getNameOrAlias()), expr.type())); List namedParseExpressions = context.getNamedParseExpressions(); return new LogicalProject(child, namedExpressions, namedParseExpressions); } @@ -457,7 +452,37 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } - /** Builds and returns a {@link LogicalExpand} corresponding to the given expand node. */ + /** + * Builds and returns a {@link LogicalExpand} corresponding to the given expand node. + * + *

Example + * + *

Input Data: + * + *

+   * [
+   *    {
+   *       collection: [ "value_1", "value_2" ],
+   *       integer: 0
+   *    }
+   * ]
+   * 
+ * + * Query: expand collection + * + *
+   * [
+   *    {
+   *       collection: "value_1",
+   *       integer: 0
+   *    },
+   *    {
+   *       collection: "value_2",
+   *       integer: 0
+   *    }
+   * ]
+   * 
+ */ @Override public LogicalPlan visitExpand(Expand node, AnalysisContext context) { LogicalPlan child = node.getChild().getFirst().accept(this, context); @@ -475,37 +500,43 @@ public LogicalPlan visitExpand(Expand node, AnalysisContext context) { *

Input Data: * *

-   * {
-   *   struct: {
-   *     integer: 0,
-   *     nested_struct: { string: "value" }
-   *   }
-   * }
+   * [
+   *    {
+   *       struct: {
+   *         integer: 0,
+   *         nested_struct: { string: "value" }
+   *       }
+   *    }
+   * ]
    * 
* * Query 1: flatten struct * *
-   * {
-   *   struct: {
-   *     integer: 0,
-   *     nested_struct: { string: "value" }
-   *   },
-   *   integer: 0,
-   *   nested_struct: { string: "value" }
-   * }
+   * [
+   *    {
+   *       struct: {
+   *         integer: 0,
+   *         nested_struct: { string: "value" }
+   *       },
+   *       integer: 0,
+   *       nested_struct: { string: "value" }
+   *    }
+   * ]
    * 
* * Query 2: flatten struct.nested_struct * *
-   * {
-   *   struct: {
-   *     integer: 0,
-   *     nested_struct: { string: "value" },
-   *     string: "value"
-   *   }
-   * }
+   * [
+   *    {
+   *       struct: {
+   *         integer: 0,
+   *         nested_struct: { string: "value" },
+   *         string: "value"
+   *       }
+   *    }
+   * ]
    * 
*/ @Override diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java index 0e88682636..b7abe0d256 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprTupleValue.java @@ -20,7 +20,7 @@ @RequiredArgsConstructor public class ExprTupleValue extends AbstractExprValue { - private final Map valueMap; + private final LinkedHashMap valueMap; public static ExprTupleValue fromExprValueMap(Map map) { LinkedHashMap linkedHashMap = new LinkedHashMap<>(map); diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index 100a1dfc70..9b30710ccd 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -12,16 +12,19 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Queue; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; +import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.ReferenceExpression; -/** Flattens the specified field from the input and returns the result. */ +/** Expands the specified field from the input and returns the result. 
*/ @Getter @ToString @RequiredArgsConstructor @@ -31,7 +34,7 @@ public class ExpandOperator extends PhysicalPlan { private final PhysicalPlan input; private final ReferenceExpression field; - private LinkedList expandedRows = new LinkedList<>(); + private Queue expandedRows = new LinkedList<>(); @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { @@ -54,17 +57,16 @@ public boolean hasNext() { @Override public ExprValue next() { - return expandedRows.removeFirst(); + return expandedRows.remove(); } /** - * Expands the nested {@link org.opensearch.sql.data.model.ExprCollectionValueValue} value with - * the specified qualified name within the given root value, and returns the results. If the root - * value does not contain a nested value with the qualified name, if the nested value is null or - * missing, or if the nested value in not an {@link - * org.opensearch.sql.data.model.ExprCollectionValueValue}, returns the unmodified root value. - * Raises {@link org.opensearch.sql.exception.SemanticCheckException} if the root value is not an - * {@link org.opensearch.sql.data.model.ExprTupleValue}. + * Expands the nested {@link ExprCollectionValue} with the specified qualified name within the + * given root value, and returns the results. If the root value does not contain a nested value + * with the qualified name, if the nested value is null or missing, or if the nested value is not + * an {@link ExprCollectionValue}, returns the unmodified root value. + * + * @throws SemanticCheckException if the root value is not an {@link ExprTupleValue}.
*/ private static List expandNestedExprValue( ExprValue rootExprValue, String qualifiedName) { diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java index 44ab2279b6..7412bc4a2f 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FlattenOperator.java @@ -52,8 +52,8 @@ public ExprValue next() { * Flattens the nested {@link ExprTupleValue} with the specified qualified name within the given * root value, and returns the result. If the root value does not contain a nested value with the * qualified name, or if the nested value is null or missing, returns the unmodified root value. - * Raises {@link org.opensearch.sql.exception.SemanticCheckException} if the root value or nested - * value is not an {@link ExprTupleValue}. + * + * @throws SemanticCheckException if the root or nested value is not an {@link ExprTupleValue}. 
*/ private static ExprValue flattenNestedExprValue(ExprValue rootExprValue, String qualifiedName) { diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java index 427844cc0b..a5f7cffb44 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalExpandTest.java @@ -47,7 +47,6 @@ void testExpandArray() { @Test void testExpandInvalidFieldName() { UnresolvedPlan unresolved = AstDSL.expand(AstDSL.relation(TABLE_NAME), AstDSL.field("invalid")); - String msg = assertThrows(SemanticCheckException.class, () -> analyze(unresolved)).getMessage(); - assertEquals("can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env", msg); + assertThrows(SemanticCheckException.class, () -> analyze(unresolved)); } } diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index c0dbf77b68..420ae64397 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 13/13 + fetched rows / total rows = 12/12 +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -44,8 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null 
| null | null | - | docTestCluster | null | expand | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | flatten | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | expand_flatten | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 44f847f0e5..b241c6a156 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -26,71 +26,71 @@ Example 1: Expand a field PPL query:: - os> source=expand | expand team | fields city, team.name + os> source=expand_flatten | expand teams | fields city, teams.name fetched rows / total rows = 7/7 - +--------------+-------------------+ - | city | team.name | - |--------------+-------------------| - | Seattle | Seattle Seahawks | - | Seattle | Seattle Kraken | - | Vancouver | Vancouver Canucks | - | Vancouver | BC Lions | - | San Antonio | San Antonio Spurs | - | Null Team | null | - | Missing Team | null | - +--------------+-------------------+ + +--------------+--------------------+ + | city | teams.name | + |--------------+--------------------| + | Seattle | Seattle Seahawks | + | Seattle | Seattle Kraken | + | Vancouver | Vancouver Canucks | + | Vancouver | BC Lions | + | San Antonio | San Antonio Spurs | + | Null City | null | + | Missing City | null | + +--------------+--------------------+ Example 2: Expand a nested field ================================= PPL query:: - os> source=expand | where city = 'San Antonio' | expand team.title | fields team.name, team.title + os> source=expand_flatten | where city = 'San Antonio' | expand teams.title | fields teams.name, teams.title fetched rows / total 
rows = 5/5 - +-------------------+------------+ - | team.name | team.title | - |-------------------+------------| - | San Antonio Spurs | 1999 | - | San Antonio Spurs | 2003 | - | San Antonio Spurs | 2005 | - | San Antonio Spurs | 2007 | - | San Antonio Spurs | 2014 | - +-------------------+------------+ + +-------------------+--------------+ + | teams.name | teams.title | + |-------------------+--------------| + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + +-------------------+--------------+ Example 3: Expand multiple fields ================================== PPL query:: - os> source=expand | expand team | expand team.title | fields team.name, team.title + os> source=expand_flatten | expand teams | expand teams.title | fields teams.name, teams.title fetched rows / total rows = 16/16 - +-------------------+------------+ - | team.name | team.title | - |-------------------+------------| - | Seattle Seahawks | 2014 | - | Seattle Kraken | null | - | Vancouver Canucks | null | - | BC Lions | 1964 | - | BC Lions | 1985 | - | BC Lions | 1994 | - | BC Lions | 2000 | - | BC Lions | 2006 | - | BC Lions | 2011 | - | San Antonio Spurs | 1999 | - | San Antonio Spurs | 2003 | - | San Antonio Spurs | 2005 | - | San Antonio Spurs | 2007 | - | San Antonio Spurs | 2014 | - | null | null | - | null | null | - +-------------------+------------+ + +-------------------+--------------+ + | teams.name | teams.title | + |-------------------+--------------| + | Seattle Seahawks | 2014 | + | Seattle Kraken | null | + | Vancouver Canucks | null | + | BC Lions | 1964 | + | BC Lions | 1985 | + | BC Lions | 1994 | + | BC Lions | 2000 | + | BC Lions | 2006 | + | BC Lions | 2011 | + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + | null | null | + | null | null | + 
+-------------------+--------------+ Example 4: Expand and flatten a field ===================================== PPL query:: - os> source=expand | expand team | flatten team | fields name, title + os> source=expand_flatten | expand teams | flatten teams | fields name, title fetched rows / total rows = 7/7 +-------------------+---------------------------------+ | name | title | diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 8971788ed5..3a2f812c85 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -26,15 +26,16 @@ Example 1: Flatten an object field PPL query:: - os> source=flatten | flatten location | fields name, country, province, coordinates, state - fetched rows / total rows = 4/4 + os> source=expand_flatten | flatten location | fields city, country, province, coordinates, state + fetched rows / total rows = 5/5 +------------------+---------------+------------------+-----------------------------------------------+------------+ - | name | country | province | coordinates | state | + | city | country | province | coordinates | state | |------------------+---------------+------------------+-----------------------------------------------+------------| | Seattle | United States | null | {'latitude': 47.6061, 'longitude': -122.3328} | Washington | | Vancouver | Canada | British Columbia | {'latitude': 49.2827, 'longitude': -123.1207} | null | - | Null Location | null | null | null | null | - | Null Coordinates | Australia | null | null | Victoria | + | San Antonio | United States | null | {'latitude': 29.4252, 'longitude': -98.4946} | Texas | + | Null City | null | null | null | null | + | Missing City | null | null | null | null | +------------------+---------------+------------------+-----------------------------------------------+------------+ Example 2: Flatten multiple object fields @@ -42,39 +43,41 @@ Example 2: Flatten multiple object fields PPL query:: - os> source=flatten | flatten location |
flatten coordinates | fields name, location, latitude, longitude - fetched rows / total rows = 4/4 - +------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ - | name | location | latitude | longitude | - |------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------| - | Seattle | {'state': 'Washington', 'country': 'United States', 'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}} | 47.6061 | -122.3328 | - | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}} | 49.2827 | -123.1207 | - | Null Location | null | null | null | - | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | null | null | - +------------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ + os> source=expand_flatten | flatten location | flatten coordinates | fields city, location, latitude, longitude + fetched rows / total rows = 5/5 + +--------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ + | city | location | latitude | longitude | + |--------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------| + | Seattle | {'state': 'Washington', 'country': 'United States', 'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}} | 47.6061 | -122.3328 | + | Vancouver | {'province': 'British Columbia', 'country': 'Canada', 'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}} | 49.2827 | -123.1207 | + | San Antonio | {'state': 'Texas', 'country': 'United States', 'coordinates': {'latitude': 29.4252, 
'longitude': -98.4946}} | 29.4252 | -98.4946 | + | Null City | null | null | null | + | Missing City | null | null | null | + +--------------+---------------------------------------------------------------------------------------------------------------------+----------+-----------+ Example 3: Flatten a nested object field ======================================== PPL query:: - os> source=flatten | flatten location.coordinates | fields name, location - fetched rows / total rows = 4/4 - +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | name | location | - |------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Seattle | {'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'country': 'United States', 'state': 'Washington', 'latitude': 47.6061, 'longitude': -122.3328} | - | Vancouver | {'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'country': 'Canada', 'province': 'British Columbia', 'latitude': 49.2827, 'longitude': -123.1207} | - | Null Location | null | - | Null Coordinates | {'state': 'Victoria', 'country': 'Australia'} | - +------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + os> source=expand_flatten | flatten location.coordinates | fields city, location + fetched rows / total rows = 5/5 + +--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | city | location | + 
|--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | Seattle | {'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'country': 'United States', 'state': 'Washington', 'latitude': 47.6061, 'longitude': -122.3328} | + | Vancouver | {'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'country': 'Canada', 'province': 'British Columbia', 'latitude': 49.2827, 'longitude': -123.1207} | + | San Antonio | {'coordinates': {'latitude': 29.4252, 'longitude': -98.4946}, 'country': 'United States', 'state': 'Texas', 'latitude': 29.4252, 'longitude': -98.4946} | + | Null City | null | + | Missing City | null | + +--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Example 4: Flatten and expand an object field ============================================= PPL query:: - os> source=expand | where city = 'San Antonio' | flatten team | expand title | fields name, title + os> source=expand_flatten | where city = 'San Antonio' | flatten teams | expand title | fields name, title fetched rows / total rows = 5/5 +-------------------+-------+ | name | title | diff --git a/doctest/test_data/expand.json b/doctest/test_data/expand.json deleted file mode 100644 index 2861aa0a2a..0000000000 --- a/doctest/test_data/expand.json +++ /dev/null @@ -1,5 +0,0 @@ -{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} -{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} -{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} -{"city": "Null Team", "team": null} -{"city": "Missing Team"} \ No newline at end of file diff 
--git
a/doctest/test_data/expand_flatten.json b/doctest/test_data/expand_flatten.json new file mode 100644 index 0000000000..eabd663834 --- /dev/null +++ b/doctest/test_data/expand_flatten.json @@ -0,0 +1,5 @@ +{"city": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}, "teams":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} +{"city": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}, "teams":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} +{"city": "San Antonio", "location": { "state": "Texas", "country": "United States", "coordinates": {"latitude": 29.4252, "longitude": -98.4946}}, "teams": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} +{"city": "Null City", "location": null, "teams": null} +{"city": "Missing City"} \ No newline at end of file diff --git a/doctest/test_data/flatten.json b/doctest/test_data/flatten.json deleted file mode 100644 index eb4cf6c2a2..0000000000 --- a/doctest/test_data/flatten.json +++ /dev/null @@ -1,4 +0,0 @@ -{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} -{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} -{"name": "Null Location", "location": null} -{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} \ No newline at end of file diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 3086c37fda..84fa860fdc 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -31,7 +31,7 @@ DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" JSON_TEST = "json_test" -EXPAND 
= "expand" +EXPAND_FLATTEN = "expand" FLATTEN = "flatten" class DocTestConnection(OpenSearchConnection): @@ -121,8 +121,7 @@ def set_up_test_indices(test): load_file("apache.json", index_name=APACHE) load_file("books.json", index_name=BOOKS) load_file("datasources.json", index_name=DATASOURCES) - load_file("expand.json", index_name=EXPAND) - load_file("flatten.json", index_name=FLATTEN) + load_file("expand_flatten.json", index_name=EXPAND_FLATTEN) load_file("json_test.json", index_name=JSON_TEST) load_file("nested_objects.json", index_name=NESTED) load_file("nyc_taxi.json", index_name=NYC_TAXI) @@ -162,8 +161,7 @@ def tear_down(test): APACHE, BOOKS, EMPLOYEES, - EXPAND, - FLATTEN, + EXPAND_FLATTEN, JSON_TEST, NESTED, NYC_TAXI, diff --git a/doctest/test_mapping/expand.json b/doctest/test_mapping/expand.json deleted file mode 100644 index 76d6e8a8bf..0000000000 --- a/doctest/test_mapping/expand.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "mappings": { - "properties": { - "city": { - "type": "keyword" - }, - "team": { - "properties": { - "name": { - "type":"keyword" - }, - "title": { - "type": "integer" - } - } - } - } - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/indexDefinitions/flatten_mapping.json b/doctest/test_mapping/expand_flatten.json similarity index 75% rename from integ-test/src/test/resources/indexDefinitions/flatten_mapping.json rename to doctest/test_mapping/expand_flatten.json index e85047c8a8..604a654dcd 100644 --- a/integ-test/src/test/resources/indexDefinitions/flatten_mapping.json +++ b/doctest/test_mapping/expand_flatten.json @@ -1,7 +1,7 @@ { "mappings": { "properties": { - "name": { + "city": { "type": "keyword" }, "location": { @@ -28,6 +28,16 @@ } } } + }, + "teams": { + "properties": { + "name": { + "type":"keyword" + }, + "title": { + "type": "integer" + } + } } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java 
b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 94c04b2754..a3ffe5471e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -20,8 +20,7 @@ import static org.opensearch.sql.legacy.TestUtils.getDogs2IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getExpandIndexMapping; -import static org.opensearch.sql.legacy.TestUtils.getFlattenIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getExpandFlattenIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeoIpIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; @@ -761,16 +760,11 @@ public enum Index { "json", getJsonTestIndexMapping(), "src/test/resources/json_test.json"), - EXPAND( - TestsConstants.TEST_INDEX_EXPAND, - "expand", - getExpandIndexMapping(), - "src/test/resources/expand.json"), - FLATTEN( - TestsConstants.TEST_INDEX_FLATTEN, - "flatten", - getFlattenIndexMapping(), - "src/test/resources/flatten.json"), + EXPAND_FLATTEN( + TestsConstants.TEST_INDEX_EXPAND_FLATTEN, + "expand_flatten", + getExpandFlattenIndexMapping(), + "src/test/resources/expand_flatten.json"), DATA_TYPE_ALIAS( TestsConstants.TEST_INDEX_ALIAS, "alias", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index c0e75f873e..4da4e2d5e7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -260,8 +260,8 @@ public static String getJsonTestIndexMapping() { return getMappingFile(mappingFile); } - public 
static String getExpandIndexMapping() { - String mappingFile = "expand_mapping.json"; + public static String getExpandFlattenIndexMapping() { + String mappingFile = "expand_flatten_mapping.json"; return getMappingFile(mappingFile); } @@ -270,11 +270,6 @@ public static String getAliasIndexMapping() { return getMappingFile(mappingFile); } - public static String getFlattenIndexMapping() { - String mappingFile = "flatten_mapping.json"; - return getMappingFile(mappingFile); - } - public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 037cbc95e6..a6a120feeb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -34,8 +34,7 @@ public class TestsConstants { public static final String TEST_INDEX_DOG3 = TEST_INDEX + "_dog3"; public static final String TEST_INDEX_DOGSUBQUERY = TEST_INDEX + "_subquery"; public static final String TEST_INDEX_EMPLOYEE_NESTED = TEST_INDEX + "_employee_nested"; - public static final String TEST_INDEX_EXPAND = TEST_INDEX + "_expand"; - public static final String TEST_INDEX_FLATTEN = TEST_INDEX + "_flatten"; + public static final String TEST_INDEX_EXPAND_FLATTEN = TEST_INDEX + "_expand_flatten"; public static final String TEST_INDEX_GAME_OF_THRONES = TEST_INDEX + "_game_of_thrones"; public static final String TEST_INDEX_GEOIP = TEST_INDEX + "_geoip"; public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index 0530dfcd8d..31a20a8f31 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -5,7 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -21,16 +21,17 @@ public class ExpandCommandIT extends PPLIntegTestCase { @Override public void init() throws IOException { - loadIndex(Index.EXPAND); + loadIndex(Index.EXPAND_FLATTEN); } @Test public void testBasic() throws IOException { String query = - StringUtils.format("source=%s | expand team | fields city, team.name", TEST_INDEX_EXPAND); + StringUtils.format( + "source=%s | expand teams | fields city, teams.name", TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); - verifySchema(result, schema("city", "string"), schema("team.name", "string")); + verifySchema(result, schema("city", "string"), schema("teams.name", "string")); verifyDataRows( result, rows("Seattle", "Seattle Seahawks"), @@ -38,20 +39,20 @@ public void testBasic() throws IOException { rows("Vancouver", "Vancouver Canucks"), rows("Vancouver", "BC Lions"), rows("San Antonio", "San Antonio Spurs"), - rows("Null Team", null), - rows("Missing Team", null)); + rows("Null City", null), + rows("Missing City", null)); } @Test public void testNested() throws IOException { String query = StringUtils.format( - "source=%s | where city = 'San Antonio' | expand team.title | fields team.name," - + " team.title", - TEST_INDEX_EXPAND); + "source=%s | where city = 'San Antonio' | expand teams.title | fields teams.name," + + " teams.title", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); - verifySchema(result, schema("team.name", "string"), 
schema("team.title", "integer")); + verifySchema(result, schema("teams.name", "string"), schema("teams.title", "integer")); verifyDataRows( result, rows("San Antonio Spurs", 1999), @@ -65,11 +66,11 @@ public void testNested() throws IOException { public void testMultiple() throws IOException { String query = StringUtils.format( - "source=%s | expand team | expand team.title | fields team.name, team.title", - TEST_INDEX_EXPAND); + "source=%s | expand teams | expand teams.title | fields teams.name, teams.title", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); - verifySchema(result, schema("team.name", "string"), schema("team.title", "integer")); + verifySchema(result, schema("teams.name", "string"), schema("teams.title", "integer")); verifyDataRows( result, rows("Seattle Seahawks", 2014), @@ -94,7 +95,8 @@ public void testMultiple() throws IOException { public void testExpandFlatten() throws IOException { String query = StringUtils.format( - "source=%s | expand team | flatten team | fields name, title", TEST_INDEX_EXPAND); + "source=%s | expand teams | flatten teams | fields name, title", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); verifySchema(result, schema("name", "string"), schema("title", "integer")); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 4111285e63..5ec71524e2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -5,8 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_FLATTEN; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import com.google.common.io.Resources; @@ -22,8 +21,7 @@ 
public class ExplainIT extends PPLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.ACCOUNT); - loadIndex(Index.EXPAND); - loadIndex(Index.FLATTEN); + loadIndex(Index.EXPAND_FLATTEN); } @Test @@ -133,7 +131,7 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testExpand() throws Exception { - String query = StringUtils.format("source=%s | expand team", TEST_INDEX_EXPAND); + String query = StringUtils.format("source=%s | expand team", TEST_INDEX_EXPAND_FLATTEN); String actual = explainQueryToString(query); String expected = loadFromFile("expectedOutput/ppl/explain_expand.json"); assertJsonEquals(expected, actual); @@ -141,7 +139,7 @@ public void testExpand() throws Exception { @Test public void testFlatten() throws Exception { - String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_FLATTEN); + String query = StringUtils.format("source=%s | flatten location", TEST_INDEX_EXPAND_FLATTEN); String actual = explainQueryToString(query); String expected = loadFromFile("expectedOutput/ppl/explain_flatten.json"); assertJsonEquals(expected, actual); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index d3ae3f7ee7..f61b8de714 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -5,7 +5,7 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_FLATTEN; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_EXPAND_FLATTEN; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -21,20 +21,20 @@ public class FlattenCommandIT extends PPLIntegTestCase { @Override public void init() throws 
IOException { - loadIndex(Index.FLATTEN); + loadIndex(Index.EXPAND_FLATTEN); } @Test public void testBasic() throws IOException { String query = StringUtils.format( - "source=%s | flatten location | fields name, country, province, coordinates, state", - TEST_INDEX_FLATTEN); + "source=%s | flatten location | fields city, country, province, coordinates, state", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); verifySchema( result, - schema("name", "string"), + schema("city", "string"), schema("country", "string"), schema("province", "string"), schema("coordinates", "struct"), @@ -53,22 +53,28 @@ public void testBasic() throws IOException { "British Columbia", Map.of("latitude", 49.2827, "longitude", -123.1207), null), - rows("Null Location", null, null, null, null), - rows("Null Coordinates", "Australia", null, null, "Victoria")); + rows( + "San Antonio", + "United States", + null, + Map.of("latitude", 29.4252, "longitude", -98.4946), + "Texas"), + rows("Null City", null, null, null, null), + rows("Missing City", null, null, null, null)); } @Test public void testMultiple() throws IOException { String query = StringUtils.format( - "source=%s | flatten location | flatten coordinates | fields name, location, latitude," + "source=%s | flatten location | flatten coordinates | fields city, location, latitude," + " longitude", - TEST_INDEX_FLATTEN); + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); verifySchema( result, - schema("name", "string"), + schema("city", "string"), schema("location", "struct"), schema("latitude", "double"), schema("longitude", "double")); @@ -90,18 +96,27 @@ public void testMultiple() throws IOException { Map.entry("coordinates", Map.of("latitude", 49.2827, "longitude", -123.1207))), 49.2827, -123.1207), - rows("Null Location", null, null, null), - rows("Null Coordinates", Map.of("state", "Victoria", "country", "Australia"), null, null)); + rows( + "San Antonio", + Map.ofEntries( + Map.entry("state", 
"Texas"), + Map.entry("country", "United States"), + Map.entry("coordinates", Map.of("latitude", 29.4252, "longitude", -98.4946))), + 29.4252, + -98.4946), + rows("Null City", null, null, null), + rows("Missing City", null, null, null)); } @Test public void testNested() throws IOException { String query = StringUtils.format( - "source=%s | flatten location.coordinates | fields name, location", TEST_INDEX_FLATTEN); + "source=%s | flatten location.coordinates | fields city, location", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); - verifySchema(result, schema("name", "string"), schema("location", "struct")); + verifySchema(result, schema("city", "string"), schema("location", "struct")); verifyDataRows( result, rows( @@ -120,7 +135,33 @@ public void testNested() throws IOException { Map.entry("coordinates", Map.of("latitude", 49.2827, "longitude", -123.1207)), Map.entry("latitude", 49.2827), Map.entry("longitude", -123.1207))), - rows("Null Location", null), - rows("Null Coordinates", Map.of("state", "Victoria", "country", "Australia"))); + rows( + "San Antonio", + Map.ofEntries( + Map.entry("state", "Texas"), + Map.entry("country", "United States"), + Map.entry("coordinates", Map.of("latitude", 29.4252, "longitude", -98.4946)), + Map.entry("latitude", 29.4252), + Map.entry("longitude", -98.4946))), + rows("Null City", null), + rows("Missing City", null)); + } + + @Test + public void testWithFlatten() throws IOException { + String query = + StringUtils.format( + "source=%s | where city = 'San Antonio' | flatten teams | expand title | fields name, title", + TEST_INDEX_EXPAND_FLATTEN); + JSONObject result = executeQuery(query); + + verifySchema(result, schema("name", "string"), schema("title", "integer")); + verifyDataRows( + result, + rows("San Antonio Spurs", 1999), + rows("San Antonio Spurs", 2003), + rows("San Antonio Spurs", 2005), + rows("San Antonio Spurs", 2007), + rows("San Antonio Spurs", 2014)); } } diff --git 
a/integ-test/src/test/resources/expand.json b/integ-test/src/test/resources/expand.json deleted file mode 100644 index cc343c1689..0000000000 --- a/integ-test/src/test/resources/expand.json +++ /dev/null @@ -1,10 +0,0 @@ -{"index":{"_id":"1"}} -{"city": "Seattle", "team":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} -{"index":{"_id":"2"}} -{"city": "Vancouver", "team":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} -{"index":{"_id":"3"}} -{"city": "San Antonio", "team": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} -{"index":{"_id":"4"}} -{"city": "Null Team", "team": null} -{"index":{"_id":"5"}} -{"city": "Missing Team"} diff --git a/integ-test/src/test/resources/expand_flatten.json b/integ-test/src/test/resources/expand_flatten.json new file mode 100644 index 0000000000..3918f9fdcf --- /dev/null +++ b/integ-test/src/test/resources/expand_flatten.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"city": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}, "teams":[{"name": "Seattle Seahawks", "title": 2014}, {"name": "Seattle Kraken", "title": null}]} +{"index":{"_id":"2"}} +{"city": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}, "teams":[{"name": "Vancouver Canucks", "title": null}, {"name": "BC Lions", "title": [1964, 1985, 1994, 2000, 2006, 2011]}]} +{"index":{"_id":"3"}} +{"city": "San Antonio", "location": { "state": "Texas", "country": "United States", "coordinates": {"latitude": 29.4252, "longitude": -98.4946}}, "teams": {"name": "San Antonio Spurs", "title": [1999, 2003, 2005, 2007, 2014]}} +{"index":{"_id":"4"}} +{"city": "Null City", "location": null, "teams": null} +{"index":{"_id":"5"}} +{"city": "Missing City"} diff --git 
a/integ-test/src/test/resources/flatten.json b/integ-test/src/test/resources/flatten.json deleted file mode 100644 index 28c8ce4940..0000000000 --- a/integ-test/src/test/resources/flatten.json +++ /dev/null @@ -1,8 +0,0 @@ -{"index":{"_id":"1"}} -{"name": "Seattle", "location": { "state": "Washington", "country": "United States", "coordinates": {"latitude": 47.6061, "longitude": -122.3328}}} -{"index":{"_id":"2"}} -{"name": "Vancouver", "location": { "province": "British Columbia", "country": "Canada", "coordinates": {"latitude": 49.2827, "longitude": -123.1207}}} -{"index":{"_id":"3"}} -{"name": "Null Location", "location": null} -{"index":{"_id":"4"}} -{"name": "Null Coordinates", "location": { "state": "Victoria", "country": "Australia", "coordinates": null}} diff --git a/doctest/test_mapping/flatten.json b/integ-test/src/test/resources/indexDefinitions/expand_flatten_mapping.json similarity index 75% rename from doctest/test_mapping/flatten.json rename to integ-test/src/test/resources/indexDefinitions/expand_flatten_mapping.json index 6b96246d5d..604a654dcd 100644 --- a/doctest/test_mapping/flatten.json +++ b/integ-test/src/test/resources/indexDefinitions/expand_flatten_mapping.json @@ -1,7 +1,7 @@ { "mappings": { "properties": { - "name": { + "city": { "type": "keyword" }, "location": { @@ -28,7 +28,17 @@ } } } + }, + "teams": { + "properties": { + "name": { + "type":"keyword" + }, + "title": { + "type": "integer" + } + } } } } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/indexDefinitions/expand_mapping.json b/integ-test/src/test/resources/indexDefinitions/expand_mapping.json deleted file mode 100644 index 76d6e8a8bf..0000000000 --- a/integ-test/src/test/resources/indexDefinitions/expand_mapping.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "mappings": { - "properties": { - "city": { - "type": "keyword" - }, - "team": { - "properties": { - "name": { - "type":"keyword" - }, - "title": { - "type": "integer" - } - } - } - } - } -} \ No 
newline at end of file From 582fdaa3fa9e31061797dfcd616ee1fef85f40f3 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 15:03:43 -0800 Subject: [PATCH 78/81] Spotless Signed-off-by: currantw --- .../opensearch/sql/ppl/FlattenCommandIT.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java index f61b8de714..e73b74434f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FlattenCommandIT.java @@ -150,18 +150,19 @@ public void testNested() throws IOException { @Test public void testWithFlatten() throws IOException { String query = - StringUtils.format( - "source=%s | where city = 'San Antonio' | flatten teams | expand title | fields name, title", - TEST_INDEX_EXPAND_FLATTEN); + StringUtils.format( + "source=%s | where city = 'San Antonio' | flatten teams | expand title | fields name," + + " title", + TEST_INDEX_EXPAND_FLATTEN); JSONObject result = executeQuery(query); verifySchema(result, schema("name", "string"), schema("title", "integer")); verifyDataRows( - result, - rows("San Antonio Spurs", 1999), - rows("San Antonio Spurs", 2003), - rows("San Antonio Spurs", 2005), - rows("San Antonio Spurs", 2007), - rows("San Antonio Spurs", 2014)); + result, + rows("San Antonio Spurs", 1999), + rows("San Antonio Spurs", 2003), + rows("San Antonio Spurs", 2005), + rows("San Antonio Spurs", 2007), + rows("San Antonio Spurs", 2014)); } } From e8494f22f5678c62dfad8bdaae306a3b91a794a3 Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 15:46:46 -0800 Subject: [PATCH 79/81] Fix failing doctests Signed-off-by: currantw --- docs/user/ppl/cmd/expand.rst | 80 +++++++++++++++++------------------ docs/user/ppl/cmd/flatten.rst | 20 ++++----- doctest/test_docs.py | 3 +- 3 files changed, 51 insertions(+), 52 
deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index b241c6a156..00f179105b 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -28,17 +28,17 @@ PPL query:: os> source=expand_flatten | expand teams | fields city, teams.name fetched rows / total rows = 7/7 - +--------------+--------------------+ - | city | teams.name | - |--------------+--------------------| - | Seattle | Seattle Seahawks | - | Seattle | Seattle Kraken | - | Vancouver | Vancouver Canucks | - | Vancouver | BC Lions | - | San Antonio | San Antonio Spurs | - | Null City | null | - | Missing City | null | - +--------------+--------------------+ + +--------------+-------------------+ + | city | teams.name | + |--------------+-------------------| + | Seattle | Seattle Seahawks | + | Seattle | Seattle Kraken | + | Vancouver | Vancouver Canucks | + | Vancouver | BC Lions | + | San Antonio | San Antonio Spurs | + | Null City | null | + | Missing City | null | + +--------------+-------------------+ Example 2: Expand a nested field ================================= @@ -47,15 +47,15 @@ PPL query:: os> source=expand_flatten | where city = 'San Antonio' | expand teams.title | fields teams.name, teams.title fetched rows / total rows = 5/5 - +-------------------+--------------+ - | teams.name | teams.title | - |-------------------+--------------| - | San Antonio Spurs | 1999 | - | San Antonio Spurs | 2003 | - | San Antonio Spurs | 2005 | - | San Antonio Spurs | 2007 | - | San Antonio Spurs | 2014 | - +-------------------+--------------+ + +-------------------+-------------+ + | teams.name | teams.title | + |-------------------+-------------| + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + +-------------------+-------------+ Example 3: Expand multiple fields ================================== @@ -64,26 +64,26 @@ PPL query:: os> 
source=expand_flatten | expand teams | expand teams.title | fields teams.name, teams.title fetched rows / total rows = 16/16 - +-------------------+--------------+ - | teams.name | teams.title | - |-------------------+--------------| - | Seattle Seahawks | 2014 | - | Seattle Kraken | null | - | Vancouver Canucks | null | - | BC Lions | 1964 | - | BC Lions | 1985 | - | BC Lions | 1994 | - | BC Lions | 2000 | - | BC Lions | 2006 | - | BC Lions | 2011 | - | San Antonio Spurs | 1999 | - | San Antonio Spurs | 2003 | - | San Antonio Spurs | 2005 | - | San Antonio Spurs | 2007 | - | San Antonio Spurs | 2014 | - | null | null | - | null | null | - +-------------------+--------------+ + +-------------------+-------------+ + | teams.name | teams.title | + |-------------------+-------------| + | Seattle Seahawks | 2014 | + | Seattle Kraken | null | + | Vancouver Canucks | null | + | BC Lions | 1964 | + | BC Lions | 1985 | + | BC Lions | 1994 | + | BC Lions | 2000 | + | BC Lions | 2006 | + | BC Lions | 2011 | + | San Antonio Spurs | 1999 | + | San Antonio Spurs | 2003 | + | San Antonio Spurs | 2005 | + | San Antonio Spurs | 2007 | + | San Antonio Spurs | 2014 | + | null | null | + | null | null | + +-------------------+-------------+ Example 4: Expand and flatten a field ===================================== diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 3a2f812c85..18e0f430b3 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -28,15 +28,15 @@ PPL query:: os> source=expand_flatten | flatten location | fields city, country, province, coordinates, state fetched rows / total rows = 5/5 - +------------------+---------------+------------------+-----------------------------------------------+------------+ - | city | country | province | coordinates | state | - |------------------+---------------+------------------+-----------------------------------------------+------------| - | Seattle | United States | null | 
{'latitude': 47.6061, 'longitude': -122.3328} | Washington | - | Vancouver | Canada | British Columbia | {'latitude': 49.2827, 'longitude': -123.1207} | null | - | San Antonio | United States | null | {'latitude': 29.4252, 'longitude': -98.4946 | Texas | - | Null City | null | null | null | null | - | Missing City | null | null | null | null | - +------------------+---------------+------------------+-----------------------------------------------+------------+ + +--------------+---------------+------------------+-----------------------------------------------+------------+ + | city | country | province | coordinates | state | + |--------------+---------------+------------------+-----------------------------------------------+------------| + | Seattle | United States | null | {'latitude': 47.6061, 'longitude': -122.3328} | Washington | + | Vancouver | Canada | British Columbia | {'latitude': 49.2827, 'longitude': -123.1207} | null | + | San Antonio | United States | null | {'latitude': 29.4252, 'longitude': -98.4946} | Texas | + | Null City | null | null | null | null | + | Missing City | null | null | null | null | + +--------------+---------------+------------------+-----------------------------------------------+------------+ Example 2: Flatten multiple object fields ========================================= @@ -67,7 +67,7 @@ PPL query:: |--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Seattle | {'coordinates': {'latitude': 47.6061, 'longitude': -122.3328}, 'country': 'United States', 'state': 'Washington', 'latitude': 47.6061, 'longitude': -122.3328} | | Vancouver | {'coordinates': {'latitude': 49.2827, 'longitude': -123.1207}, 'country': 'Canada', 'province': 'British Columbia', 'latitude': 49.2827, 'longitude': -123.1207} | - | San Antonio | {'coordinates': {'latitude': 29.4252, 'longitude': -98.4946 }, 'country': 'United 
States', 'state': 'Texas', 'latitude': 29.4252, 'longitude': -98.4946} | + | San Antonio | {'coordinates': {'latitude': 29.4252, 'longitude': -98.4946}, 'country': 'United States', 'state': 'Texas', 'latitude': 29.4252, 'longitude': -98.4946} | | Null City | null | | Missing City | null | +--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 84fa860fdc..c876800873 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -31,8 +31,7 @@ DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" JSON_TEST = "json_test" -EXPAND_FLATTEN = "expand" -FLATTEN = "flatten" +EXPAND_FLATTEN = "expand_flatten" class DocTestConnection(OpenSearchConnection): From 028e074accb76c6a104fed95ec1799ff96de61ec Mon Sep 17 00:00:00 2001 From: currantw Date: Wed, 12 Feb 2025 19:09:06 -0800 Subject: [PATCH 80/81] Fix `ExplainIT` test Signed-off-by: currantw --- .../test/java/org/opensearch/sql/ppl/ExplainIT.java | 2 +- .../resources/expectedOutput/ppl/explain_expand.json | 10 +++++----- .../resources/expectedOutput/ppl/explain_flatten.json | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 5ec71524e2..47bb1346f6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -131,7 +131,7 @@ public void testTrendlineWithSortPushDownExplain() throws Exception { @Test public void testExpand() throws Exception { - String query = StringUtils.format("source=%s | expand team", TEST_INDEX_EXPAND_FLATTEN); + String query = StringUtils.format("source=%s | expand teams", TEST_INDEX_EXPAND_FLATTEN); String actual = explainQueryToString(query); String expected = 
loadFromFile("expectedOutput/ppl/explain_expand.json"); assertJsonEquals(expected, actual); diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json index 7935375af9..5e47c130ce 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_expand.json @@ -2,17 +2,17 @@ "root": { "name": "ProjectOperator", "description": { - "fields": "[city, team]" + "fields": "[teams, city, location]" }, "children": [ { "name": "ExpandOperator", "description": { "expandField": { - "attr": "team", - "rawPath": "team", + "attr": "teams", + "rawPath": "teams", "paths": [ - "team" + "teams" ], "type": "STRUCT" } @@ -21,7 +21,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_expand, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_expand_flatten, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json index 2483720800..37956bf0a8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_flatten.json @@ -2,7 +2,7 @@ "root": { "name": "ProjectOperator", "description": { - "fields": "[name, location, country, province, coordinates, state]" + "fields": "[teams, city, location, country, province, coordinates, state]" }, "children": [ { @@ -21,7 +21,7 @@ { "name": 
"OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_flatten, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_expand_flatten, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" }, "children": [] } @@ -29,4 +29,4 @@ } ] } -} \ No newline at end of file +} From 01569ae785a38b5bdd547d3f28fcee9b97d8f9fb Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 13 Feb 2025 11:06:39 -0800 Subject: [PATCH 81/81] Handle empty collections. Signed-off-by: currantw --- .../sql/planner/physical/ExpandOperator.java | 28 +++++++++++++------ .../planner/physical/ExpandOperatorTest.java | 12 ++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java index 9b30710ccd..10359f59bc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ExpandOperator.java @@ -63,7 +63,7 @@ public ExprValue next() { /** * Expands the nested {@link ExprCollectionValue} with the specified qualified name within the * given root value, and returns the results. If the root value does not contain a nested value - * with the qualified name, if the nested value is null or missing, or if the nested value in not + * with the qualified name, if the nested value is null or missing, or if the nested value is not * an {@link ExprCollectionValue}, returns the unmodified root value. * * @throws SemanticCheckException if the root value is not an {@link ExprTupleValue}. 
@@ -90,11 +90,7 @@ private static List expandNestedExprValue( // Expand the child value. List expandedChildExprValues; if (components.size() == 1) { - expandedChildExprValues = - new LinkedList<>( - childExprValue.type().equals(ARRAY) - ? childExprValue.collectionValue() - : List.of(childExprValue)); + expandedChildExprValues = expandExprValue(childExprValue); } else { String remainingQualifiedName = ExprValueUtils.joinQualifiedName(components.subList(1, components.size())); @@ -104,12 +100,28 @@ private static List expandNestedExprValue( // Build expanded values. List expandedExprValues = new LinkedList<>(); - for (ExprValue expandedChildExprValue : expandedChildExprValues) { + for (ExprValue expanded : expandedChildExprValues) { Map newFieldsMap = new HashMap<>(fieldsMap); - newFieldsMap.put(fieldName, expandedChildExprValue); + newFieldsMap.put(fieldName, expanded); expandedExprValues.add(ExprTupleValue.fromExprValueMap(newFieldsMap)); } return expandedExprValues; } + + /** Expands the given value and returns the results. */ + private static List expandExprValue(ExprValue exprValue) { + if (!exprValue.type().equals(ARRAY)) { + return List.of(exprValue); + } + + List collectionExprValues = exprValue.collectionValue(); + + // An empty collection expands to a single null value. 
+ if (collectionExprValues.isEmpty()) { + return List.of(ExprValueUtils.nullValue()); + } + + return collectionExprValues; + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java index 29ccb9b809..9d67ef6af4 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ExpandOperatorTest.java @@ -43,6 +43,18 @@ class ExpandOperatorTest extends PhysicalPlanTestBase { private List actualRows; private List expectedRows; + @Test + void testArrayEmpty() { + inputRow = + ExprValueUtils.tupleValue(Map.of("array", ExprValueUtils.collectionValue(List.of()))); + mockInput(inputRow); + + actualRows = execute(expand(inputPlan, DSL.ref("array", ARRAY))); + expectedRows = List.of(ExprValueUtils.tupleValue(Map.of("array", nullExprValue))); + + assertEquals(expectedRows, actualRows); + } + @Test void testArray() { inputRow =