opensearch-project · penghuo · Feb 10, 2025 · Feb 5, 2025 · Feb 6, 2025 · Feb 6, 2025
@@ -29,7 +29,7 @@ LOOKUP <lookupIndex> (<lookupMappingField> [AS <sourceMappingField>])...
 **inputField**
 - Optional
 - Default: All fields of \<lookupIndex\> where matched values are applied to result output if no field is specified.
-- Description: A field in \<lookupIndex\> where matched values are applied to result output. You can specify multiple \<inputField\> with comma-delimited. If you don't specify any \<inputField\>, all fields of \<lookupIndex\> where matched values are applied to result output.
+- Description: A field in \<lookupIndex\> whose matched values are applied to the result output. You can specify multiple \<inputField\> values as a comma-delimited list. If you don't specify any \<inputField\>, the matched values of all fields except \<lookupMappingField\> from \<lookupIndex\> are applied to the result output.
 
 **outputField**
 - Optional

@@ -51,25 +51,24 @@ class FlintSparkPPLLookupITSuite
 
   test("test LOOKUP lookupTable uid AS id REPLACE department") {
     val frame = sql(s"source = $sourceTable| LOOKUP $lookupTable uid AS id REPLACE department")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", "England", 100000, "IT"),
       Row(1001, "Hello", "Artist", "USA", 70000, null),
       Row(1002, "John", "Doctor", "Canada", 120000, "DATA"),
       Row(1003, "David", "Doctor", null, 120000, "HR"),
       Row(1004, "David", null, "Canada", 0, null),
       Row(1005, "Jane", "Scientist", "Canada", 90000, "DATA"))
+    assertSameRows(expectedResults, frame)
 
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
     val lookupProject =
       Project(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("uid")), lookupAlias)
     val joinCondition = EqualTo(UnresolvedAttribute("uid"), UnresolvedAttribute("id"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceForSafeExpr =
-      Coalesce(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("department")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department"),
+          UnresolvedAttribute("department")))
     val projectAfterJoin = Project(
       Seq(
         UnresolvedStar(Some(Seq("__auto_generated_subquery_name_s"))),
@@ -88,26 +87,24 @@ class FlintSparkPPLLookupITSuite
 
   test("test LOOKUP lookupTable uid AS id APPEND department") {
     val frame = sql(s"source = $sourceTable| LOOKUP $lookupTable uid AS id APPEND department")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", "England", 100000, "IT"),
       Row(1001, "Hello", "Artist", "USA", 70000, null),
       Row(1002, "John", "Doctor", "Canada", 120000, "DATA"),
       Row(1003, "David", "Doctor", null, 120000, "HR"),
       Row(1004, "David", null, "Canada", 0, null),
       Row(1005, "Jane", "Scientist", "Canada", 90000, "DATA"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+    assertSameRows(expectedResults, frame)
 
     val lookupProject =
       Project(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("uid")), lookupAlias)
     val joinCondition = EqualTo(UnresolvedAttribute("uid"), UnresolvedAttribute("id"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceExpr =
-      Coalesce(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("department")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("department"),
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department")))
     val coalesceForSafeExpr = Coalesce(Seq(coalesceExpr, UnresolvedAttribute("department")))
     val projectAfterJoin = Project(
       Seq(
@@ -128,26 +125,24 @@ class FlintSparkPPLLookupITSuite
   test("test LOOKUP lookupTable uid AS id REPLACE department AS country") {
     val frame =
       sql(s"source = $sourceTable| LOOKUP $lookupTable uid AS id REPLACE department AS country")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", 100000, "IT"),
       Row(1001, "Hello", "Artist", 70000, "USA"),
       Row(1002, "John", "Doctor", 120000, "DATA"),
       Row(1003, "David", "Doctor", 120000, "HR"),
       Row(1004, "David", null, 0, "Canada"),
       Row(1005, "Jane", "Scientist", 90000, "DATA"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+    assertSameRows(expectedResults, frame)
 
     val lookupProject =
       Project(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("uid")), lookupAlias)
     val joinCondition = EqualTo(UnresolvedAttribute("uid"), UnresolvedAttribute("id"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceForSafeExpr =
-      Coalesce(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("country")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department"),
+          UnresolvedAttribute("__auto_generated_subquery_name_s.country")))
     val projectAfterJoin = Project(
       Seq(
         UnresolvedStar(Some(Seq("__auto_generated_subquery_name_s"))),
@@ -167,24 +162,26 @@ class FlintSparkPPLLookupITSuite
   test("test LOOKUP lookupTable uid AS id APPEND department AS country") {
     val frame =
       sql(s"source = $sourceTable| LOOKUP $lookupTable uid AS id APPEND department AS country")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", 100000, "England"),
       Row(1001, "Hello", "Artist", 70000, "USA"),
       Row(1002, "John", "Doctor", 120000, "Canada"),
       Row(1003, "David", "Doctor", 120000, "HR"),
       Row(1004, "David", null, 0, "Canada"),
       Row(1005, "Jane", "Scientist", 90000, "Canada"))
+    assertSameRows(expectedResults, frame)
 
     val lookupProject =
       Project(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("uid")), lookupAlias)
     val joinCondition = EqualTo(UnresolvedAttribute("uid"), UnresolvedAttribute("id"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceExpr =
-      Coalesce(Seq(UnresolvedAttribute("country"), UnresolvedAttribute("department")))
-    val coalesceForSafeExpr = Coalesce(Seq(coalesceExpr, UnresolvedAttribute("country")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("__auto_generated_subquery_name_s.country"),
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department")))
+    val coalesceForSafeExpr =
+      Coalesce(Seq(coalesceExpr, UnresolvedAttribute("__auto_generated_subquery_name_s.country")))
     val projectAfterJoin = Project(
       Seq(
         UnresolvedStar(Some(Seq("__auto_generated_subquery_name_s"))),
@@ -204,19 +201,15 @@ class FlintSparkPPLLookupITSuite
   test("test LOOKUP lookupTable uid AS id, name REPLACE department") {
     val frame =
       sql(s"source = $sourceTable| LOOKUP $lookupTable uID AS id, name REPLACE department")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", "England", 100000, "IT"),
       Row(1001, "Hello", "Artist", "USA", 70000, null),
       Row(1002, "John", "Doctor", "Canada", 120000, "DATA"),
       Row(1003, "David", "Doctor", null, 120000, "HR"),
       Row(1004, "David", null, "Canada", 0, null),
       Row(1005, "Jane", "Scientist", "Canada", 90000, "DATA"))
+    assertSameRows(expectedResults, frame)
 
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
     val lookupProject =
       Project(
         Seq(
@@ -232,7 +225,10 @@ class FlintSparkPPLLookupITSuite
           UnresolvedAttribute("__auto_generated_subquery_name_s.name")))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceForSafeExpr =
-      Coalesce(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("department")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department"),
+          UnresolvedAttribute("department")))
     val projectAfterJoin = Project(
       Seq(
         UnresolvedStar(Some(Seq("__auto_generated_subquery_name_s"))),
@@ -253,19 +249,14 @@ class FlintSparkPPLLookupITSuite
   test("test LOOKUP lookupTable uid AS id, name APPEND department") {
     val frame =
       sql(s"source = $sourceTable| LOOKUP $lookupTable uid AS ID, name APPEND department")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "Engineer", "England", 100000, "IT"),
       Row(1001, "Hello", "Artist", "USA", 70000, null),
       Row(1002, "John", "Doctor", "Canada", 120000, "DATA"),
       Row(1003, "David", "Doctor", null, 120000, "HR"),
       Row(1004, "David", null, "Canada", 0, null),
       Row(1005, "Jane", "Scientist", "Canada", 90000, "DATA"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+    assertSameRows(expectedResults, frame)
 
     val lookupProject =
       Project(
@@ -282,7 +273,10 @@ class FlintSparkPPLLookupITSuite
           UnresolvedAttribute("__auto_generated_subquery_name_s.name")))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceExpr =
-      Coalesce(Seq(UnresolvedAttribute("department"), UnresolvedAttribute("department")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("department"),
+          UnresolvedAttribute("__auto_generated_subquery_name_l.department")))
     val coalesceForSafeExpr = Coalesce(Seq(coalesceExpr, UnresolvedAttribute("department")))
     val projectAfterJoin = Project(
       Seq(
@@ -303,38 +297,29 @@ class FlintSparkPPLLookupITSuite
 
   test("test LOOKUP lookupTable uid AS id, name") {
     val frame = sql(s"source = $sourceTable| LOOKUP $lookupTable uID AS id, name")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
-      Row(1000, "Jake", "Engineer", "England", 100000, 1000, "Jake", "IT", "Engineer"),
-      Row(1001, "Hello", "Artist", "USA", 70000, null, null, null, null),
-      Row(1002, "John", "Doctor", "Canada", 120000, 1002, "John", "DATA", "Scientist"),
-      Row(1003, "David", "Doctor", null, 120000, 1003, "David", "HR", "Doctor"),
-      Row(1004, "David", null, "Canada", 0, null, null, null, null),
-      Row(1005, "Jane", "Scientist", "Canada", 90000, 1005, "Jane", "DATA", "Engineer"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+      Row(1000, "Jake", "England", 100000, "IT", "Engineer"),
+      Row(1001, "Hello", "USA", 70000, null, null),
+      Row(1002, "John", "Canada", 120000, "DATA", "Scientist"),
+      Row(1003, "David", null, 120000, "HR", "Doctor"),
+      Row(1004, "David", "Canada", 0, null, null),
+      Row(1005, "Jane", "Canada", 90000, "DATA", "Engineer"))
+
+    assertSameRows(expectedResults, frame)
   }
 
   test("test LOOKUP lookupTable name REPLACE occupation") {
     val frame =
       sql(
         s"source = $sourceTable | eval major = occupation | fields id, name, major, country, salary | LOOKUP $lookupTable name REPLACE occupation AS major")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "England", 100000, "Engineer"),
       Row(1001, "Hello", "USA", 70000, "Artist"),
       Row(1002, "John", "Canada", 120000, "Scientist"),
       Row(1003, "David", null, 120000, "Doctor"),
       Row(1004, "David", "Canada", 0, "Doctor"),
       Row(1005, "Jane", "Canada", 90000, "Engineer"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+    assertSameRows(expectedResults, frame)
 
     val sourceTbl = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1"))
     val eval = Project(
@@ -356,7 +341,10 @@ class FlintSparkPPLLookupITSuite
       UnresolvedAttribute("__auto_generated_subquery_name_l.name"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceForSafeExpr =
-      Coalesce(Seq(UnresolvedAttribute("occupation"), UnresolvedAttribute("major")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("__auto_generated_subquery_name_l.occupation"),
+          UnresolvedAttribute("major")))
     val projectAfterJoin = Project(
       Seq(
         UnresolvedStar(Some(Seq("__auto_generated_subquery_name_s"))),
@@ -377,19 +365,14 @@ class FlintSparkPPLLookupITSuite
     val frame =
       sql(
         s"source = $sourceTable | eval major = occupation | fields id, name, major, country, salary | LOOKUP $lookupTable name APPEND occupation AS major")
-    // frame.show()
-    // frame.explain(true)
-    val results: Array[Row] = frame.collect()
     val expectedResults: Array[Row] = Array(
       Row(1000, "Jake", "England", 100000, "Engineer"),
       Row(1001, "Hello", "USA", 70000, "Artist"),
       Row(1002, "John", "Canada", 120000, "Doctor"),
       Row(1003, "David", null, 120000, "Doctor"),
       Row(1004, "David", "Canada", 0, "Doctor"),
       Row(1005, "Jane", "Canada", 90000, "Scientist"))
-
-    implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Integer](_.getAs[Integer](0))
-    assert(results.sorted.sameElements(expectedResults.sorted))
+    assertSameRows(expectedResults, frame)
 
     val sourceTbl = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test1"))
     val eval = Project(
@@ -411,7 +394,10 @@ class FlintSparkPPLLookupITSuite
       UnresolvedAttribute("__auto_generated_subquery_name_l.name"))
     val joinPlan = Join(sourceAlias, lookupProject, LeftOuter, Some(joinCondition), JoinHint.NONE)
     val coalesceExpr =
-      Coalesce(Seq(UnresolvedAttribute("major"), UnresolvedAttribute("occupation")))
+      Coalesce(
+        Seq(
+          UnresolvedAttribute("major"),
+          UnresolvedAttribute("__auto_generated_subquery_name_l.occupation")))
     val coalesceForSafeExpr =
       Coalesce(Seq(coalesceExpr, UnresolvedAttribute("major")))
     val projectAfterJoin = Project(
@@ -429,4 +415,32 @@ class FlintSparkPPLLookupITSuite
 
     comparePlans(expectedPlan, logicalPlan, checkAnalysis = false)
   }
+
+  test("test LOOKUP lookupTable name") {
+    // No inputField list: every lookup column except the mapping field `name` is applied.
+    val lookedUp = sql(s"source = $sourceTable | LOOKUP $lookupTable name")
+    val expected = Array[Row](
+      Row(1000, "Jake", "England", 100000, 1000, "IT", "Engineer"),
+      Row(1001, "Hello", "USA", 70000, null, null, null),
+      Row(1002, "John", "Canada", 120000, 1002, "DATA", "Scientist"),
+      Row(1003, "David", null, 120000, 1003, "HR", "Doctor"),
+      Row(1004, "David", "Canada", 0, 1003, "HR", "Doctor"),
+      Row(1005, "Jane", "Canada", 90000, 1005, "DATA", "Engineer"))
+    assertSameRows(expected, lookedUp)
+  }
+
+  test("test LOOKUP lookupTable name REPLACE occupation - 2") {
+    // REPLACE without an AS alias writes the lookup `occupation` into the same-named source
+    // column on a match; rows with no match in the lookup table (e.g. id 1001) keep their value.
+    val frame =
+      sql(s"source = $sourceTable | LOOKUP $lookupTable name REPLACE occupation")
+    val expectedResults: Array[Row] = Array(
+      Row(1000, "Jake", "England", 100000, "Engineer"),
+      Row(1001, "Hello", "USA", 70000, "Artist"),
+      Row(1002, "John", "Canada", 120000, "Scientist"),
+      Row(1003, "David", null, 120000, "Doctor"),
+      Row(1004, "David", "Canada", 0, "Doctor"),
+      Row(1005, "Jane", "Canada", 90000, "Engineer"))
+    assertSameRows(expectedResults, frame)
+  }
 }
@@ -368,6 +368,7 @@ public Expression visitWindowFunction(WindowFunction node, CatalystPlanContext c
     @Override
     public Expression visitInSubquery(InSubquery node, CatalystPlanContext outerContext) {
         CatalystPlanContext innerContext = new CatalystPlanContext();
+        innerContext.withSparkSession(outerContext.getSparkSession());
         visitExpressionList(node.getChild(), innerContext);
         Seq<Expression> values = innerContext.retainAllNamedParseExpressions(p -> p);
         UnresolvedPlan outerPlan = node.getQuery();
@@ -387,6 +388,7 @@ public Expression visitInSubquery(InSubquery node, CatalystPlanContext outerCont
     @Override
     public Expression visitScalarSubquery(ScalarSubquery node, CatalystPlanContext context) {
         CatalystPlanContext innerContext = new CatalystPlanContext();
+        innerContext.withSparkSession(context.getSparkSession());
         UnresolvedPlan outerPlan = node.getQuery();
         LogicalPlan subSearch = outerPlan.accept(planVisitor, innerContext);
         Expression scalarSubQuery = ScalarSubquery$.MODULE$.apply(
@@ -402,6 +404,7 @@ public Expression visitScalarSubquery(ScalarSubquery node, CatalystPlanContext c
     @Override
     public Expression visitExistsSubquery(ExistsSubquery node, CatalystPlanContext context) {
         CatalystPlanContext innerContext = new CatalystPlanContext();
+        innerContext.withSparkSession(context.getSparkSession());
         UnresolvedPlan outerPlan = node.getQuery();
         LogicalPlan subSearch = outerPlan.accept(planVisitor, innerContext);
         Expression existsSubQuery = Exists$.MODULE$.apply(

@@ -6,6 +6,7 @@
 package org.opensearch.sql.ppl;
 
 import lombok.Getter;
+import org.apache.spark.sql.SparkSession;
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation;
 import org.apache.spark.sql.catalyst.expressions.AttributeReference;
 import org.apache.spark.sql.catalyst.expressions.Expression;
@@ -38,6 +39,8 @@
  * The context used for Catalyst logical plan.
  */
 public class CatalystPlanContext {
+
+    @Getter private SparkSession sparkSession;
     /**
      * Catalyst relations list
      **/
@@ -283,4 +286,8 @@ public Expression resolveJoinCondition(
         isResolvingJoinCondition = false;
         return result;
     }
+
+    public void withSparkSession(SparkSession sparkSession) {
+        this.sparkSession = sparkSession; // read back through the Lombok-generated getSparkSession()
+    }
 }