From b628a6003da7e6703a79445fd3b5f5cec88bac29 Mon Sep 17 00:00:00 2001
From: Hari krishna <49222145+pobbatihari@users.noreply.github.com>
Date: Wed, 3 May 2023 14:18:52 +0530
Subject: [PATCH] Wrong results by ORCA when NULL TEST on LOJ (#15358)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Issue:
Orca tries to eliminate self comparisons at preprocessing, but this early optimization
misleading the further expression preprocesing of LOJ. This PR tries to avoid self comparison
check's of WHERE clause predicate when SELECT's logical child is LOJ.
NOTE:
Postgres Executor’s standard, restriction placed in the ON clause is processed before the join,
while a restriction placed in the WHERE clause is processed after the join.
That does not matter with inner joins, but it matters a lot with outer joins.
Setup:
CREATE TABLE t2(c0 int, c1 int not null);
INSERT INTO t2 values(1, 2),(3,4),(5,6),(7,8);
CREATE TABLE t3(c0 int not null, c1 int, c2 int);
SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
c1
----
(0 rows)
explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.07 rows=1 width=4) -> Nested Loop (cost=0.00..1324032.07 rows=1 width=4)
Join Filter: true
-> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4)
Filter: (true IS NULL)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
-> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
Filter: c1 > c2
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows
set optimizer=off;
SET
SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
c1
----
4
8
2
6
(4 rows)
explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10044448648.78 rows=1117865000 width=4)
-> Nested Loop Left Join (cost=10000000000.00..10029543782.11 rows=372621667 width=4)
Filter: ((t3.c0 = t3.c0) IS NULL)
-> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=4)
-> Materialize (cost=0.00..834.64 rows=25967 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=4)
-> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4)
Filter: (c1 > c2)
Optimizer: Postgres query optimizer
(8 rows)
After Fix:
SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
c1
----
6
4
8
2
(4 rows)
explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=4)
-> Result (cost=0.00..1324032.37 rows=1 width=4)
Filter: ((t3.c0 = t3.c0) IS NULL)
-> Nested Loop Left Join (cost=0.00..1324032.37 rows=1 width=8)
Join Filter: true
-> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4)
Filter: (c1 > c2)
Optimizer: Pivotal Optimizer (GPORCA)
(cherry picked from gpdb commit d3dd98c1a8daf04fbf6cb91fc4afa6f91b317e93)
---
.../LOJ-With-Single-Pred-On-Outer.mdp | 395 ++++++++++++++++
.../LOJ_NULLTEST-On-SelfCheck-Pred.mdp | 434 ++++++++++++++++++
.../gpopt/operators/CExpressionPreprocessor.h | 3 +-
.../include/gpopt/operators/CPredicateUtils.h | 6 +-
.../src/operators/CExpressionPreprocessor.cpp | 12 +-
.../src/operators/CPredicateUtils.cpp | 12 +-
.../libgpopt/src/xforms/CXformUtils.cpp | 6 -
src/backend/gporca/server/CMakeLists.txt | 3 +-
src/test/regress/expected/bfv_joins.out | 344 ++++++++++++--
.../regress/expected/bfv_joins_optimizer.out | 353 ++++++++++++--
src/test/regress/sql/bfv_joins.sql | 47 +-
11 files changed, 1533 insertions(+), 82 deletions(-)
create mode 100644 src/backend/gporca/data/dxl/minidump/LOJ-With-Single-Pred-On-Outer.mdp
create mode 100644 src/backend/gporca/data/dxl/minidump/LOJ_NULLTEST-On-SelfCheck-Pred.mdp
diff --git a/src/backend/gporca/data/dxl/minidump/LOJ-With-Single-Pred-On-Outer.mdp b/src/backend/gporca/data/dxl/minidump/LOJ-With-Single-Pred-On-Outer.mdp
new file mode 100644
index 00000000000..fe7cbe252da
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/LOJ-With-Single-Pred-On-Outer.mdp
@@ -0,0 +1,395 @@
+
+
+ Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=12)
+ Join Filter: (t1.b = 1)
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=1)
+ Optimizer: Pivotal Optimizer (GPORCA)
+ (8 rows)
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/backend/gporca/data/dxl/minidump/LOJ_NULLTEST-On-SelfCheck-Pred.mdp b/src/backend/gporca/data/dxl/minidump/LOJ_NULLTEST-On-SelfCheck-Pred.mdp
new file mode 100644
index 00000000000..93feb939661
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/LOJ_NULLTEST-On-SelfCheck-Pred.mdp
@@ -0,0 +1,434 @@
+
+
+ t2.a WHERE t2.a = t2.a IS NULL;
+ QUERY PLAN
+ ---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.36 rows=1 width=4)
+ -> Result (cost=0.00..1324032.36 rows=1 width=4)
+ Filter: ((t2.a = t2.a) IS NULL)
+ -> Nested Loop Left Join (cost=0.00..1324032.36 rows=1 width=8)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+ (11 rows)
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h
index 660ad2ae6b8..6b750d2098d 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h
@@ -70,7 +70,8 @@ class CExpressionPreprocessor
// eliminate self comparisons
static CExpression *PexprEliminateSelfComparison(CMemoryPool *mp,
- CExpression *pexpr);
+ CExpression *pexpr,
+ CColRefSet *pcrsNotNull);
// remove CTE Anchor nodes
static CExpression *PexprRemoveCTEAnchors(CMemoryPool *mp,
diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
index fb17d71b9b8..0c5903864c2 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
@@ -130,11 +130,13 @@ class CPredicateUtils
static BOOL FPlainEquality(CExpression *pexpr);
// is the given expression a self comparison on some column
- static BOOL FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt);
+ static BOOL FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt,
+ CColRefSet *pcrsNotNull);
// eliminate self comparison if possible
static CExpression *PexprEliminateSelfComparison(CMemoryPool *mp,
- CExpression *pexpr);
+ CExpression *pexpr,
+ CColRefSet *pcrsNotNull);
// is the given expression in the form (col1 Is NOT DISTINCT FROM col2)
static BOOL FINDFScalarIdents(CExpression *pexpr);
diff --git a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
index d9afaf498ce..76000a737f3 100644
--- a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
@@ -70,7 +70,8 @@ using namespace gpopt;
// eliminate self comparisons in the given expression
CExpression *
CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
- CExpression *pexpr)
+ CExpression *pexpr,
+ CColRefSet *pcrsNotNull)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
@@ -79,7 +80,8 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
if (CUtils::FScalarCmp(pexpr))
{
- return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr);
+ return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr,
+ pcrsNotNull);
}
// recursively process children
@@ -88,7 +90,7 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
- PexprEliminateSelfComparison(mp, (*pexpr)[ul]);
+ PexprEliminateSelfComparison(mp, (*pexpr)[ul], pcrsNotNull);
pdrgpexprChildren->Append(pexprChild);
}
@@ -3077,8 +3079,8 @@ CExpressionPreprocessor::PexprPreprocess(
pexprConvert2In->Release();
// (11) eliminate self comparisons
- CExpression *pexprSelfCompEliminated =
- PexprEliminateSelfComparison(mp, pexprInferredPreds);
+ CExpression *pexprSelfCompEliminated = PexprEliminateSelfComparison(
+ mp, pexprInferredPreds, pexprInferredPreds->DeriveNotNullColumns());
GPOS_CHECK_ABORT;
pexprInferredPreds->Release();
diff --git a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
index 2c0d40ba454..b975940110d 100644
--- a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
@@ -856,7 +856,8 @@ CPredicateUtils::FPlainEquality(CExpression *pexpr)
// is an expression a self comparison on some column
BOOL
-CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
+CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt,
+ CColRefSet *pcrsNotNull)
{
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(nullptr != pecmpt);
@@ -882,8 +883,8 @@ CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
CColRef *colref =
const_cast(CScalarIdent::PopConvert(popLeft)->Pcr());
- return CColRef::EcrtTable == colref->Ecrt() &&
- !CColRefTable::PcrConvert(colref)->IsNullable();
+ // return true if column is a member of NotNull columns(pcrsNotNull) of parent expression
+ return pcrsNotNull->FMember(colref);
}
return false;
@@ -892,14 +893,15 @@ CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
// eliminate self comparison and replace it with True or False if possible
CExpression *
CPredicateUtils::PexprEliminateSelfComparison(CMemoryPool *mp,
- CExpression *pexpr)
+ CExpression *pexpr,
+ CColRefSet *pcrsNotNull)
{
GPOS_ASSERT(pexpr->Pop()->FScalar());
pexpr->AddRef();
CExpression *pexprNew = pexpr;
IMDType::ECmpType cmp_type = IMDType::EcmptOther;
- if (FSelfComparison(pexpr, &cmp_type))
+ if (FSelfComparison(pexpr, &cmp_type, pcrsNotNull))
{
switch (cmp_type)
{
diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
index 224d018a26b..96a88a505ba 100644
--- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
@@ -158,12 +158,6 @@ CXformUtils::ExfpExpandJoinOrder(CExpressionHandle &exprhdl,
return CXform::ExfpNone;
}
-//#ifdef GPOS_DEBUG
- CAutoMemoryPool amp;
- GPOS_ASSERT_FIXME(!FJoinPredOnSingleChild(amp.Pmp(), exprhdl) &&
- "join predicates are not pushed down");
-//#endif // GPOS_DEBUG
-
if (nullptr != exprhdl.Pgexpr())
{
// if handle is attached to a group expression, transformation is applied
diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt
index 924219e0c5e..ce6d0b4a19a 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -269,7 +269,8 @@ DPv2GreedyOnly DPv2MinCardOnly DPv2QueryOnly LOJ-PushDown LeftJoinDPv2JoinOrder;
COuterJoin2Test:
LOJ-IsNullPred Select-Proj-OuterJoin OuterJoin-With-OuterRefs Join-Disj-Subqs
EffectOfLocalPredOnJoin EffectOfLocalPredOnJoin2 EffectOfLocalPredOnJoin3
-LeftJoin-UnsupportedFilter-Cardinality LeftOuter2InnerUnionAllAntiSemiJoin;
+LeftJoin-UnsupportedFilter-Cardinality LeftOuter2InnerUnionAllAntiSemiJoin
+LOJ-With-Single-Pred-On-Outer LOJ_NULLTEST-On-SelfCheck-Pred;
COuterJoin3Test:
LOJ_IDF_no_convert_outer_ref_predicate_with_NULL
diff --git a/src/test/regress/expected/bfv_joins.out b/src/test/regress/expected/bfv_joins.out
index 944249df281..6d82f60dc39 100644
--- a/src/test/regress/expected/bfv_joins.out
+++ b/src/test/regress/expected/bfv_joins.out
@@ -11,13 +11,16 @@ create table y (a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into y (select * from x);
-CREATE TABLE t1 (a int, b int);
-NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
+CREATE TABLE t1 (a int, b int, c int not null);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE t2 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-INSERT INTO t1 VALUES (1,1),(2,1),(3,NULL);
+CREATE TABLE t3 (a int not null, b int, c int);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO t1 VALUES (1,1,1),(2,1,2),(3,NULL,3);
INSERT INTO t2 VALUES (2,3);
CREATE FUNCTION func_x(x int) RETURNS int AS $$
BEGIN
@@ -621,38 +624,313 @@ SELECT * from x left join y on True where func_x(y.a) > 0;
(100 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM t2.b;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 2 | 1 | 2 | 2 | 3
+ 1 | 1 | 1 | |
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 2 | 1 | 2 | 2 | 3
+ 1 | 1 | 1 | |
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 2 | 1 | 2 | 2 | 3
(1 row)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS NOT DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 3 | | |
+ a | b | c | a | b
+---+---+---+---+---
+ 1 | 1 | 1 | |
+ 3 | | 3 | |
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS NOT DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 3 | | |
+ a | b | c | a | b
+---+---+---+---+---
+ 3 | | 3 | |
(1 row)
+--- Tests for LOJ with single predicate uses columns of outer child only
+explain select t1.* from t1 left outer join t3 on t1.b=1;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10059962503.02 rows=6068410 width=12)
+ -> Nested Loop Left Join (cost=10000000000.00..10059881590.89 rows=2022803 width=12)
+ Join Filter: (t1.b = 1)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
+ -> Materialize (cost=0.00..1721.83 rows=77900 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1332.33 rows=77900 width=0)
+ -> Seq Scan on t3 (cost=0.00..293.67 rows=25967 width=0)
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+select t1.* from t1 left outer join t3 on t1.b=1;
+ a | b | c
+---+---+---
+ 1 | 1 | 1
+ 2 | 1 | 2
+ 3 | | 3
+(3 rows)
+
+explain select t1.* from t1 left outer join t3 on t1.c=1;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10059962503.02 rows=6068410 width=12)
+ -> Nested Loop Left Join (cost=10000000000.00..10059881590.89 rows=2022803 width=12)
+ Join Filter: (t1.c = 1)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
+ -> Materialize (cost=0.00..1721.83 rows=77900 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1332.33 rows=77900 width=0)
+ -> Seq Scan on t3 (cost=0.00..293.67 rows=25967 width=0)
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+select t1.* from t1 left outer join t3 on t1.c=1;
+ a | b | c
+---+---+---
+ 1 | 1 | 1
+ 2 | 1 | 2
+ 3 | | 3
+(3 rows)
+
+--- Tests for LOJ with null-filtering on self check conditions.
+--- make sure that we dont optimize the equality checks of inner table of LOJ.
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10040215447.37 rows=1011401667 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10026730091.81 rows=337133889 width=4)
+ Filter: ((t3.a = t3.a) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=4)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000052373.42 rows=2022803 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10000025402.71 rows=674268 width=4)
+ -> Seq Scan on t1 (cost=0.00..358.58 rows=26 width=4)
+ Filter: ((c = c) IS NULL)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=0)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=0)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+ c
+---
+(0 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.b=2;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000002619.50 rows=101101 width=4)
+ -> Nested Loop (cost=10000000000.00..10000001271.49 rows=33700 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..553.36 rows=1 width=0)
+ -> Seq Scan on t3 (cost=0.00..553.33 rows=1 width=0)
+ Filter: ((b > a) AND ((a = a) IS NULL) AND (2 > a) AND (b = 2))
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.a=2;
+ c
+---
+(0 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000040573.74 rows=1011402 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10000027088.38 rows=337134 width=4)
+ Filter: ((t3.a = t3.a) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..358.58 rows=26 width=4)
+ Filter: (b = 1)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=4)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+ c
+---
+ 2
+ 1
+(2 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10046958125.15 rows=1517102500 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10026730091.81 rows=505700833 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t3.a IS NULL))
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=4)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10041914602.17 rows=1012413068 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10028415761.26 rows=337471023 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t3.b = 2))
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=8)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=8)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=8)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10041914602.17 rows=1012413068 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10028415761.26 rows=337471023 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t1.a = 1))
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=8)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=4)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10110451674.22 rows=3034205000 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10069995607.56 rows=1011401667 width=4)
+ Join Filter: (((t1.a + t1.b) = (t1.a + t1.b)) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
+ -> Materialize (cost=0.00..1721.83 rows=77900 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1332.33 rows=77900 width=0)
+ -> Seq Scan on t3 (cost=0.00..293.67 rows=25967 width=0)
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000052503.26 rows=2022803 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10000025532.55 rows=674268 width=4)
+ -> Seq Scan on t1 (cost=0.00..488.42 rows=26 width=4)
+ Filter: (((a + b) = (a + b)) IS NULL)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=0)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=0)
+ Filter: (a > b)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+ c
+---
+ 3
+(1 row)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10012810957.32 rows=6068410 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10012730045.19 rows=2022803 width=4)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ -> Materialize (cost=0.00..489.84 rows=78 width=0)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..489.46 rows=78 width=0)
+ -> Seq Scan on t3 (cost=0.00..488.42 rows=26 width=0)
+ Filter: (((a + b) = (a + b)) IS NULL)
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10043586786.26 rows=1011401667 width=4)
+ -> Nested Loop Left Join (cost=10000000000.00..10030101430.70 rows=337133889 width=4)
+ Filter: (((t3.a + t3.b) = (t3.a + t3.b)) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=4)
+ -> Materialize (cost=0.00..834.64 rows=25967 width=8)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..704.81 rows=25967 width=8)
+ -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=8)
+ Filter: (b > a)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
-- Test for unexpected NLJ qual
--
explain select 1 as mrs_t1 where 1 <= ALL (select x from z);
@@ -3355,12 +3633,12 @@ SET optimizer_enable_nljoin=off;
EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a;
QUERY PLAN
------------------------------------------------------------------------------------------
- Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16)
- -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16)
- Hash Cond: (t1.a = t2.a)
- -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8)
- -> Hash (cost=321.00..321.00 rows=28700 width=8)
- -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ Gather Motion 3:1 (slice1; segments: 3) (cost=618.25..205952.92 rows=6707190 width=20)
+ -> Hash Join (cost=618.25..116523.72 rows=2235730 width=20)
+ Hash Cond: (t2.a = t1.a)
+ -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ -> Hash (cost=293.67..293.67 rows=25967 width=12)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(7 rows)
@@ -3368,12 +3646,12 @@ SET optimizer_enable_nljoin=on;
EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a;
QUERY PLAN
------------------------------------------------------------------------------------------
- Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16)
- -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16)
- Hash Cond: (t1.a = t2.a)
- -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8)
- -> Hash (cost=321.00..321.00 rows=28700 width=8)
- -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ Gather Motion 3:1 (slice1; segments: 3) (cost=618.25..205952.92 rows=6707190 width=20)
+ -> Hash Join (cost=618.25..116523.72 rows=2235730 width=20)
+ Hash Cond: (t2.a = t1.a)
+ -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ -> Hash (cost=293.67..293.67 rows=25967 width=12)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(7 rows)
diff --git a/src/test/regress/expected/bfv_joins_optimizer.out b/src/test/regress/expected/bfv_joins_optimizer.out
index 4fc2dd47357..9a337e1273c 100644
--- a/src/test/regress/expected/bfv_joins_optimizer.out
+++ b/src/test/regress/expected/bfv_joins_optimizer.out
@@ -11,13 +11,16 @@ create table y (a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into y (select * from x);
-CREATE TABLE t1 (a int, b int);
-NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
+CREATE TABLE t1 (a int, b int, c int not null);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE t2 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-INSERT INTO t1 VALUES (1,1),(2,1),(3,NULL);
+CREATE TABLE t3 (a int not null, b int, c int);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO t1 VALUES (1,1,1),(2,1,2),(3,NULL,3);
INSERT INTO t2 VALUES (2,3);
CREATE FUNCTION func_x(x int) RETURNS int AS $$
BEGIN
@@ -618,38 +621,332 @@ SELECT * from x left join y on True where func_x(y.a) > 0;
(100 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM t2.b;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 2 | 1 | 2 | 2 | 3
+ 1 | 1 | 1 | |
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 1 | 1 | 1 | |
+ 2 | 1 | 2 | 2 | 3
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 2 | 1 | 2 | 3
+ a | b | c | a | b
+---+---+---+---+---
+ 2 | 1 | 2 | 2 | 3
(1 row)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS NOT DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 1 | 1 | |
- 3 | | |
+ a | b | c | a | b
+---+---+---+---+---
+ 3 | | 3 | |
+ 1 | 1 | 1 | |
(2 rows)
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS NOT DISTINCT FROM NULL;
- a | b | a | b
----+---+---+---
- 3 | | |
+ a | b | c | a | b
+---+---+---+---+---
+ 3 | | 3 | |
(1 row)
+--- Tests for LOJ with single predicate uses columns of outer child only
+explain select t1.* from t1 left outer join t3 on t1.b=1;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.10 rows=1 width=12)
+ -> Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=12)
+ Join Filter: (t1.b = 1)
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
+select t1.* from t1 left outer join t3 on t1.b=1;
+ a | b | c
+---+---+---
+ 2 | 1 | 2
+ 3 | | 3
+ 1 | 1 | 1
+(3 rows)
+
+explain select t1.* from t1 left outer join t3 on t1.c=1;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.10 rows=1 width=12)
+ -> Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=12)
+ Join Filter: (t1.c = 1)
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
+select t1.* from t1 left outer join t3 on t1.c=1;
+ a | b | c
+---+---+---
+ 2 | 1 | 2
+ 3 | | 3
+ 1 | 1 | 1
+(3 rows)
+
+--- Tests for LOJ with null-filtering on self check conditions.
+--- make sure that we dont optimize the equality checks of inner table of LOJ.
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=4)
+ -> Result (cost=0.00..1324032.37 rows=1 width=4)
+ Filter: ((t3.a = t3.a) IS NULL)
+ -> Nested Loop Left Join (cost=0.00..1324032.37 rows=1 width=8)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+ c
+---
+ 1
+ 2
+ 3
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.14 rows=2 width=4)
+ -> Nested Loop Left Join (cost=0.00..1324032.14 rows=1 width=4)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (true IS NULL)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(10 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+ c
+---
+(0 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.b=2;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.13 rows=1 width=4)
+ -> Nested Loop (cost=0.00..1324032.13 rows=1 width=4)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Filter: ((true IS NULL) AND (b = 2) AND (b > a))
+ Optimizer: Pivotal Optimizer (GPORCA)
+(9 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.a=2;
+ c
+---
+(0 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.38 rows=1 width=4)
+ -> Result (cost=0.00..1324032.38 rows=1 width=4)
+ Filter: ((t3.a = t3.a) IS NULL)
+ -> Nested Loop Left Join (cost=0.00..1324032.38 rows=1 width=8)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b = 1)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(12 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+ c
+---
+ 1
+ 2
+(2 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=4)
+ -> Result (cost=0.00..1324032.37 rows=1 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t3.a IS NULL))
+ -> Nested Loop Left Join (cost=0.00..1324032.37 rows=1 width=8)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.61 rows=1 width=4)
+ -> Result (cost=0.00..1324032.61 rows=1 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t3.b = 2))
+ -> Nested Loop Left Join (cost=0.00..1324032.61 rows=1 width=12)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=8)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+ c
+---
+ 1
+ 2
+ 3
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.48 rows=1 width=4)
+ -> Result (cost=0.00..1324032.48 rows=1 width=4)
+ Filter: (((t3.a = t3.a) IS NULL) OR (t1.a = 1))
+ -> Nested Loop Left Join (cost=0.00..1324032.48 rows=1 width=12)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=8)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.13 rows=1 width=4)
+ -> Nested Loop Left Join (cost=0.00..1324032.13 rows=1 width=4)
+ Join Filter: ((((t1.a + t1.b)) = ((t1.a + t1.b))) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.19 rows=2 width=4)
+ -> Nested Loop Left Join (cost=0.00..1324032.19 rows=1 width=4)
+ Join Filter: true
+ -> Result (cost=0.00..431.00 rows=1 width=4)
+ Filter: ((((t1.a + t1.b)) = ((t1.a + t1.b))) IS NULL)
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1)
+ Filter: (a > b)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+ c
+---
+ 3
+(1 row)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.09 rows=2 width=4)
+ -> Nested Loop Left Join (cost=0.00..1324032.09 rows=1 width=4)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=1)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
+ -> Result (cost=0.00..431.00 rows=1 width=1)
+ Filter: ((((t3.a + t3.b)) = ((t3.a + t3.b))) IS NULL)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(10 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.22 rows=1 width=4)
+ -> Result (cost=0.00..1324032.22 rows=1 width=4)
+ Filter: ((((t3.a + t3.b)) = ((t3.a + t3.b))) IS NULL)
+ -> Nested Loop Left Join (cost=0.00..1324032.22 rows=1 width=8)
+ Join Filter: true
+ -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
+ -> Materialize (cost=0.00..431.00 rows=1 width=4)
+ -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
+ -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8)
+ Filter: (b > a)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+ c
+---
+ 2
+ 3
+ 1
+(3 rows)
+
-- Test for unexpected NLJ qual
--
explain select 1 as mrs_t1 where 1 <= ALL (select x from z);
@@ -3019,6 +3316,7 @@ EXPLAIN (costs off) SELECT * FROM coercejoin a, coercejoin b WHERE a.a=b.a;
-> Seq Scan on coercejoin
-> Hash
-> Seq Scan on coercejoin coercejoin_1
+ Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
-- Negative test, the join should not be colocated since the cast is a coercion
@@ -3037,6 +3335,7 @@ EXPLAIN (costs off) SELECT * FROM coercejoin a, coercejoin b WHERE a.a::numeric=
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: (coercejoin_1.a)::numeric
-> Seq Scan on coercejoin coercejoin_1
+ Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
--
@@ -3351,12 +3650,12 @@ SET optimizer_enable_nljoin=off;
EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a;
QUERY PLAN
------------------------------------------------------------------------------------------
- Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16)
- -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16)
- Hash Cond: (t1.a = t2.a)
- -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8)
- -> Hash (cost=321.00..321.00 rows=28700 width=8)
- -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ Gather Motion 3:1 (slice1; segments: 3) (cost=618.25..205952.92 rows=6707190 width=20)
+ -> Hash Join (cost=618.25..116523.72 rows=2235730 width=20)
+ Hash Cond: (t2.a = t1.a)
+ -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8)
+ -> Hash (cost=293.67..293.67 rows=25967 width=12)
+ -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(7 rows)
diff --git a/src/test/regress/sql/bfv_joins.sql b/src/test/regress/sql/bfv_joins.sql
index d85fa11ee26..74aee8eff0a 100644
--- a/src/test/regress/sql/bfv_joins.sql
+++ b/src/test/regress/sql/bfv_joins.sql
@@ -9,10 +9,11 @@ insert into x values (generate_series(1,10), generate_series(1,10), generate_ser
create table y (a int, b int, c int);
insert into y (select * from x);
-CREATE TABLE t1 (a int, b int);
+CREATE TABLE t1 (a int, b int, c int not null);
CREATE TABLE t2 (a int, b int);
+CREATE TABLE t3 (a int not null, b int, c int);
-INSERT INTO t1 VALUES (1,1),(2,1),(3,NULL);
+INSERT INTO t1 VALUES (1,1,1),(2,1,2),(3,NULL,3);
INSERT INTO t2 VALUES (2,3);
CREATE FUNCTION func_x(x int) RETURNS int AS $$
@@ -69,6 +70,48 @@ SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS NOT DISTINCT FR
SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS NOT DISTINCT FROM NULL;
+--- Tests for LOJ with single predicate uses columns of outer child only
+explain select t1.* from t1 left outer join t3 on t1.b=1;
+select t1.* from t1 left outer join t3 on t1.b=1;
+
+explain select t1.* from t1 left outer join t3 on t1.c=1;
+select t1.* from t1 left outer join t3 on t1.c=1;
+
+--- Tests for LOJ with null-filtering on self check conditions.
+--- make sure that we dont optimize the equality checks of inner table of LOJ.
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.b=2;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.a=2;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1;
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL;
+
+explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL;
+
+explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
+
-- Test for unexpected NLJ qual
--
explain select 1 as mrs_t1 where 1 <= ALL (select x from z);