Skip to content

Commit

Permalink
Wrong results by ORCA when NULL TEST on LOJ (#15358)
Browse files Browse the repository at this point in the history
Issue:
    Orca tries to eliminate self comparisons at preprocessing, but this early optimization
    misleading the further expression preprocesing of LOJ. This PR tries to avoid self comparison
    check's of WHERE clause predicate when SELECT's logical child is LOJ.

NOTE:
Postgres Executor’s standard, restriction placed in the ON clause is processed before the join,
while a restriction placed in the WHERE clause is processed after the join.
That does not matter with inner joins, but it matters a lot with outer joins.

Setup:
CREATE TABLE t2(c0 int, c1 int not null);
INSERT INTO t2 values(1, 2),(3,4),(5,6),(7,8);
CREATE TABLE t3(c0 int not null, c1 int, c2 int);

SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
 c1
----
(0 rows)

explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324032.07 rows=1 width=4) ->  Nested Loop  (cost=0.00..1324032.07 rows=1 width=4)
   Join Filter: true
   ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=4)
         Filter: (true IS NULL)
   ->  Materialize  (cost=0.00..431.00 rows=1 width=1)
         ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=1)
               ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=1)
                     Filter: c1 > c2
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows
set optimizer=off;
SET
SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
 c1
----
  4
  8
  2
  6
(4 rows)
explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
                                               QUERY PLAN
---------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10044448648.78 rows=1117865000 width=4)
   ->  Nested Loop Left Join  (cost=10000000000.00..10029543782.11 rows=372621667 width=4)
         Filter: ((t3.c0 = t3.c0) IS NULL)
         ->  Seq Scan on t2  (cost=0.00..321.00 rows=28700 width=4)
         ->  Materialize  (cost=0.00..834.64 rows=25967 width=4)
               ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..704.81 rows=25967 width=4)
                     ->  Seq Scan on t3  (cost=0.00..358.58 rows=8656 width=4)
                           Filter: (c1 > c2)
 Optimizer: Postgres query optimizer
(8 rows)

After Fix:
SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
 c1
----
  6
  4
  8
  2
(4 rows)
explain SELECT t2.c1 FROM t2 LEFT OUTER JOIN t3 ON t3.c1 > t3.c2 WHERE (t3.c0=t3.c0) IS NULL;
                                               QUERY PLAN
---------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324032.37 rows=1 width=4)
   ->  Result  (cost=0.00..1324032.37 rows=1 width=4)
         Filter: ((t3.c0 = t3.c0) IS NULL)
         ->  Nested Loop Left Join  (cost=0.00..1324032.37 rows=1 width=8)
               Join Filter: true
               ->  Seq Scan on t2  (cost=0.00..431.00 rows=1 width=4)
               ->  Materialize  (cost=0.00..431.00 rows=1 width=4)
                     ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=4)
                           ->  Seq Scan on t3  (cost=0.00..431.00 rows=1 width=4)
                                 Filter: (c1 > c2)
 Optimizer: Pivotal Optimizer (GPORCA)
(cherry picked from gpdb commit d3dd98c1a8daf04fbf6cb91fc4afa6f91b317e93)
  • Loading branch information
pobbatihari authored and fanfuxiaoran committed Nov 21, 2024
1 parent f92faf0 commit b628a60
Show file tree
Hide file tree
Showing 11 changed files with 1,533 additions and 82 deletions.
395 changes: 395 additions & 0 deletions src/backend/gporca/data/dxl/minidump/LOJ-With-Single-Pred-On-Outer.mdp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ class CExpressionPreprocessor

// eliminate self comparisons
static CExpression *PexprEliminateSelfComparison(CMemoryPool *mp,
CExpression *pexpr);
CExpression *pexpr,
CColRefSet *pcrsNotNull);

// remove CTE Anchor nodes
static CExpression *PexprRemoveCTEAnchors(CMemoryPool *mp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,13 @@ class CPredicateUtils
static BOOL FPlainEquality(CExpression *pexpr);

// is the given expression a self comparison on some column
static BOOL FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt);
static BOOL FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt,
CColRefSet *pcrsNotNull);

// eliminate self comparison if possible
static CExpression *PexprEliminateSelfComparison(CMemoryPool *mp,
CExpression *pexpr);
CExpression *pexpr,
CColRefSet *pcrsNotNull);

// is the given expression in the form (col1 Is NOT DISTINCT FROM col2)
static BOOL FINDFScalarIdents(CExpression *pexpr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ using namespace gpopt;
// eliminate self comparisons in the given expression
CExpression *
CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
CExpression *pexpr)
CExpression *pexpr,
CColRefSet *pcrsNotNull)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
Expand All @@ -79,7 +80,8 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,

if (CUtils::FScalarCmp(pexpr))
{
return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr);
return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr,
pcrsNotNull);
}

// recursively process children
Expand All @@ -88,7 +90,7 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprEliminateSelfComparison(mp, (*pexpr)[ul]);
PexprEliminateSelfComparison(mp, (*pexpr)[ul], pcrsNotNull);
pdrgpexprChildren->Append(pexprChild);
}

Expand Down Expand Up @@ -3077,8 +3079,8 @@ CExpressionPreprocessor::PexprPreprocess(
pexprConvert2In->Release();

// (11) eliminate self comparisons
CExpression *pexprSelfCompEliminated =
PexprEliminateSelfComparison(mp, pexprInferredPreds);
CExpression *pexprSelfCompEliminated = PexprEliminateSelfComparison(
mp, pexprInferredPreds, pexprInferredPreds->DeriveNotNullColumns());
GPOS_CHECK_ABORT;
pexprInferredPreds->Release();

Expand Down
12 changes: 7 additions & 5 deletions src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,8 @@ CPredicateUtils::FPlainEquality(CExpression *pexpr)

// is an expression a self comparison on some column
BOOL
CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt,
CColRefSet *pcrsNotNull)
{
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(nullptr != pecmpt);
Expand All @@ -882,8 +883,8 @@ CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
CColRef *colref =
const_cast<CColRef *>(CScalarIdent::PopConvert(popLeft)->Pcr());

return CColRef::EcrtTable == colref->Ecrt() &&
!CColRefTable::PcrConvert(colref)->IsNullable();
// return true if column is a member of NotNull columns(pcrsNotNull) of parent expression
return pcrsNotNull->FMember(colref);
}

return false;
Expand All @@ -892,14 +893,15 @@ CPredicateUtils::FSelfComparison(CExpression *pexpr, IMDType::ECmpType *pecmpt)
// eliminate self comparison and replace it with True or False if possible
CExpression *
CPredicateUtils::PexprEliminateSelfComparison(CMemoryPool *mp,
CExpression *pexpr)
CExpression *pexpr,
CColRefSet *pcrsNotNull)
{
GPOS_ASSERT(pexpr->Pop()->FScalar());

pexpr->AddRef();
CExpression *pexprNew = pexpr;
IMDType::ECmpType cmp_type = IMDType::EcmptOther;
if (FSelfComparison(pexpr, &cmp_type))
if (FSelfComparison(pexpr, &cmp_type, pcrsNotNull))
{
switch (cmp_type)
{
Expand Down
6 changes: 0 additions & 6 deletions src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,6 @@ CXformUtils::ExfpExpandJoinOrder(CExpressionHandle &exprhdl,
return CXform::ExfpNone;
}

//#ifdef GPOS_DEBUG
CAutoMemoryPool amp;
GPOS_ASSERT_FIXME(!FJoinPredOnSingleChild(amp.Pmp(), exprhdl) &&
"join predicates are not pushed down");
//#endif // GPOS_DEBUG

if (nullptr != exprhdl.Pgexpr())
{
// if handle is attached to a group expression, transformation is applied
Expand Down
3 changes: 2 additions & 1 deletion src/backend/gporca/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ DPv2GreedyOnly DPv2MinCardOnly DPv2QueryOnly LOJ-PushDown LeftJoinDPv2JoinOrder;
COuterJoin2Test:
LOJ-IsNullPred Select-Proj-OuterJoin OuterJoin-With-OuterRefs Join-Disj-Subqs
EffectOfLocalPredOnJoin EffectOfLocalPredOnJoin2 EffectOfLocalPredOnJoin3
LeftJoin-UnsupportedFilter-Cardinality LeftOuter2InnerUnionAllAntiSemiJoin;
LeftJoin-UnsupportedFilter-Cardinality LeftOuter2InnerUnionAllAntiSemiJoin
LOJ-With-Single-Pred-On-Outer LOJ_NULLTEST-On-SelfCheck-Pred;
COuterJoin3Test:
LOJ_IDF_no_convert_outer_ref_predicate_with_NULL
Expand Down
Loading

0 comments on commit b628a60

Please sign in to comment.