Fix complex predicates being pulled into `ON` conditions for `LEFT JOIN` statements. (#11317)

* Fix complex predicates being pulled into `ON` conditions for `LEFT JOIN` statements.

Also convert `LEFT JOIN` statements with complex predicates into `INNER JOIN` when possible.

Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>

* Allow outer join simplification for `IS NOT NULL` expressions.

Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>

Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>
This commit is contained in:
Arthur Schreiber 2022-09-23 02:22:21 +02:00 коммит произвёл GitHub
Родитель bf6aa7c197
Коммит 8af0a03838
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 162 добавлений и 17 удалений

Просмотреть файл

@ -46,22 +46,10 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable)
}
j.LHS = lhs
return j, nil
case deps.IsSolvedBy(j.RHS.TableID()):
// we are looking for predicates like `tbl.col = <>` or `<> = tbl.col`,
// where tbl is on the rhs of the left outer join
if cmp, isCmp := expr.(*sqlparser.ComparisonExpr); isCmp && cmp.Operator != sqlparser.NullSafeEqualOp &&
(sqlparser.IsColName(cmp.Left) && semTable.RecursiveDeps(cmp.Left).IsSolvedBy(j.RHS.TableID()) ||
sqlparser.IsColName(cmp.Right) && semTable.RecursiveDeps(cmp.Right).IsSolvedBy(j.RHS.TableID())) {
// When the predicate we are pushing is using information from an outer table, we can
// check whether the predicate is "null-intolerant" or not. Null-intolerant in this context means that
// the predicate will not return true if the table columns are null.
// Since an outer join is an inner join with the addition of all the rows from the left-hand side that
// matched no rows on the right-hand, if we are later going to remove all the rows where the right-hand
// side did not match, we might as well turn the join into an inner join.
// This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al
j.LeftJoin = false
}
case deps.IsSolvedBy(j.RHS.TableID()):
j.tryConvertToInnerJoin(expr, semTable)
if !j.LeftJoin {
rhs, err := j.RHS.PushPredicate(expr, semTable)
if err != nil {
@ -70,19 +58,68 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable)
j.RHS = rhs
return j, err
}
op := &Filter{
Source: j,
Predicates: []sqlparser.Expr{expr},
}
return op, nil
case deps.IsSolvedBy(j.LHS.TableID().Merge(j.RHS.TableID())):
j.Predicate = sqlparser.AndExpressions(j.Predicate, expr)
return j, nil
j.tryConvertToInnerJoin(expr, semTable)
if !j.LeftJoin {
j.Predicate = sqlparser.AndExpressions(j.Predicate, expr)
return j, nil
}
op := &Filter{
Source: j,
Predicates: []sqlparser.Expr{expr},
}
return op, nil
}
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Cannot push predicate: %s", sqlparser.String(expr))
}
// tryConvertToInnerJoin downgrades a left outer join to an inner join when
// expr is a "null-intolerant" predicate on the right-hand side of the join,
// i.e. a predicate that cannot be true when the right-hand side columns are
// null.
//
// An outer join is an inner join plus all left-hand side rows that matched
// nothing on the right-hand side. If a later predicate would discard exactly
// those unmatched rows anyway, the join can just as well be an inner join.
//
// Two predicate shapes are recognized:
//   - a comparison (anything except the null-safe equality operator) where
//     either operand is a column solved by the right-hand side table set
//   - `<col> IS NOT NULL` where the column is solved by the right-hand side
//     table set
//
// On a match, j.LeftJoin is set to false; in every other case j is left
// untouched. Nothing happens if j is not a left join to begin with.
//
// This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al
func (j *Join) tryConvertToInnerJoin(expr sqlparser.Expr, semTable *semantics.SemTable) {
	if !j.LeftJoin {
		return
	}

	// isRHSColumn reports whether e is a plain column reference whose
	// dependencies are fully covered by the right-hand side of the join.
	isRHSColumn := func(e sqlparser.Expr) bool {
		return sqlparser.IsColName(e) && semTable.RecursiveDeps(e).IsSolvedBy(j.RHS.TableID())
	}

	switch predicate := expr.(type) {
	case *sqlparser.ComparisonExpr:
		// `<=>` can be true for null operands, so it must keep the outer join.
		if predicate.Operator != sqlparser.NullSafeEqualOp &&
			(isRHSColumn(predicate.Left) || isRHSColumn(predicate.Right)) {
			j.LeftJoin = false
		}
	case *sqlparser.IsExpr:
		// Only `IS NOT NULL` qualifies; other IS forms are left alone.
		if predicate.Right == sqlparser.IsNotNullOp && isRHSColumn(predicate.Left) {
			j.LeftJoin = false
		}
	}
}
// TableID implements the Operator interface
func (j *Join) TableID() semantics.TableSet {
return j.RHS.TableID().Merge(j.LHS.TableID())

Просмотреть файл

@ -5965,6 +5965,114 @@ Gen4 plan same as above
}
Gen4 plan same as above
# For left joins, where conditions using both sides of the join are not pulled into the join conditions
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5"
{
"QueryType": "SELECT",
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1",
"Query": "select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')",
"Table": "`user`, music",
"Values": [
"INT64(5)"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user"
]
}
Gen4 plan same as above
# For left joins, where conditions using both sides of the join are not pulled into the join conditions (swapped order)
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')"
{
"QueryType": "SELECT",
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1",
"Query": "select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')",
"Table": "`user`, music",
"Values": [
"INT64(5)"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user"
]
}
Gen4 plan same as above
# For left joins, null intolerant where conditions using both sides of the join are transformed to inner joins
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name"
{
"QueryType": "SELECT",
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select music.id from music, `user` where 1 != 1",
"Query": "select music.id from music, `user` where music.user_id = 5 and music.user_id = `user`.id and music.componist = `user`.`name`",
"Table": "`user`, music",
"Values": [
"INT64(5)"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user"
]
}
Gen4 plan same as above
# For left joins, null intolerant where conditions using `IS NOT NULL` allow outer join simplification
"SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL"
{
"QueryType": "SELECT",
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select music.id from music, `user` where 1 != 1",
"Query": "select music.id from music, `user` where music.user_id = 5 and `user`.id is not null and `user`.id = music.user_id",
"Table": "`user`, music",
"Values": [
"INT64(5)"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user"
]
}
Gen4 plan same as above
# optimize ORs to IN route op codes #1
"select col from user where id = 1 or id = 2"
{