зеркало из https://github.com/github/vitess-gh.git
Fix complex predicates being pulled into `ON` conditions for `LEFT JOIN` statements. (#11317)
* Fix complex predicates being pulled into `ON` conditions for `LEFT JOIN` statements.
  Also convert `LEFT JOIN` statements with complex predicates into `INNER JOIN` when possible.
  Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>
* Allow outer join simplification for `IS NOT NULL` expressions.
  Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>

Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>
This commit is contained in:
Родитель
bf6aa7c197
Коммит
8af0a03838
|
@ -46,22 +46,10 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable)
|
|||
}
|
||||
j.LHS = lhs
|
||||
return j, nil
|
||||
case deps.IsSolvedBy(j.RHS.TableID()):
|
||||
// we are looking for predicates like `tbl.col = <>` or `<> = tbl.col`,
|
||||
// where tbl is on the rhs of the left outer join
|
||||
if cmp, isCmp := expr.(*sqlparser.ComparisonExpr); isCmp && cmp.Operator != sqlparser.NullSafeEqualOp &&
|
||||
(sqlparser.IsColName(cmp.Left) && semTable.RecursiveDeps(cmp.Left).IsSolvedBy(j.RHS.TableID()) ||
|
||||
sqlparser.IsColName(cmp.Right) && semTable.RecursiveDeps(cmp.Right).IsSolvedBy(j.RHS.TableID())) {
|
||||
// When the predicate we are pushing is using information from an outer table, we can
|
||||
// check whether the predicate is "null-intolerant" or not. Null-intolerant in this context means that
|
||||
// the predicate will not return true if the table columns are null.
|
||||
// Since an outer join is an inner join with the addition of all the rows from the left-hand side that
|
||||
// matched no rows on the right-hand, if we are later going to remove all the rows where the right-hand
|
||||
// side did not match, we might as well turn the join into an inner join.
|
||||
|
||||
// This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al
|
||||
j.LeftJoin = false
|
||||
}
|
||||
case deps.IsSolvedBy(j.RHS.TableID()):
|
||||
j.tryConvertToInnerJoin(expr, semTable)
|
||||
|
||||
if !j.LeftJoin {
|
||||
rhs, err := j.RHS.PushPredicate(expr, semTable)
|
||||
if err != nil {
|
||||
|
@ -70,19 +58,68 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable)
|
|||
j.RHS = rhs
|
||||
return j, err
|
||||
}
|
||||
|
||||
op := &Filter{
|
||||
Source: j,
|
||||
Predicates: []sqlparser.Expr{expr},
|
||||
}
|
||||
return op, nil
|
||||
|
||||
case deps.IsSolvedBy(j.LHS.TableID().Merge(j.RHS.TableID())):
|
||||
j.Predicate = sqlparser.AndExpressions(j.Predicate, expr)
|
||||
return j, nil
|
||||
j.tryConvertToInnerJoin(expr, semTable)
|
||||
|
||||
if !j.LeftJoin {
|
||||
j.Predicate = sqlparser.AndExpressions(j.Predicate, expr)
|
||||
return j, nil
|
||||
}
|
||||
|
||||
op := &Filter{
|
||||
Source: j,
|
||||
Predicates: []sqlparser.Expr{expr},
|
||||
}
|
||||
return op, nil
|
||||
}
|
||||
|
||||
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Cannot push predicate: %s", sqlparser.String(expr))
|
||||
}
|
||||
|
||||
// When a predicate uses information from an outer table, we can convert from an outer join to an inner join
|
||||
// if the predicate is "null-intolerant".
|
||||
//
|
||||
// Null-intolerant in this context means that the predicate will not be true if the table columns are null.
|
||||
//
|
||||
// Since an outer join is an inner join with the addition of all the rows from the left-hand side that
|
||||
// matched no rows on the right-hand, if we are later going to remove all the rows where the right-hand
|
||||
// side did not match, we might as well turn the join into an inner join.
|
||||
//
|
||||
// This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al
|
||||
func (j *Join) tryConvertToInnerJoin(expr sqlparser.Expr, semTable *semantics.SemTable) {
|
||||
if !j.LeftJoin {
|
||||
return
|
||||
}
|
||||
|
||||
switch expr := expr.(type) {
|
||||
case *sqlparser.ComparisonExpr:
|
||||
if expr.Operator == sqlparser.NullSafeEqualOp {
|
||||
return
|
||||
}
|
||||
|
||||
if sqlparser.IsColName(expr.Left) && semTable.RecursiveDeps(expr.Left).IsSolvedBy(j.RHS.TableID()) ||
|
||||
sqlparser.IsColName(expr.Right) && semTable.RecursiveDeps(expr.Right).IsSolvedBy(j.RHS.TableID()) {
|
||||
j.LeftJoin = false
|
||||
}
|
||||
|
||||
case *sqlparser.IsExpr:
|
||||
if expr.Right != sqlparser.IsNotNullOp {
|
||||
return
|
||||
}
|
||||
|
||||
if sqlparser.IsColName(expr.Left) && semTable.RecursiveDeps(expr.Left).IsSolvedBy(j.RHS.TableID()) {
|
||||
j.LeftJoin = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TableID implements the Operator interface
|
||||
func (j *Join) TableID() semantics.TableSet {
|
||||
return j.RHS.TableID().Merge(j.LHS.TableID())
|
||||
|
|
|
@ -5965,6 +5965,114 @@ Gen4 plan same as above
|
|||
}
|
||||
Gen4 plan same as above
|
||||
|
||||
# For left joins, where conditions using both sides of the join are not pulled into the join conditions
|
||||
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5"
|
||||
{
|
||||
"QueryType": "SELECT",
|
||||
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5",
|
||||
"Instructions": {
|
||||
"OperatorType": "Route",
|
||||
"Variant": "EqualUnique",
|
||||
"Keyspace": {
|
||||
"Name": "user",
|
||||
"Sharded": true
|
||||
},
|
||||
"FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1",
|
||||
"Query": "select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')",
|
||||
"Table": "`user`, music",
|
||||
"Values": [
|
||||
"INT64(5)"
|
||||
],
|
||||
"Vindex": "user_index"
|
||||
},
|
||||
"TablesUsed": [
|
||||
"user.music",
|
||||
"user.user"
|
||||
]
|
||||
}
|
||||
Gen4 plan same as above
|
||||
|
||||
# For left joins, where conditions using both sides of the join are not pulled into the join conditions (swapped order)
|
||||
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')"
|
||||
{
|
||||
"QueryType": "SELECT",
|
||||
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')",
|
||||
"Instructions": {
|
||||
"OperatorType": "Route",
|
||||
"Variant": "EqualUnique",
|
||||
"Keyspace": {
|
||||
"Name": "user",
|
||||
"Sharded": true
|
||||
},
|
||||
"FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1",
|
||||
"Query": "select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')",
|
||||
"Table": "`user`, music",
|
||||
"Values": [
|
||||
"INT64(5)"
|
||||
],
|
||||
"Vindex": "user_index"
|
||||
},
|
||||
"TablesUsed": [
|
||||
"user.music",
|
||||
"user.user"
|
||||
]
|
||||
}
|
||||
Gen4 plan same as above
|
||||
|
||||
# For left joins, null intolerant where conditions using both sides of the join are transformed to inner joins
|
||||
"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name"
|
||||
{
|
||||
"QueryType": "SELECT",
|
||||
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name",
|
||||
"Instructions": {
|
||||
"OperatorType": "Route",
|
||||
"Variant": "EqualUnique",
|
||||
"Keyspace": {
|
||||
"Name": "user",
|
||||
"Sharded": true
|
||||
},
|
||||
"FieldQuery": "select music.id from music, `user` where 1 != 1",
|
||||
"Query": "select music.id from music, `user` where music.user_id = 5 and music.user_id = `user`.id and music.componist = `user`.`name`",
|
||||
"Table": "`user`, music",
|
||||
"Values": [
|
||||
"INT64(5)"
|
||||
],
|
||||
"Vindex": "user_index"
|
||||
},
|
||||
"TablesUsed": [
|
||||
"user.music",
|
||||
"user.user"
|
||||
]
|
||||
}
|
||||
Gen4 plan same as above
|
||||
|
||||
# For left joins, null intolerant where conditions using `IS NOT NULL` allow outer join simplification
|
||||
"SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL"
|
||||
{
|
||||
"QueryType": "SELECT",
|
||||
"Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL",
|
||||
"Instructions": {
|
||||
"OperatorType": "Route",
|
||||
"Variant": "EqualUnique",
|
||||
"Keyspace": {
|
||||
"Name": "user",
|
||||
"Sharded": true
|
||||
},
|
||||
"FieldQuery": "select music.id from music, `user` where 1 != 1",
|
||||
"Query": "select music.id from music, `user` where music.user_id = 5 and `user`.id is not null and `user`.id = music.user_id",
|
||||
"Table": "`user`, music",
|
||||
"Values": [
|
||||
"INT64(5)"
|
||||
],
|
||||
"Vindex": "user_index"
|
||||
},
|
||||
"TablesUsed": [
|
||||
"user.music",
|
||||
"user.user"
|
||||
]
|
||||
}
|
||||
Gen4 plan same as above
|
||||
|
||||
# optimize ORs to IN route op codes #1
|
||||
"select col from user where id = 1 or id = 2"
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче