diff --git a/go/vt/vtgate/planbuilder/abstract/join.go b/go/vt/vtgate/planbuilder/abstract/join.go index 55ad992f3e..b15c0b780d 100644 --- a/go/vt/vtgate/planbuilder/abstract/join.go +++ b/go/vt/vtgate/planbuilder/abstract/join.go @@ -46,22 +46,10 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable) } j.LHS = lhs return j, nil - case deps.IsSolvedBy(j.RHS.TableID()): - // we are looking for predicates like `tbl.col = <>` or `<> = tbl.col`, - // where tbl is on the rhs of the left outer join - if cmp, isCmp := expr.(*sqlparser.ComparisonExpr); isCmp && cmp.Operator != sqlparser.NullSafeEqualOp && - (sqlparser.IsColName(cmp.Left) && semTable.RecursiveDeps(cmp.Left).IsSolvedBy(j.RHS.TableID()) || - sqlparser.IsColName(cmp.Right) && semTable.RecursiveDeps(cmp.Right).IsSolvedBy(j.RHS.TableID())) { - // When the predicate we are pushing is using information from an outer table, we can - // check whether the predicate is "null-intolerant" or not. Null-intolerant in this context means that - // the predicate will not return true if the table columns are null. - // Since an outer join is an inner join with the addition of all the rows from the left-hand side that - // matched no rows on the right-hand, if we are later going to remove all the rows where the right-hand - // side did not match, we might as well turn the join into an inner join. 
- // This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al - j.LeftJoin = false - } + case deps.IsSolvedBy(j.RHS.TableID()): + j.tryConvertToInnerJoin(expr, semTable) + if !j.LeftJoin { rhs, err := j.RHS.PushPredicate(expr, semTable) if err != nil { @@ -70,19 +58,68 @@ func (j *Join) PushPredicate(expr sqlparser.Expr, semTable *semantics.SemTable) j.RHS = rhs return j, err } + op := &Filter{ Source: j, Predicates: []sqlparser.Expr{expr}, } return op, nil + case deps.IsSolvedBy(j.LHS.TableID().Merge(j.RHS.TableID())): - j.Predicate = sqlparser.AndExpressions(j.Predicate, expr) - return j, nil + j.tryConvertToInnerJoin(expr, semTable) + + if !j.LeftJoin { + j.Predicate = sqlparser.AndExpressions(j.Predicate, expr) + return j, nil + } + + op := &Filter{ + Source: j, + Predicates: []sqlparser.Expr{expr}, + } + return op, nil } return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Cannot push predicate: %s", sqlparser.String(expr)) } +// tryConvertToInnerJoin turns a left outer join into an inner join when the pushed predicate uses a column of the +// right-hand table and is "null-intolerant" (a comparison other than the null-safe equal, or an IS NOT NULL check). +// +// Null-intolerant in this context means that the predicate will not be true if the table columns are null. +// +// Since an outer join is an inner join with the addition of all the rows from the left-hand side that +// matched no rows on the right-hand side, if we are later going to remove all the rows where the right-hand +// side did not match, we might as well turn the join into an inner join.
+// +// This is based on the paper "Canonical Abstraction for Outerjoin Optimization" by J Rao et al +func (j *Join) tryConvertToInnerJoin(expr sqlparser.Expr, semTable *semantics.SemTable) { + if !j.LeftJoin { + return + } + + switch expr := expr.(type) { + case *sqlparser.ComparisonExpr: + if expr.Operator == sqlparser.NullSafeEqualOp { + return + } + + if sqlparser.IsColName(expr.Left) && semTable.RecursiveDeps(expr.Left).IsSolvedBy(j.RHS.TableID()) || + sqlparser.IsColName(expr.Right) && semTable.RecursiveDeps(expr.Right).IsSolvedBy(j.RHS.TableID()) { + j.LeftJoin = false + } + + case *sqlparser.IsExpr: + if expr.Right != sqlparser.IsNotNullOp { + return + } + + if sqlparser.IsColName(expr.Left) && semTable.RecursiveDeps(expr.Left).IsSolvedBy(j.RHS.TableID()) { + j.LeftJoin = false + } + } +} + // TableID implements the Operator interface func (j *Join) TableID() semantics.TableSet { return j.RHS.TableID().Merge(j.LHS.TableID()) diff --git a/go/vt/vtgate/planbuilder/testdata/filter_cases.txt b/go/vt/vtgate/planbuilder/testdata/filter_cases.txt index 83edf5118f..75ef917849 100644 --- a/go/vt/vtgate/planbuilder/testdata/filter_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/filter_cases.txt @@ -5965,6 +5965,114 @@ Gen4 plan same as above } Gen4 plan same as above +# For left joins, where conditions using both sides of the join are not pulled into the join conditions +"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5" +{ + "QueryType": "SELECT", + "Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE (user.name = 'Trent Reznor' OR music.genre = 'pop') AND music.user_id = 5", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1", + "Query":
"select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')", + "Table": "`user`, music", + "Values": [ + "INT64(5)" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.music", + "user.user" + ] +} +Gen4 plan same as above + +# For left joins, where conditions using both sides of the join are not pulled into the join conditions (swapped order) +"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')" +{ + "QueryType": "SELECT", + "Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND (user.name = 'Trent Reznor' OR music.genre = 'pop')", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music left join `user` on music.user_id = `user`.id where 1 != 1", + "Query": "select music.id from music left join `user` on music.user_id = `user`.id where music.user_id = 5 and (`user`.`name` = 'Trent Reznor' or music.genre = 'pop')", + "Table": "`user`, music", + "Values": [ + "INT64(5)" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.music", + "user.user" + ] +} +Gen4 plan same as above + +# For left joins, null intolerant where conditions using both sides of the join are transformed to inner joins +"SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name" +{ + "QueryType": "SELECT", + "Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON music.user_id = user.id WHERE music.user_id = 5 AND music.componist = user.name", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music, `user` 
where 1 != 1", + "Query": "select music.id from music, `user` where music.user_id = 5 and music.user_id = `user`.id and music.componist = `user`.`name`", + "Table": "`user`, music", + "Values": [ + "INT64(5)" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.music", + "user.user" + ] +} +Gen4 plan same as above + +# For left joins, null intolerant where conditions using `IS NOT NULL` allow outer join simplification +"SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL" +{ + "QueryType": "SELECT", + "Original": "SELECT music.id FROM music LEFT OUTER JOIN user ON user.id = music.user_id WHERE music.user_id = 5 AND user.id IS NOT NULL", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music, `user` where 1 != 1", + "Query": "select music.id from music, `user` where music.user_id = 5 and `user`.id is not null and `user`.id = music.user_id", + "Table": "`user`, music", + "Values": [ + "INT64(5)" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.music", + "user.user" + ] +} +Gen4 plan same as above + # optimize ORs to IN route op codes #1 "select col from user where id = 1 or id = 2" {