Description
Overview of the Issue
We have run into a query that gets incorrectly broken up by the Query Planner instead of fully being pushed down to MySQL.
Here's a simplified version of that query (using the vschema located at go/vt/vtgate/planbuilder/testdata/vschemas/schema.json
):
SELECT m1.id
FROM music as m1
JOIN (
SELECT max(id) as id
FROM music
WHERE music.user_id = 1234 AND music.foobar NOT IN (
SELECT foobar FROM user_extra WHERE user_extra.user_id = 1234
)
) as m2 ON m1.id = m2.id
The derived table m2
has enough information to be routed to a specific shard (user_extra
and music
have conditions on user_id
, which is a hash
vindex). The join between m1
and m2
happens on a shared lookup vindex.
The above mentioned conditions should be enough to have the query fully merged by the query planner and pushed down completely to MySQL. Instead, the query is broken up. Here's the query plan (taken on main
):
{
"QueryType": "SELECT",
"Original": "SELECT m1.id FROM music as m1 JOIN (SELECT max(id) as id FROM music WHERE music.user_id = 1234 AND music.foobar NOT IN (SELECT foobar FROM user_extra WHERE user_extra.user_id = 1234)) as m2 ON m1.id = m2.id",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "R:0",
"JoinVars": {
"m2_id": 0
},
"TableName": "music_music",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select m2.id from (select max(id) as id from music where 1 != 1) as m2 where 1 != 1",
"Query": "select m2.id from (select max(id) as id from music where music.user_id = 1234 and music.foobar not in (select foobar from user_extra where user_extra.user_id = 1234)) as m2",
"Table": "music",
"Values": [
"1234"
],
"Vindex": "user_index"
},
{
"OperatorType": "VindexLookup",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Values": [
":m2_id"
],
"Vindex": "music_user_map",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "IN",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select `name`, keyspace_id from name_user_vdx where 1 != 1",
"Query": "select `name`, keyspace_id from name_user_vdx where `name` in ::__vals",
"Table": "name_user_vdx",
"Values": [
"::name"
],
"Vindex": "user_index"
},
{
"OperatorType": "Route",
"Variant": "ByDestination",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select m1.id from music as m1 where 1 != 1",
"Query": "select m1.id from music as m1 where m1.id = :m2_id",
"Table": "music"
}
]
}
]
},
"TablesUsed": [
"user.music",
"user.user_extra"
]
}
Variations of this query get merged correctly. For example, a derived table with a nested join:
SELECT m1.id
FROM music as m1
JOIN (
SELECT max(id) as id
FROM music
INNER JOIN user_extra ue ON music.user_id = ue.user_id AND music.foobar = ue.foobar WHERE music.user_id = 1234
) as m2 ON m1.id = m2.id
{
"QueryType": "SELECT",
"Original": "SELECT m1.id FROM music as m1 JOIN (SELECT max(id) as id FROM music INNER JOIN user_extra ue ON music.user_id = ue.user_id AND music.foobar = ue.foobar WHERE music.user_id = 1234) as m2 ON m1.id = m2.id",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select m1.id from (select max(id) as id from music, user_extra as ue where 1 != 1) as m2, music as m1 where 1 != 1",
"Query": "select m1.id from (select max(id) as id from music, user_extra as ue where music.user_id = 1234 and music.user_id = ue.user_id and music.foobar = ue.foobar) as m2, music as m1 where m1.id = m2.id",
"Table": "music, user_extra",
"Values": [
"1234"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user_extra"
]
}
The query for the derived table gets merged correctly:
SELECT max(id) as id
FROM music
WHERE music.user_id = 1234 AND music.foobar NOT IN (
SELECT foobar FROM user_extra WHERE user_extra.user_id = 1234
)
{
"QueryType": "SELECT",
"Original": "SELECT max(id) as id FROM music WHERE music.user_id = 1234 AND music.foobar NOT IN (SELECT foobar FROM user_extra WHERE user_extra.user_id = 1234)",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select max(id) as id from music where 1 != 1",
"Query": "select max(id) as id from music where music.user_id = 1234 and music.foobar not in (select foobar from user_extra where user_extra.user_id = 1234)",
"Table": "music",
"Values": [
"1234"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music",
"user.user_extra"
]
}
And a join with a derived table without a subquery works fine too:
SELECT m1.id
FROM music as m1
JOIN (
SELECT max(id) as id FROM music WHERE music.user_id = 1234
) as m2 ON m1.id = m2.id
{
"QueryType": "SELECT",
"Original": "SELECT m1.id FROM music as m1 JOIN (SELECT max(id) as id FROM music WHERE music.user_id = 1234) as m2 ON m1.id = m2.id",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select m1.id from (select max(id) as id from music where 1 != 1) as m2, music as m1 where 1 != 1",
"Query": "select m1.id from (select max(id) as id from music where music.user_id = 1234) as m2, music as m1 where m1.id = m2.id",
"Table": "music",
"Values": [
"1234"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.music"
]
}
Reproduction Steps
N/A
Binary Version
v19 - main
Operating System and Environment details
N/A
Log Fragments
No response