Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Multi-column IndexScan plan selection fix #1305

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions src/optimizer/rule_impls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/optimizer/rule_impls.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -386,17 +386,22 @@ void GetToIndexScan::Transform(
std::vector<oid_t> index_key_column_id_list;
std::vector<ExpressionType> index_expr_type_list;
std::vector<type::Value> index_value_list;
std::unordered_set<oid_t> index_col_set(
index_object->GetKeyAttrs().begin(),
index_object->GetKeyAttrs().end());
for (size_t offset = 0; offset < key_column_id_list.size(); offset++) {
auto col_id = key_column_id_list[offset];
if (index_col_set.find(col_id) != index_col_set.end()) {
index_key_column_id_list.push_back(col_id);

// Only pick the index if the query columns match the index's columns in
// the same order.
auto index_id_list = index_object->GetKeyAttrs();
for (size_t offset = 0; (offset < key_column_id_list.size()) &&
(offset < index_id_list.size());
offset++) {
if (index_id_list[offset] == key_column_id_list[offset]) {
index_key_column_id_list.push_back(key_column_id_list[offset]);
index_expr_type_list.push_back(expr_type_list[offset]);
index_value_list.push_back(value_list[offset]);
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should check for an exact same ordering here. For example, if you have an index on column (a, b), and your predicates are "b = 5 and a = 1", then we should be able to use the index scan. However, the check here won't identify that because it requires the order in the predicates to be exactly the same as in the index.

After thinking about this, I actually think that you should just keep the old index_key_column_id_list. You just need to add a flag recording whether the lead (highest) column of the index has been referenced in the predicates. As long as that is true, we should be able to use the index for the scan. Thoughts? @chenboy @vkonagar

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. I also think we don't need to consider order here. The way to fix this issue is letting the cost model compute the correct cost for these indices.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @chenboy! @vkonagar and I are discussing some implementation details for this on Slack. We've added you to the channel. There are some cost-model-related issues that we think you probably understand better than we do. Can you take a look at Slack? Thanks!

break;
}
}

// Add transformed plan
if (!index_key_column_id_list.empty()) {
auto index_scan_op = PhysicalIndexScan::make(
Expand Down
86 changes: 86 additions & 0 deletions test/optimizer/optimizer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "planner/update_plan.h"
#include "sql/testing_sql_util.h"
#include "planner/seq_scan_plan.h"
#include "planner/index_scan_plan.h"
#include "planner/abstract_join_plan.h"
#include "planner/hash_join_plan.h"
#include "binder/bind_node_visitor.h"
Expand Down Expand Up @@ -494,5 +495,90 @@ TEST_F(OptimizerTests, ExecuteTaskStackTest) {
ASSERT_GT(timer.GetDuration(), start_time);
}

// Checks which scan plan the optimizer builds for equality predicates on a
// table that has a single multi-column index on (a, c, d, e), and which
// columns end up as the index scan's key columns.
//
// The key column id expectations below use 0 for a, 2 for c, 3 for d and 4
// for e -- presumably each column's position in the CREATE TABLE statement;
// confirm against the catalog.
TEST_F(OptimizerTests, MultiColumnIndexScanPlanTest) {
// Create the default database in its own transaction.
auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
auto txn = txn_manager.BeginTransaction();
catalog::Catalog::GetInstance()->CreateDatabase(DEFAULT_DB_NAME, txn);
txn_manager.CommitTransaction(txn);

auto tuple_count = 100;

// Create a five-column table; column b is deliberately left out of the index
// created further down.
TestingSQLUtil::ExecuteSQLQuery(
"CREATE TABLE test(a INT, b INT, c INT, d INT, e INT);");

// Insert tuple_count rows; row i holds (i, i + 1, i + 2, i + 3, i + 4).
for (auto i = 0; i < tuple_count; i++) {
std::stringstream oss;
oss << "INSERT into test VALUES(" << i << "," << i + 1 << "," << i + 2
<< "," << i + 3 << "," << i + 4 << ");";
TestingSQLUtil::ExecuteSQLQuery(oss.str());
}

// Create a multi-column index whose key order is a, c, d, e.
TestingSQLUtil::ExecuteSQLQuery("CREATE INDEX Index1 on test(a, c, d, e);");

// A single transaction is used for every planning call below and is
// committed at the end of the test.
txn = txn_manager.BeginTransaction();
optimizer::Optimizer optimizer;
auto &peloton_parser = parser::PostgresParser::GetInstance();

// Despite its name, create_stmt holds the parse tree of each SELECT query; it
// is reassigned before every planning call.
//
// Case 1: predicate only on e, the last index column. A sequential scan is
// expected.
auto create_stmt =
peloton_parser.BuildParseTree("SELECT * FROM test where e = 8");
auto plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::SEQSCAN);

// Case 2: predicates on c and e but not on the leading index column a. A
// sequential scan is expected.
create_stmt =
peloton_parser.BuildParseTree("SELECT * FROM test where c = 4 and e = 6");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::SEQSCAN);

// Case 3: predicates on a and e. An index scan is expected, keyed on a only
// (e is not the index column that follows a).
// NOTE(review): EXPECT_EQ is non-fatal, so the static_cast below (and in the
// later cases) still runs if the plan is not an index scan.
create_stmt =
peloton_parser.BuildParseTree("SELECT * FROM test where a = 4 and e = 8");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::INDEXSCAN);
auto index_scan_plan = static_cast<planner::IndexScanPlan *>(plan.get());
EXPECT_EQ(index_scan_plan->GetKeyColumnIds().size(), 1);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[0], 0);

// Case 4: predicates on a and c, the first two index columns. An index scan
// keyed on both is expected.
create_stmt =
peloton_parser.BuildParseTree("SELECT * FROM test where a = 4 and c = 6");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::INDEXSCAN);
index_scan_plan = static_cast<planner::IndexScanPlan *>(plan.get());
EXPECT_EQ(index_scan_plan->GetKeyColumnIds().size(), 2);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[0], 0);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[1], 2);

// Case 5: predicates on a, c and d, the first three index columns. An index
// scan keyed on all three is expected.
create_stmt = peloton_parser.BuildParseTree(
"SELECT * FROM test where a = 4 and c = 6 and d = 7");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::INDEXSCAN);
index_scan_plan = static_cast<planner::IndexScanPlan *>(plan.get());
EXPECT_EQ(index_scan_plan->GetKeyColumnIds().size(), 3);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[0], 0);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[1], 2);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[2], 3);

// Case 6: predicates on a, b, c and d. b is not part of the index and comes
// second in the predicate list; the test expects an index scan keyed on a
// only.
create_stmt = peloton_parser.BuildParseTree(
"SELECT * FROM test where a = 4 and b = 5 and c = 6 and d = 7");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::INDEXSCAN);
index_scan_plan = static_cast<planner::IndexScanPlan *>(plan.get());
EXPECT_EQ(index_scan_plan->GetKeyColumnIds().size(), 1);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[0], 0);

// Case 7: predicates on all four index columns in index order. An index scan
// keyed on a, c, d and e is expected.
create_stmt = peloton_parser.BuildParseTree(
"SELECT * FROM test where a = 4 and c = 6 and d = 7 and e = 8");
plan = optimizer.BuildPelotonPlanTree(create_stmt, DEFAULT_DB_NAME, txn);
EXPECT_EQ(plan->GetPlanNodeType(), PlanNodeType::INDEXSCAN);
index_scan_plan = static_cast<planner::IndexScanPlan *>(plan.get());
EXPECT_EQ(index_scan_plan->GetKeyColumnIds().size(), 4);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[0], 0);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[1], 2);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[2], 3);
EXPECT_EQ(index_scan_plan->GetKeyColumnIds()[3], 4);

// Commit the transaction that was used for planning.
txn_manager.CommitTransaction(txn);
}

} // namespace test
} // namespace peloton