Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 74 additions & 36 deletions src/common/bustub_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/common/bustub_ddl.cpp
//
// Copyright (c) 2015-2025, Carnegie Mellon University Database Group
// Copyright (c) 2015-2026, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -37,6 +37,7 @@
#include "common/util/string_util.h"
#include "concurrency/lock_manager.h"
#include "concurrency/transaction.h"
#include "container/hash/hash_function.h"
#include "execution/execution_engine.h"
#include "execution/executor_context.h"
#include "execution/executors/mock_scan_executor.h"
Expand All @@ -50,6 +51,7 @@
#include "recovery/log_manager.h"
#include "storage/disk/disk_manager.h"
#include "storage/disk/disk_manager_memory.h"
#include "storage/index/generic_key.h"
#include "type/value_factory.h"

namespace bustub {
Expand All @@ -69,18 +71,39 @@ void BusTubInstance::HandleCreateStatement(Transaction *txn, const CreateStateme
}
auto key_schema = Schema::CopySchema(&info->schema_, col_ids);

// TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
// and create index with different key type that can hold multiple keys based on number of index columns.
//
// You can also create clustered index that directly stores value inside the index by modifying the value type.

if (col_ids.empty() || col_ids.size() > 2) {
throw NotImplementedException("only support creating index with exactly one or two columns");
if (col_ids.empty()) {
throw NotImplementedException("Primary key cannot be empty");
}

index = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.table_ + "_pk", stmt.table_, info->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, true);
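// Compute the index key size in bytes as 4 bytes per key column.
// NOTE(review): this assumes every key column is exactly 4 bytes wide -- confirm for non-integer column types.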
uint32_t key_size = col_ids.size() * 4;

// We create an index of sufficient size depending on the key size.
//! NOTE: Currently, we support key sizes of at most 64 bytes.
//! This can be easily extended to support larger key sizes.
if (key_size <= 4) {
index = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(txn, stmt.table_ + "_pk", stmt.table_,
info->schema_, key_schema, col_ids, 4,
HashFunction<GenericKey<4>>{}, true);
} else if (key_size <= 8) {
index = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(txn, stmt.table_ + "_pk", stmt.table_,
info->schema_, key_schema, col_ids, 8,
HashFunction<GenericKey<8>>{}, true);
} else if (key_size <= 16) {
index = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(txn, stmt.table_ + "_pk", stmt.table_,
info->schema_, key_schema, col_ids, 16,
HashFunction<GenericKey<16>>{}, true);
} else if (key_size <= 32) {
index = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(txn, stmt.table_ + "_pk", stmt.table_,
info->schema_, key_schema, col_ids, 32,
HashFunction<GenericKey<32>>{}, true);
} else if (key_size <= 64) {
index = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(txn, stmt.table_ + "_pk", stmt.table_,
info->schema_, key_schema, col_ids, 64,
HashFunction<GenericKey<64>>{}, true);
} else {
throw NotImplementedException("Unsupported: primary key size exceeds 64 bytes");
}
}
l.unlock();

Expand Down Expand Up @@ -108,41 +131,56 @@ void BusTubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
}
auto key_schema = Schema::CopySchema(&stmt.table_->schema_, col_ids);

// TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
// and create index with different key type that can hold multiple keys based on number of index columns.
//
// You can also create clustered index that directly stores value inside the index by modifying the value type.

if (col_ids.empty() || col_ids.size() > 2) {
throw NotImplementedException("only support creating index with exactly one or two columns");
if (col_ids.empty()) {
throw NotImplementedException("Index columns cannot be empty");
}

std::unique_lock<std::shared_mutex> l(catalog_lock_);
std::shared_ptr<IndexInfo> info = nullptr;

if (stmt.index_type_.empty()) {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false); // create default index
} else if (stmt.index_type_ == "hash") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
} else if (stmt.index_type_ == "bplustree") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex);
IndexType index_type = IndexType::BPlusTreeIndex; // Default
if (stmt.index_type_ == "hash") {
index_type = IndexType::HashTableIndex;
} else if (stmt.index_type_ == "bplustree" || stmt.index_type_.empty()) {
index_type = IndexType::BPlusTreeIndex;
} else if (stmt.index_type_ == "stl_ordered") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex);
index_type = IndexType::STLOrderedIndex;
} else if (stmt.index_type_ == "stl_unordered") {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex);
index_type = IndexType::STLUnorderedIndex;
} else {
UNIMPLEMENTED("unsupported index type " + stmt.index_type_);
}

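// Compute the index key size in bytes as 4 bytes per key column.
// NOTE(review): this assumes every key column is exactly 4 bytes wide -- confirm for non-integer column types.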
uint32_t key_size = col_ids.size() * 4;

// We create an index of sufficient size depending on the key size.
//! NOTE: Currently, we support key sizes of at most 64 bytes.
//! This can be easily extended to support larger key sizes.
if (key_size <= 4) {
info = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 4,
HashFunction<GenericKey<4>>{}, false, index_type);
} else if (key_size <= 8) {
info = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 8,
HashFunction<GenericKey<8>>{}, false, index_type);
} else if (key_size <= 16) {
info = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 16,
HashFunction<GenericKey<16>>{}, false, index_type);
} else if (key_size <= 32) {
info = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 32,
HashFunction<GenericKey<32>>{}, false, index_type);
} else if (key_size <= 64) {
info = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 64,
HashFunction<GenericKey<64>>{}, false, index_type);
} else {
throw NotImplementedException("Unsupported: index key size exceeds 64 bytes");
}

l.unlock();

if (info == nullptr) {
Expand Down
176 changes: 176 additions & 0 deletions test/sql/p3.22-composite-key-index-scan.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# 6 pts

statement ok
set force_optimizer_starter_rule=yes

# Create a table
statement ok
create table t1(v1 int, v2 int, v3 int);

query
insert into t1 values (1, 50, 645), (2, 40, 721), (4, 20, 445), (5, 10, 445), (3, 30, 645);
----
5

# Build index
statement ok
create index t1v1v2 on t1(v1, v2);

statement ok
explain select * from t1 where v1 = 1 and v2 = 50;

query +ensure:index_scan
select * from t1 where v1 = 1 and v2 = 50;
----
1 50 645

statement ok
explain select * from t1 where v1 = 1;

query +ensure:index_scan
select * from t1 where v1 = 1;
----
1 50 645

query +ensure:index_scan
select * from t1 where v1 = 0;
----

query +ensure:index_scan
select * from t1 where v1 = 5 and v2 = 10 and v3 = 445;
----
5 10 445

# Create another table
statement ok
create table t2(v1 int, v2 int, v3 int, v4 int);

query
insert into t2 values (1, 50, 6, 7), (2, 10, 8, 9), (4, 20, 7, 11), (5, 40, 3, 15), (3, 30, 100, 200);
----
5

# Build index
statement ok
create index t2v2 on t2(v2);

statement ok
create index t2v3v4 on t2(v3, v4);

query +ensure:index_scan
select * from t2 where v2 = 50;
----
1 50 6 7

query +ensure:index_scan
select * from t2 where v3 = 8;
----
2 10 8 9

query +ensure:index_scan
select * from t2 where v3 = 8 and v4 = 9;
----
2 10 8 9

query +ensure:seq_scan
select * from t2 where v1 = 5;
----
5 40 3 15

query +ensure:seq_scan
select * from t2 where v4 = 15;
----
5 40 3 15

query +ensure:index_scan
select * from t2 where v2 = 20 and v3 = 7;
----
4 20 7 11

query +ensure:index_scan
select * from t2 where v1 = 4 and v3 = 7;
----
4 20 7 11

# Create another table
statement ok
create table t3(v1 int, v2 int, v3 int, v4 int);

# Build a three-column index on the still-empty table; the rows are inserted afterwards
statement ok
create index t3v1v2v3 on t3(v1, v2, v3);

query
insert into t3 values (1, 21, 31, 41), (1, 21, 32, 42), (1, 22, 31, 42), (2, 22, 33, 43), (3, 23, 34, 44);
----
5

query +ensure:index_scan
select * from t3 where v1 = 1;
----
1 21 31 41
1 21 32 42
1 22 31 42

query +ensure:index_scan
select * from t3 where v1 = 1 and v2 = 21;
----
1 21 31 41
1 21 32 42

query +ensure:index_scan
select * from t3 where v1 = 1 and v2 = 21 and v3 = 31;
----
1 21 31 41

query +ensure:index_scan
select * from t3 where v1 = 1 and v2 = 21 and v3 = 31 and v4 = 41;
----
1 21 31 41

query +ensure:seq_scan
select * from t3 where v2 = 22;
----
1 22 31 42
2 22 33 43

# Create another table
statement ok
create table t4(v1 int, v2 int, v3 int);

query
insert into t4 values (1, 21, 32), (1, 22, 33), (2, 23, 34), (3, 24, 35), (4, 25, 38);
----
5

# Build index
statement ok
create index t4v1v2 on t4(v1, v2);

statement ok
create index t4v3 on t4(v3);

query +ensure:index_scan
select * from t4 where v1 = 1;
----
1 21 32
1 22 33

# Insert more rows after both indexes exist; the scans below expect to see them
query
insert into t4 values (1, 23, 36), (1, 24, 37), (3, 25, 39);
----
3

query +ensure:index_scan
select * from t4 where v1 = 1;
----
1 21 32
1 22 33
1 23 36
1 24 37

query +ensure:index_scan
select * from t4 where v3 = 39;
----
3 25 39
Loading