8 changes: 7 additions & 1 deletion mdb.py
@@ -105,7 +105,13 @@ def create_query_plan(query, keywords, action):
            dic['primary key'] = arglist[arglist.index('primary')-2]
        else:
            dic['primary key'] = None

        if 'unique key' in args:
            arglist = args[1:-1].split(' ')
            dic['unique key'] = arglist[arglist.index('unique')-2]
        else:
            dic['unique key'] = None

    if action=='import':
        dic = {'import table' if key=='import' else key: val for key, val in dic.items()}
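As a quick sanity check of the argument parsing above (a minimal sketch; the exact args string that create_query_plan receives here is an assumption):

    args = '(id int primary key, email str unique key)'
    arglist = args[1:-1].split(' ')
    arglist[arglist.index('primary') - 2]   # -> 'id'
    arglist[arglist.index('unique') - 2]    # -> 'email'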

82 changes: 65 additions & 17 deletions miniDB/database.py
@@ -4,7 +4,6 @@
import os,sys
import logging
import warnings
import readline
from tabulate import tabulate

sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB')
@@ -54,7 +53,7 @@ def __init__(self, name, load=True, verbose = True):
    self.create_table('meta_length', 'table_name,no_of_rows', 'str,int')
    self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str')
    self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list')
    self.create_table('meta_indexes', 'table_name,index_name', 'str,str')
    self.create_table('meta_indexes', 'table_name,column_name,index_name', 'str,str,str')
    self.save_database()

def save_database(self):
@@ -101,7 +100,7 @@ def _update(self):
    self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types, primary_key=None, unique_key=None, load=None):
    '''
    This method creates a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

@@ -110,11 +109,11 @@ def create_table(self, name, column_names, column_types, primary_key=None, load=
        column_names: list. Names of columns.
        column_types: list. Types of columns.
        primary_key: string. The primary key (if it exists).
        unique_key: string. Unique key column name (if it exists).
        load: boolean. Defines table object parameters as the name of the table and the column names.
    '''
    # print('here -> ', column_names.split(','))
    self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
    # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
    self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique_key=unique_key, load=load), }) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
    # check that new dynamic var doesnt exist already
    # self.no_of_tables += 1
    self._update()
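A hypothetical call exercising the new parameter (database, table and column names are made up; import path omitted):

    db = Database('testdb', load=False)
    db.create_table('users', 'id,email,name', 'int,str,str', primary_key='id', unique_key='email')
    db.tables['users']._insert([1, 'a@example.com', 'Alice'])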
@@ -160,14 +159,15 @@ def drop_table(self, table_name):
    self.save_database()


def import_table(self, table_name, filename, column_types=None, primary_key=None):
def import_table(self, table_name, filename, column_types=None, primary_key=None, unique_key=None):
    '''
    Creates table from CSV file.

    Args:
        filename: string. CSV filename. If not specified, filename's name will be used.
        column_types: list. Types of columns. If not specified, all will be set to type str.
        primary_key: string. The primary key (if it exists).
        unique_key: string. Unique key column name (if it exists).
    '''
    file = open(filename, 'r')

@@ -177,7 +177,7 @@ def import_table(self, table_name, filename, column_types=None, primary_key=None
            colnames = line.strip('\n')
            if column_types is None:
                column_types = ",".join(['str' for _ in colnames.split(',')])
            self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key)
            self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key, unique_key=unique_key)
            lock_ownership = self.lock_table(table_name, mode='x')
            first_line = False
            continue
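Likewise for CSV imports, a sketch (the CSV path and header row are hypothetical; the file is assumed to contain an id and an email column):

    db.import_table('users', 'users.csv', column_types='int,str,str', primary_key='id', unique_key='email')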
@@ -358,15 +358,22 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
        return table_name._select_where(columns, condition, distinct, order_by, desc, limit)

    if condition is not None:
        condition_column = split_condition(condition)[0]
        if "between" in condition.split():
            conditionColumn = condition.split(" ")[0]
        elif "or" in condition.split():
            conditionColumn = condition.split(" ")[0]
        elif "and" in condition.split():
            conditionColumn = condition.split(" ")[0]
        else:
            conditionColumn = split_condition(condition)[0]
    else:
        condition_column = ''
        conditionColumn = ''


    # self.lock_table(table_name, mode='x')
    if self.is_locked(table_name):
        return
    if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
    if self._has_index(table_name) and conditionColumn==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
        index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
        bt = self._load_idx(index_name)
        table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
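For reference, the column token the branches above end up with for a few sample conditions (a sketch; plain str.split is used, so spacing matters):

    # 'salary between 1000 and 2000'  ->  conditionColumn == 'salary'
    # 'id >= 3 and id <= 7'           ->  conditionColumn == 'id'
    # 'id=3'                          ->  falls through to split_condition, giving 'id'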
@@ -659,37 +666,78 @@ def create_index(self, index_name, table_name, index_type='btree'):
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
    '''

    if table_name.count('(') != 0:
        split_text = table_name.split('(')[1].split(')')
        table_name = table_name.split('(')[0].strip()
        table_column = split_text[0].strip()
        if table_column=='': table_column=self.tables[table_name].column_names[self.tables[table_name].pk_idx]
    else:
        table_column = self.tables[table_name].column_names[self.tables[table_name].pk_idx]

    if self.tables[table_name].pk_idx is None: # if no primary key, no index
        raise Exception('Cannot create index. Table has no primary key.')
    if index_name not in self.tables['meta_indexes'].column_by_name('index_name'):
        # btree and hash indexes are supported; more types can be added with another elif.
        self.tables['meta_indexes']._insert([table_name, table_column, index_name])
        if index_type=='btree':
            logging.info('Creating Btree index.')
            # insert a record with the name of the index and the table on which it's created to the meta_indexes table
            self.tables['meta_indexes']._insert([table_name, index_name])
            # create the actual index
            self._construct_index(table_name, index_name, table_column)
            self.save_database()
        elif index_type=='hash':
            logging.info('Creating Hash index.')
            # create the actual index
            self._construct_hash_index(table_name, index_name, table_column)
            self.save_database()
    else:
        raise Exception('Cannot create index. Another index with the same name already exists.')
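Hypothetical calls showing both forms the parsing above accepts (index, table and column names are made up):

    db.create_index('users_pk_idx', 'users')                               # no column given: defaults to the primary key, btree
    db.create_index('users_email_idx', 'users(email)', index_type='hash')  # explicit column, hash index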

def _construct_index(self, table_name, index_name):
def _construct_index(self, table_name, index_name, column_name):
    '''
    Construct a btree on a table and save.

    Args:
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
        column_name: string. Name of the column to index (must be the table's primary or unique key).
    '''
    bt = Btree(3) # 3 is arbitrary

    # for each record in the primary key of the table, insert its value and index to the btree
    for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)):
        if key is None:
            continue
        bt.insert(key, idx)
    # save the btree
    self._save_index(index_name, bt)
    if (column_name == self.tables[table_name].pk) or (column_name == self.tables[table_name].uk):
        # for each record in the indexed (primary or unique key) column, insert its value and row index into the btree
        for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)):
            if key is None:
                continue
            bt.insert(key, idx)
        # save the btree
        self._save_index(index_name, bt)
    else:
        raise Exception(f'Column "{column_name}" must be a primary or unique key in "{table_name}" table.')
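To make the mapping concrete, a minimal sketch of what the loop builds (Btree import path omitted; only the insert(key, row_index) interface used above is assumed):

    bt = Btree(3)
    for idx, key in enumerate(['ca', 'cb', 'cc']):   # e.g. a tiny key column
        bt.insert(key, idx)                          # key value -> row position in self.data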

def _construct_hash_index(self, table_name, index_name, column_name):
    '''
    Construct a hash tree index on a table and save.

    Args:
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
        column_name: string. Name of the column to index (must be the table's primary or unique key).
    '''
    ht = HashTree(4) # 4 is arbitrary

    if (column_name == self.tables[table_name].pk) or (column_name == self.tables[table_name].uk):
        # for each record in the indexed (primary or unique key) column, insert its value and row index into the hash tree
        for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)):
            if key is None:
                continue
            ht.insert(key, idx)
        # save the hash tree
        self._save_index(index_name, ht)
    else:
        raise Exception(f'Column "{column_name}" must be a primary or unique key in "{table_name}" table.')
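The HashTree class itself is not part of this excerpt. Purely as a hypothetical stand-in (not the class this PR uses) to show the key -> row-index idea behind a bucketed hash index:

    class TinyHashIndex:
        def __init__(self, n_buckets):
            self.buckets = [[] for _ in range(n_buckets)]

        def insert(self, key, idx):
            self.buckets[hash(key) % len(self.buckets)].append((key, idx))

        def find(self, key):
            return [i for k, i in self.buckets[hash(key) % len(self.buckets)] if k == key]

    ht = TinyHashIndex(4)
    ht.insert('a@example.com', 0)
    ht.find('a@example.com')   # -> [0]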


def _has_index(self, table_name):
6 changes: 4 additions & 2 deletions miniDB/misc.py
@@ -8,7 +8,8 @@ def get_op(op, a, b):
           '<': operator.lt,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq}
           '=': operator.eq,
           'not': operator.ne}

    try:
        return ops[op](a,b)
@@ -20,7 +21,8 @@ def split_condition(condition):
           '<=': operator.le,
           '=': operator.eq,
           '>': operator.gt,
           '<': operator.lt}
           '<': operator.lt,
           'not': operator.ne}

    for op_key in ops.keys():
        splt=condition.split(op_key)
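A quick illustration of the new operator (the (left, op, right) return shape of split_condition is assumed from its usual use elsewhere in miniDB):

    get_op('not', 'dev', 'sales')            # True, i.e. operator.ne('dev', 'sales')
    split_condition('department not sales')  # expected to yield ('department', 'not', 'sales')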
75 changes: 66 additions & 9 deletions miniDB/table.py
@@ -26,7 +26,7 @@ class Table:
        - a dictionary that includes the appropriate info (all the attributes in __init__)

    '''
    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique_key=None, load=None):

        if load is not None:
            # if load is a dict, replace the object dict with it (replaces the object with the specified one)
@@ -68,6 +68,12 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
            self.pk_idx = None

        self.pk = primary_key
        if unique_key is not None:
            self.uk_idx = self.column_names.index(unique_key)
        else:
            self.uk_idx = None

        self.uk = unique_key
        # self._update()

        # if any of the name, column_names and column_types are None, return an empty table object
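A hypothetical construction showing the new attributes (names are made up; column_types given in the string form create_table passes down):

    t = Table(name='users', column_names=['id', 'email', 'name'], column_types=['int', 'str', 'str'], primary_key='id', unique_key='email')
    # t.pk_idx == 0, t.uk_idx == 1, t.uk == 'email'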
@@ -129,7 +135,11 @@ def _insert(self, row, insert_stack=[]):
                raise ValueError(f'## ERROR -> Value {row[i]} already exists in primary key column.')
            elif i==self.pk_idx and row[i] is None:
                raise ValueError(f'ERROR -> The value of the primary key cannot be None.')

            # if value is to be appended to a unique_key column, check that it doesn't already exist (no duplicate unique keys)
            if i==self.uk_idx and row[i] in self.column_by_name(self.uk):
                raise ValueError(f'## ERROR -> Value {row[i]} already exists in unique key column "{self.uk}".')
            elif i==self.uk_idx and row[i] is None:
                raise ValueError(f'ERROR -> The value in unique key column "{self.uk}" cannot be None.')
        # if insert_stack is not empty, append to its last index
        if insert_stack != []:
            self.data[insert_stack[-1]] = row
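Continuing the sketch from the constructor example above, a duplicate in the unique-key column is now rejected (values are hypothetical):

    t._insert([1, 'a@example.com', 'Alice'])
    t._insert([2, 'a@example.com', 'Bob'])   # raises ValueError: value already exists in unique key column "email"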
@@ -233,9 +243,53 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
        # if condition is None, return all rows
        # if not, return the rows with values where condition is met for value
        if condition is not None:
            column_name, operator, value = self._parse_condition(condition)
            column = self.column_by_name(column_name)
            rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
            if "between" in condition.split():
                splitCondition = condition.split()
                rows = []  # defined up front so a malformed between-condition cannot leave rows unbound
                if(splitCondition[3]=='and'):
                    low = splitCondition[2]
                    high = splitCondition[4]
                    name = splitCondition[0]
                    column = self.column_by_name(name)

                    if(low.isdigit() and high.isdigit()):
                        x = int(low)
                        y = int(high)
                        for i,j in enumerate(column):
                            z = int(j)
                            if z>=x and z<=y:
                                rows.append(i)
                    else:
                        print('Please use integers.')
elif "or" in condition.split():
conditions = condition.split("OR")
conditions = conditions[0].split("or")
row_lists =[]
for i in conditions:
name, operator, value = self._parse_condition(i)
column = self.column_by_name(name)
row_lists.append([ind for ind, x in enumerate(column) if get_op(operator, x, value)])

rows = []
for k in row_lists:
for row in k:
if not(row in rows):
rows.append(row)
elif "and" in condition.split():
conditions = condition.split("AND")
conditions = conditions[0].split("and")
row_lists =[]
for i in conditions:
name, operator, value = self._parse_condition(i)
column = self.column_by_name(name)
row_lists.append([ind for ind, x in enumerate(column) if get_op(operator, x, value)])
rows = set(row_lists[0]).intersection(*row_lists)

else:
name, operator, value = self._parse_condition(condition)
column = self.column_by_name(name)
rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]

        else:
            rows = [i for i in range(len(self.data))]
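For orientation, a few calls that exercise the new condition forms end to end (table, columns and values are hypothetical):

    db.select('*', 'employees', 'salary between 1000 and 2000')    # inclusive integer range
    db.select('*', 'employees', 'department=dev or department=hr') # union of the two matches
    db.select('*', 'employees', 'salary>=1000 and salary<=2000')   # intersection of the two matches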

@@ -281,10 +335,9 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False

        column_name, operator, value = self._parse_condition(condition)

        # if the column in condition is not a primary key, abort the select
        if column_name != self.column_names[self.pk_idx]:
            print('Column is not PK. Aborting')

        # if the column in condition is not a primary or unique key, abort the select
        if (column_name != self.column_names[self.pk_idx]) and (self.uk_idx is None or column_name != self.column_names[self.uk_idx]):
            print('Column is not PK or UK. Aborting')
        # here we run the same select twice, sequentially and using the btree.
        # we then check the results match and compare performance (number of operations)
        column = self.column_by_name(column_name)
@@ -534,6 +587,10 @@ def show(self, no_of_rows=None, is_locked=False):
            # table has a primary key, add PK next to the appropriate column
            headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
        # detect the rows that are no tfull of nones (these rows have been deleted)
        if self.uk_idx is not None:
            # table has unique key, add UK next to the appropriate column
            headers[self.uk_idx] = headers[self.uk_idx]+' #UK#'
        # detect the rows that are not full of nones (these rows have been deleted)
        # if we dont skip these rows, the returning table has empty rows at the deleted positions
        non_none_rows = [row for row in self.data if any(row)]
        # print using tabulate