Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions mdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import re
from pprint import pprint
import sys
import readline
from pyreadline3 import Readline
readline = Readline()
import traceback
import shutil
sys.path.append('miniDB')

from database import Database
from table import Table
from miniDB.database import Database
from miniDB.table import Table
# art font is "big"
art = '''
_ _ _____ ____
Expand Down Expand Up @@ -105,7 +106,14 @@ def create_query_plan(query, keywords, action):
dic['primary key'] = arglist[arglist.index('primary')-2]
else:
dic['primary key'] = None

if 'unique' in args:
arglist = args[1:-1].replace(',','')
arglist = arglist.split(' ')
print(" Unique condition detected")
dic['unique'] = arglist[arglist.index('unique')-2]
else:
dic['unique'] = None

if action=='import':
dic = {'import table' if key=='import' else key: val for key, val in dic.items()}

Expand Down Expand Up @@ -175,7 +183,7 @@ def interpret(query):
'unlock table': ['unlock table', 'force'],
'delete from': ['delete from', 'where'],
'update table': ['update table', 'set', 'where'],
'create index': ['create index', 'on', 'using'],
'create index': ['create index', 'on', 'column', 'using'], # added column keyword
'drop index': ['drop index'],
'create view' : ['create view', 'as']
}
Expand Down Expand Up @@ -247,8 +255,11 @@ def remove_db(db_name):


if __name__ == "__main__":
fname = os.getenv('SQL')
dbname = os.getenv('DB')
'''
Hard-coded file name and database name, due to unresolved technical difficulties
'''
fname = "sql_files/smallRelationsInsertFile.sql"
dbname = "smdb"

db = Database(dbname, load=True)

Expand Down
13 changes: 7 additions & 6 deletions miniDB/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import os,sys
import logging
import warnings
import readline
from pyreadline3 import Readline
readline = Readline()
from tabulate import tabulate

sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB')
Expand Down Expand Up @@ -54,7 +55,7 @@ def __init__(self, name, load=True, verbose = True):
self.create_table('meta_length', 'table_name,no_of_rows', 'str,int')
self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str')
self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list')
self.create_table('meta_indexes', 'table_name,index_name', 'str,str')
self.create_table('meta_indexes', 'table_name,index_column,index_name', 'str,str')
self.save_database()

def save_database(self):
Expand Down Expand Up @@ -101,7 +102,7 @@ def _update(self):
self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types,unique_cols=None, primary_key=None, load=None):
'''
This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

Expand All @@ -113,7 +114,7 @@ def create_table(self, name, column_names, column_types, primary_key=None, load=
load: boolean. Defines table object parameters as the name of the table and the column names.
'''
# print('here -> ', column_names.split(','))
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','),unique_cols=unique_cols, primary_key=primary_key, load=load)})
# self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
# check that new dynamic var doesnt exist already
# self.no_of_tables += 1
Expand Down Expand Up @@ -331,7 +332,7 @@ def delete_from(self, table_name, condition):
self._add_to_insert_stack(table_name, deleted)
self.save_database()

def select(self, columns, table_name, condition, distinct=None, order_by=None, \
def select(self, columns, table_name, condition, distinct=None, order_by=None,
limit=True, desc=None, save_as=None, return_object=True):
'''
Selects and outputs a table's data where condtion is met.
Expand Down Expand Up @@ -366,7 +367,7 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
# self.lock_table(table_name, mode='x')
if self.is_locked(table_name):
return
if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or self.tables[table_name].unique:
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
bt = self._load_idx(index_name)
table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
Expand Down
162 changes: 162 additions & 0 deletions miniDB/indexHash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import math

class bucket_value:
    """A table row paired with the index of the line it occupies in the table."""

    def __init__(self, table_row, table_line_index):
        # Where the row lives in the table file.
        self.table_line_index = table_line_index
        # The row's contents.
        self.table_row = table_row
class Bucket:
    """One bucket of the extendible-hashing directory.

    Holds the stored values and the bucket's local depth.
    """

    def __init__(self, values, local_depth):
        self.local_depth = local_depth
        self.values = values


'''
INFO:
An element is inserted into a bucket, if that bucket overflows then:
case 1: The bucket's local depth is equal to global depth
- directory expansion is performed
- bucket split is performed (each new bucket will have its local depth increased by 1)
case 2: The bucket's local depth is less than the global depth
- bucket split is performed (each new bucket will have its local depth increased by 1)

'''


def get_binary(value):
    """Return *value* as a concatenation of 8-bit binary ASCII codes.

    Every character of ``str(value)`` is encoded as its zero-padded
    8-bit binary ordinal, and the chunks are joined into one string.
    E.g. ``get_binary('a')`` -> ``'01100001'``.
    """
    chunks = [format(ord(ch), '08b') for ch in str(value)]
    return ''.join(chunks)


def insert(dict, position, elem, global_depth):
    """Insert ``elem`` into the bucket stored under directory key ``position``.

    Implements the overflow handling of extendible hashing:
      - the bucket has room (fewer than 3 values): just append;
      - overflow while local depth == global depth: expand (double) the
        directory, then redistribute the overflowed bucket's values;
      - overflow while local depth < global depth: split only the bucket
        and redistribute its values.

    Parameters
    ----------
    dict : dict
        Directory mapping binary-string keys ('0', '1', '10', ...) to
        Bucket objects.  NOTE: the parameter name shadows the builtin.
    position : str
        Binary-string key of the bucket that should receive ``elem``.
    elem :
        The value to store.
    global_depth : int
        Current global depth of the directory.

    Mutates ``dict`` in place; returns None.
    """
    new_dict = {}
    number_of_bucket_elements = len(dict[position].values)

    # No overflow: bucket capacity is hard-coded at 3 values.
    if (number_of_bucket_elements < 3):
        dict[position].values += [elem]

    # Overflow.
    else:
        # Case 1: local depth equals global depth -> directory expansion
        # plus bucket split (split buckets get their local depth + 1).
        if dict[position].local_depth == global_depth:
            # Directory expansion.
            # NOTE(review): for a directory that doubles on expansion the
            # expected new depth is log2(len(dict)) + 1; int(math.sqrt(...))
            # only agrees for small directories — confirm.
            global_depth = int(math.sqrt(len(dict))) +1
            # Create the new directory of size 2 ^ global_depth.
            for i in range(pow(2, global_depth)):
                new_dict.setdefault(format(i, 'b'), Bucket([], 1))
            # Re-hash the new directory.
            for i in range(global_depth + 1):
                # Buckets carried over from the old directory (other than
                # the one being split) are kept as they are.
                if i < pow(2, global_depth - 1) and i != int(position):
                    new_dict[format(i, 'b')] = dict[format(i, 'b')]
                    # Reduce by one so ONLY the split buckets end up with
                    # an increased local depth.
                    new_dict[format(i, 'b')].local_depth -= 1

            # The bucket being split: its values plus the new element.
            # (``elements`` aliases the bucket's list; += mutates it.)
            elements = dict[position].values
            elements += [elem]
            # Re-distribute each value according to the hash function.
            for item in elements:
                # Convert to a binary string.
                b = get_binary(item)
                # Hash function: value mod 2 ^ global_depth.
                # NOTE(review): int(b) parses the binary string as a
                # *decimal* number before the modulo — confirm intentional.
                lsb = int(b) % pow(2, global_depth)
                insert(new_dict, format(lsb, 'b'), item, global_depth)

            # Copy the re-hashed buckets back into the caller's directory.
            for count, value in enumerate(new_dict):
                # First half already exists in the old directory.
                if count < pow(2, global_depth - 1):
                    dict[value].values = new_dict[value].values
                # Second half did not exist before the expansion.
                else:
                    dict.setdefault(value, Bucket([], 1))
                    dict[value].values = new_dict[value].values
                    # Non-empty buckets affected by the expansion get their
                    # local depth incremented by 1.
                    if len(dict[value].values) >0:
                        dict[value].local_depth = new_dict[value].local_depth + 1
        # Case 2: local depth < global depth -> only a bucket split.
        else:
            # Values of the overflowing bucket plus the new element.
            elements = dict[position].values
            elements += [elem]

            # Local depth of the bucket being split.
            local_d= dict[position].local_depth  # NOTE(review): assigned but never used

            # Empty the bucket before redistributing its values.
            dict[position].values =[]

            # Re-distribute each value according to the hash function.
            for item in elements:
                # Convert to a binary string.
                b = get_binary(item)
                # Hash function: value mod 2 ^ global_depth.
                lsb = int(b) % pow(2, global_depth)
                # lsb addresses the destination bucket; its local depth is
                # incremented before the recursive insert.
                dict[format(lsb, 'b')].local_depth +=1
                insert(dict, format(lsb, 'b'), item, global_depth)

def get_hash_index(list_of_values):
    """Build an extendible-hash directory over ``list_of_values``.

    Starts from a two-bucket directory ('0' and '1') at global depth 1,
    inserts every value, prints each bucket's contents, and returns the
    directory (binary-string key -> Bucket).
    """
    global_depth = 1
    directory = {'0': Bucket([], 1), '1': Bucket([], 1)}
    for element in list_of_values:
        # Hash: the binary string, read as a decimal int, mod 2^depth.
        slot = int(get_binary(element)) % pow(2, global_depth)
        insert(directory, str(slot), element, global_depth)
    for key in directory:
        print('key', key, 'Bucket ', directory[key].values)
    return directory
def find_value(v, d):
    """Search the hash directory ``d`` for value ``v``; print it when found.

    NOTE(review): global depth is derived as int(sqrt(len(d))) — for a
    directory that doubles on expansion log2(len(d)) would be expected;
    confirm against insert().
    """
    binary_repr = get_binary(v)
    depth = int(math.sqrt(len(d)))
    # Same hash as insertion: binary string read as decimal, mod 2^depth.
    slot = int(binary_repr) % pow(2, depth)
    for stored in d[str(slot)].values:
        if (stored == v):
            print(stored)

if __name__ == '__main__':
    # Demo of the extendible-hashing index.  Guarded so that importing
    # this module no longer runs the demo as an import side effect.
    lista = [1, 2, 3, 4, 5, 6, 7, 8]
    print('This list ', lista)
    print('Is stored in a hash table, with extendable hashing in buckets of size = 3')
    print('Due to the bucket size finding an element will take O(3) instead of O(1)')
    print('The hash table is the following:')
    _dictionary = get_hash_index(lista)
    find_value(8, _dictionary)
    # Fixed: the success message used to be printed BEFORE the lookup ran.
    print('The value is successfully found!')

2 changes: 2 additions & 0 deletions miniDB/index_Btree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


72 changes: 52 additions & 20 deletions miniDB/misc.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,82 @@
import operator


def get_op(op, a, b):
    """Apply the comparison named by symbol ``op`` to ``a`` and ``b``.

    Supports >, <, >=, <=, = and != (the not-equal operator added by
    this change).  Returns the boolean result of the comparison.

    Returns False when the operands are not comparable (TypeError), e.g.
    when ``a`` or ``b`` is None because the record was deleted.
    Raises KeyError for an unknown operator symbol.
    """
    ops = {'>': operator.gt,
           '<': operator.lt,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq,
           '!=': operator.ne,
           }

    try:
        return ops[op](a, b)  # does the operation and returns a boolean result
    except TypeError:  # if a or b is None (deleted record), python3 raises TypeError
        return False


def split_condition(condition):
'''
Added not equal operator
'''
ops = {'>=': operator.ge,
'<=': operator.le,
'=': operator.eq,
'>': operator.gt,
'<': operator.lt}
'<': operator.lt,
'!=': operator.ne
}

for op_key in ops.keys():
splt=condition.split(op_key)
if len(splt)>1:
if 'between' in condition:
splt = condition.split('between')
left, right = splt[0].strip(), splt[1].strip()
return left, 'between', right
if 'and' in condition:
splt = condition.split('and')
print('and found ', splt[0], splt[1])
return splt
if 'or_condition' in condition:
splt = condition.split('or_condition')
print('or found ', splt[0], splt[1])
return splt
else:
splt = condition.split(op_key)
if len(splt) > 1:
left, right = splt[0].strip(), splt[1].strip()

if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
right = right.strip('"')
elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
raise ValueError(
f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')

if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
right = right.strip('"')
elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')
if right.find(
'"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
raise ValueError(
f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')

if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')
return left, op_key, right

return left, op_key, right

def reverse_op(op):
    """Return the operator expressing the same condition with swapped operands.

    ``a OP b`` is equivalent to ``b reverse_op(OP) a``: '>' becomes '<',
    '>=' becomes '<=', and so on.  Equality and inequality are symmetric,
    so '=' maps to '=' and '!=' maps to '!=' — NOT to each other (mapping
    '=' to '!=' would negate the condition instead of reversing it).

    Returns None for an unknown operator symbol.
    """
    return {
        '>': '<',
        '>=': '<=',
        '<': '>',
        '<=': '>=',
        '=': '=',
        '!=': '!=',
    }.get(op)
Loading