DataStories-UniPi · vassilikikrg · Jan 16, 2023 · Jan 16, 2023 · Jan 16, 2023 · Jan 16, 2023
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,5 @@ dbdata/
 __pycache__/
 .idea/
 .pytest_cache
+.vscode/launch.json
+.vscode/settings.json
diff --git a/README.md b/README.md
@@ -2,6 +2,13 @@
   <img width="550" alt="mdblogo" src="https://user-images.githubusercontent.com/15364873/146045747-5dbdce9c-a70a-494b-8fdd-52ba932cdd19.png">
 </p>
 
+# Fork by P20074, P20199, P20220
+
+This fork implements several key features of a modern RDBMS:
+ - Enriching the WHERE clause by supporting (a) the NOT and BETWEEN operators and (b) the AND and OR operators
+ - Enriching the indexing functionality by supporting (a) a BTree index over unique (non-PK) columns and (b) a Hash index over PK or unique columns
+ - Implementing miniDB’s query optimiser by building equivalent query plans based on the respective relational algebra (RA) expressions
+
 # miniDB
 
 The miniDB project is a minimal and easy to expand and develop for RMDBS tool, written exclusivelly in Python 3. MiniDB's main goal is to provide the user with as much functionality as possible while being easy to understand and even easier to expand. Thus, miniDB's primary market are students and researchers that want to work with a tool that they can understand through and through, while being able to implement additional features as quickly as possible.

diff --git a/bash.exe.stackdump b/bash.exe.stackdump
@@ -0,0 +1,16 @@
+Stack trace:
+Frame        Function    Args
+000FFFFCD30  0018006286E (001802825B0, 0018026DE51, 00000000059, 000FFFFB710)
+000FFFFCD30  0018004846A (000FFFFCD30, 00100000000, 00000000000, 00000000001)
+000FFFFCD30  001800484A2 (00000000000, 00000000000, 00000000059, 432508F9B254)
+000FFFFCD30  0018006E166 (00180045323, 00180350B78, 00000000000, 0000000000D)
+000FFFFCD30  0018006E179 (00180045170, 001802357E0, 001800448F2, 000FFFFC910)
+000FFFFCD30  001800707F4 (00000000013, 00000000001, 000FFFFC910, 00180270615)
+000FFFFCD30  0018005AAFF (000FFFFCA60, 00000000000, 00000000000, 008FFFFFFFF)
+000FFFFCD30  0018005B245 (00800000010, 00000000000, 000FFFFCD30, 000000303E9)
+000FFFFCD30  0018005B757 (001800D8DFE, 00000000000, 00000000000, 00000000000)
+000FFFFCD30  0018005BA66 (00000000000, 000FFFFCD30, FFFFFFFFFFFFFFC6, 00000000000)
+000FFFFCD30  00180048C0C (00000000000, 00000000000, 00000000000, 00000000000)
+000FFFFFFF0  00180047716 (00000000000, 00000000000, 00000000000, 00000000000)
+000FFFFFFF0  001800477C4 (00000000000, 00000000000, 00000000000, 00000000000)
+End of stack trace
diff --git a/mdb.py b/mdb.py
@@ -5,10 +5,11 @@
 import readline
 import traceback
 import shutil
+from miniDB.equivalentQueries import equiv_print
 sys.path.append('miniDB')
 
-from database import Database
-from table import Table
+from miniDB.database import Database
+from miniDB.table import Table
 # art font is "big"
 art = '''
              _         _  _____   ____  
@@ -45,14 +46,14 @@ def create_query_plan(query, keywords, action):
     This can and will be used recursively
     '''
 
-    dic = {val: None for val in keywords if val!=';'}
+    dic = {val: None for val in keywords if val!=';'} # dict of query words
 
-    ql = [val for val in query.split(' ') if val !='']
+    ql = [val for val in query.split(' ') if val !=''] # list of query words
 
     kw_in_query = []
     kw_positions = []
     i=0
-    while i<len(ql):
+    while i<len(ql): # finds indexes of query keywords in the action format (eg. [0,2,4,6], 0:select)
         if in_paren(ql, i): 
             i+=1
             continue
@@ -68,15 +69,15 @@ def create_query_plan(query, keywords, action):
         i+=1
 
 
-
+    # Fill dict (eg. 'select':'*') excluding ;
     for i in range(len(kw_in_query)-1):
         dic[kw_in_query[i]] = ' '.join(ql[kw_positions[i]+1:kw_positions[i+1]])
 
     if action == 'create view':
         dic['as'] = interpret(dic['as'])
 
     if action=='select':
-        dic = evaluate_from_clause(dic)
+        dic = evaluate_from_clause(dic) # for subqueries using join statement
 
         if dic['distinct'] is not None:
             dic['select'] = dic['distinct']
@@ -88,23 +89,28 @@ def create_query_plan(query, keywords, action):
                 dic['desc'] = True
             else:
                 dic['desc'] = False
-            dic['order by'] = dic['order by'].removesuffix(' asc').removesuffix(' desc')
+            dic['order by'] = dic['order by'].replace(' asc','').replace(' desc','')
 
         else:
             dic['desc'] = None
 
     if action=='create table':
         args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1]
-        dic['create table'] = dic['create table'].removesuffix(args).strip()
+        dic['create table'] = dic['create table'].replace(args,"").strip()
         arg_nopk = args.replace('primary key', '')[1:-1]
         arglist = [val.strip().split(' ') for val in arg_nopk.split(',')]
         dic['column_names'] = ','.join([val[0] for val in arglist])
         dic['column_types'] = ','.join([val[1] for val in arglist])
         if 'primary key' in args:
-            arglist = args[1:-1].split(' ')
-            dic['primary key'] = arglist[arglist.index('primary')-2]
+            arglist = args[1:-1].split(' ') # remove () from create table arguments statement and split into keywords
+
+            dic['primary key'] = arglist[arglist.index('primary')-2] # search for index of keyword primary, and get index of the PK (-2 because -1 is type)
         else:
             dic['primary key'] = None
+        if 'unique' in args:
+            arglist = args[1:-1].split(' ')
+            matched_indexes = [i for i, kw in enumerate(arglist) if kw in ('unique', 'unique,')]
+            dic['unique'] =[arglist[m-2] for m in matched_indexes] # list of columns with unique keyword
 
     if action=='import': 
         dic = {'import table' if key=='import' else key: val for key, val in dic.items()}
@@ -121,6 +127,13 @@ def create_query_plan(query, keywords, action):
         else:
             dic['force'] = False
 
+    if action=='create index':
+        if '(' and ')' in dic['on']: #if user has specified a column on which we will create the index
+            l=dic['on'].split(' ')
+            dic['index_column']=l[2] #key is the specified column
+            dic['on']=l[0] #now the key contains only the table name
+        else:
+            dic['index_column']=None #default case->index column is not specified
     return dic
 
 
@@ -130,7 +143,7 @@ def evaluate_from_clause(dic):
     Evaluate the part of the query (argument or subquery) that is supplied as the 'from' argument
     '''
     join_types = ['inner', 'left', 'right', 'full', 'sm', 'inl']
-    from_split = dic['from'].split(' ')
+    from_split = dic['from'].split(' ') # if from key in () then we have an inner query from the join_types
     if from_split[0] == '(' and from_split[-1] == ')':
         subquery = ' '.join(from_split[1:-1])
         dic['from'] = interpret(subquery)
@@ -162,7 +175,7 @@ def evaluate_from_clause(dic):
 
 def interpret(query):
     '''
-    Interpret the query.
+    Interpret the query. (keywords per action)
     '''
     kw_per_action = {'create table': ['create table'],
                      'drop table': ['drop table'],
@@ -180,11 +193,13 @@ def interpret(query):
                      'create view' : ['create view', 'as']
                      }
 
-    if query[-1]!=';':
+    if query[-1]!=';': # append ; to query if not there
         query+=';'
 
+    # format () and ; with one whitespace before and after
     query = query.replace("(", " ( ").replace(")", " ) ").replace(";", " ;").strip()
 
+    # find action from first word in query
     for kw in kw_per_action.keys():
         if query.startswith(kw):
             action = kw
@@ -213,7 +228,7 @@ def interpret_meta(command):
     cdb - change/create database
     rmdb - delete database
     """
-    action = command.split(' ')[0].removesuffix(';')
+    action = command.split(' ')[0].replace(';','')
 
     db_name = db._name if search_between(command, action,';')=='' else search_between(command, action,';')
 
@@ -223,10 +238,10 @@ def interpret_meta(command):
         verbose = False
 
     def list_databases(db_name):
-        [print(fold.removesuffix('_db')) for fold in os.listdir('dbdata')]
+        [print(fold.replace('_db','')) for fold in os.listdir('dbdata')]
 
     def list_tables(db_name):
-        [print(pklf.removesuffix('.pkl')) for pklf in os.listdir(f'dbdata/{db_name}_db') if pklf.endswith('.pkl')\
+        [print(pklf.replace('.pkl','')) for pklf in os.listdir(f'dbdata/{db_name}_db') if pklf.endswith('.pkl')\
             and not pklf.startswith('meta')]
 
     def change_db(db_name):
@@ -258,7 +273,7 @@ def remove_db(db_name):
         for line in open(fname, 'r').read().splitlines():
             if line.startswith('--'): continue
             if line.startswith('explain'):
-                dic = interpret(line.removeprefix('explain '))
+                dic = interpret(line.replace('explain ',''))
                 pprint(dic, sort_dicts=False)
             else :
                 dic = interpret(line.lower())
@@ -284,11 +299,14 @@ def remove_db(db_name):
         try:
             if line=='exit':
                 break
-            if line.split(' ')[0].removesuffix(';') in ['lsdb', 'lstb', 'cdb', 'rmdb']:
+            if line.split(' ')[0].replace(';','') in ['lsdb', 'lstb', 'cdb', 'rmdb']:
                 interpret_meta(line)
             elif line.startswith('explain'):
-                dic = interpret(line.removeprefix('explain '))
+                dic = interpret(line.replace('explain ',''))
                 pprint(dic, sort_dicts=False)
+            elif line.startswith('equivalent of'):
+                 dic = interpret(line.replace('equivalent of ',''))
+                 equiv_print(dic)
             else:
                 dic = interpret(line)
                 result = execute_dic(dic)