Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions mdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import re
from pprint import pprint
import sys
import readline
from pyreadline3 import Readline
readline = Readline()
import traceback
import shutil
sys.path.append('miniDB')

from database import Database
from table import Table
from miniDB.database import Database
from miniDB.table import Table
# art font is "big"
art = '''
_ _ _____ ____
Expand Down Expand Up @@ -105,7 +106,14 @@ def create_query_plan(query, keywords, action):
dic['primary key'] = arglist[arglist.index('primary')-2]
else:
dic['primary key'] = None

if 'unique' in args:
arglist = args[1:-1].replace(',','')
arglist = arglist.split(' ')
print(" Unique condition detected")
dic['unique'] = arglist[arglist.index('unique')-2]
else:
dic['unique'] = None

if action=='import':
dic = {'import table' if key=='import' else key: val for key, val in dic.items()}

Expand Down Expand Up @@ -175,7 +183,7 @@ def interpret(query):
'unlock table': ['unlock table', 'force'],
'delete from': ['delete from', 'where'],
'update table': ['update table', 'set', 'where'],
'create index': ['create index', 'on', 'using'],
'create index': ['create index', 'on', 'column', 'using'], # added column keyword
'drop index': ['drop index'],
'create view' : ['create view', 'as']
}
Expand Down Expand Up @@ -247,8 +255,11 @@ def remove_db(db_name):


if __name__ == "__main__":
fname = os.getenv('SQL')
dbname = os.getenv('DB')
'''
Hard-coded file name and database name, due to unresolved technical difficulties
'''
fname = "sql_files/smallRelationsInsertFile.sql"
dbname = "smdb"

db = Database(dbname, load=True)

Expand Down
13 changes: 7 additions & 6 deletions miniDB/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import os,sys
import logging
import warnings
import readline
from pyreadline3 import Readline
readline = Readline()
from tabulate import tabulate

sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB')
Expand Down Expand Up @@ -54,7 +55,7 @@ def __init__(self, name, load=True, verbose = True):
self.create_table('meta_length', 'table_name,no_of_rows', 'str,int')
self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str')
self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list')
self.create_table('meta_indexes', 'table_name,index_name', 'str,str')
self.create_table('meta_indexes', 'table_name,index_column,index_name', 'str,str')
self.save_database()

def save_database(self):
Expand Down Expand Up @@ -101,7 +102,7 @@ def _update(self):
self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types,unique_cols=None, primary_key=None, load=None):
'''
This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

Expand All @@ -113,7 +114,7 @@ def create_table(self, name, column_names, column_types, primary_key=None, load=
load: boolean. Defines table object parameters as the name of the table and the column names.
'''
# print('here -> ', column_names.split(','))
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','),unique_cols=unique_cols, primary_key=primary_key, load=load)})
# self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
# check that new dynamic var doesnt exist already
# self.no_of_tables += 1
Expand Down Expand Up @@ -331,7 +332,7 @@ def delete_from(self, table_name, condition):
self._add_to_insert_stack(table_name, deleted)
self.save_database()

def select(self, columns, table_name, condition, distinct=None, order_by=None, \
def select(self, columns, table_name, condition, distinct=None, order_by=None,
limit=True, desc=None, save_as=None, return_object=True):
'''
Selects and outputs a table's data where condtion is met.
Expand Down Expand Up @@ -366,7 +367,7 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
# self.lock_table(table_name, mode='x')
if self.is_locked(table_name):
return
if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or self.tables[table_name].unique:
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
bt = self._load_idx(index_name)
table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
Expand Down
162 changes: 162 additions & 0 deletions miniDB/indexHash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import math

class bucket_value:
    """A table row paired with the index of the line it occupies in the table."""

    def __init__(self, table_row, table_line_index):
        # Where the row lives in the table file.
        self.table_line_index = table_line_index
        # The row's contents.
        self.table_row = table_row
class Bucket:
    """One bucket of the extendible-hashing directory.

    Holds the stored values and the bucket's local depth.
    """

    def __init__(self, values, local_depth):
        self.local_depth = local_depth
        self.values = values


'''
INFO:
An element is inserted into a bucket, if that bucket overflows then:
case 1: The bucket's local depth is equal to global depth
- directory expansion is performed
- bucket split is performed (each new bucket will have its local depth increased by 1)
case 2: The bucket's local depth is less than the global depth
- bucket split is performed (each new bucket will have its local depth increased by 1)

'''


def get_binary(value):
    """Return *value* as a concatenation of 8-bit binary ASCII codes.

    Every character of ``str(value)`` is encoded as its zero-padded
    8-bit binary ordinal, and the chunks are joined into one string.
    E.g. ``get_binary('a')`` -> ``'01100001'``.
    """
    chunks = [format(ord(ch), '08b') for ch in str(value)]
    return ''.join(chunks)


def insert(dict, position, elem, global_depth):
    """Insert ``elem`` into the bucket stored under directory key ``position``.

    Implements the overflow handling of extendible hashing:
      - the bucket has room (fewer than 3 values): just append;
      - overflow while local depth == global depth: expand (double) the
        directory, then redistribute the overflowed bucket's values;
      - overflow while local depth < global depth: split only the bucket
        and redistribute its values.

    Parameters
    ----------
    dict : dict
        Directory mapping binary-string keys ('0', '1', '10', ...) to
        Bucket objects.  NOTE: the parameter name shadows the builtin.
    position : str
        Binary-string key of the bucket that should receive ``elem``.
    elem :
        The value to store.
    global_depth : int
        Current global depth of the directory.

    Mutates ``dict`` in place; returns None.
    """
    new_dict = {}
    number_of_bucket_elements = len(dict[position].values)

    # No overflow: bucket capacity is hard-coded at 3 values.
    if (number_of_bucket_elements < 3):
        dict[position].values += [elem]

    # Overflow.
    else:
        # Case 1: local depth equals global depth -> directory expansion
        # plus bucket split (split buckets get their local depth + 1).
        if dict[position].local_depth == global_depth:
            # Directory expansion.
            # NOTE(review): for a directory that doubles on expansion the
            # expected new depth is log2(len(dict)) + 1; int(math.sqrt(...))
            # only agrees for small directories — confirm.
            global_depth = int(math.sqrt(len(dict))) +1
            # Create the new directory of size 2 ^ global_depth.
            for i in range(pow(2, global_depth)):
                new_dict.setdefault(format(i, 'b'), Bucket([], 1))
            # Re-hash the new directory.
            for i in range(global_depth + 1):
                # Buckets carried over from the old directory (other than
                # the one being split) are kept as they are.
                if i < pow(2, global_depth - 1) and i != int(position):
                    new_dict[format(i, 'b')] = dict[format(i, 'b')]
                    # Reduce by one so ONLY the split buckets end up with
                    # an increased local depth.
                    new_dict[format(i, 'b')].local_depth -= 1

            # The bucket being split: its values plus the new element.
            # (``elements`` aliases the bucket's list; += mutates it.)
            elements = dict[position].values
            elements += [elem]
            # Re-distribute each value according to the hash function.
            for item in elements:
                # Convert to a binary string.
                b = get_binary(item)
                # Hash function: value mod 2 ^ global_depth.
                # NOTE(review): int(b) parses the binary string as a
                # *decimal* number before the modulo — confirm intentional.
                lsb = int(b) % pow(2, global_depth)
                insert(new_dict, format(lsb, 'b'), item, global_depth)

            # Copy the re-hashed buckets back into the caller's directory.
            for count, value in enumerate(new_dict):
                # First half already exists in the old directory.
                if count < pow(2, global_depth - 1):
                    dict[value].values = new_dict[value].values
                # Second half did not exist before the expansion.
                else:
                    dict.setdefault(value, Bucket([], 1))
                    dict[value].values = new_dict[value].values
                    # Non-empty buckets affected by the expansion get their
                    # local depth incremented by 1.
                    if len(dict[value].values) >0:
                        dict[value].local_depth = new_dict[value].local_depth + 1
        # Case 2: local depth < global depth -> only a bucket split.
        else:
            # Values of the overflowing bucket plus the new element.
            elements = dict[position].values
            elements += [elem]

            # Local depth of the bucket being split.
            local_d= dict[position].local_depth  # NOTE(review): assigned but never used

            # Empty the bucket before redistributing its values.
            dict[position].values =[]

            # Re-distribute each value according to the hash function.
            for item in elements:
                # Convert to a binary string.
                b = get_binary(item)
                # Hash function: value mod 2 ^ global_depth.
                lsb = int(b) % pow(2, global_depth)
                # lsb addresses the destination bucket; its local depth is
                # incremented before the recursive insert.
                dict[format(lsb, 'b')].local_depth +=1
                insert(dict, format(lsb, 'b'), item, global_depth)

def get_hash_index(list_of_values):
    """Build an extendible-hash directory over ``list_of_values``.

    Starts from a two-bucket directory ('0' and '1') at global depth 1,
    inserts every value, prints each bucket's contents, and returns the
    directory (binary-string key -> Bucket).
    """
    global_depth = 1
    directory = {'0': Bucket([], 1), '1': Bucket([], 1)}
    for element in list_of_values:
        # Hash: the binary string, read as a decimal int, mod 2^depth.
        slot = int(get_binary(element)) % pow(2, global_depth)
        insert(directory, str(slot), element, global_depth)
    for key in directory:
        print('key', key, 'Bucket ', directory[key].values)
    return directory
def find_value(v, d):
    """Search the hash directory ``d`` for value ``v``; print it when found.

    NOTE(review): global depth is derived as int(sqrt(len(d))) — for a
    directory that doubles on expansion log2(len(d)) would be expected;
    confirm against insert().
    """
    binary_repr = get_binary(v)
    depth = int(math.sqrt(len(d)))
    # Same hash as insertion: binary string read as decimal, mod 2^depth.
    slot = int(binary_repr) % pow(2, depth)
    for stored in d[str(slot)].values:
        if (stored == v):
            print(stored)

if __name__ == '__main__':
    # Demo of the extendible-hashing index.  Guarded so that importing
    # this module no longer runs the demo as an import side effect.
    lista = [1, 2, 3, 4, 5, 6, 7, 8]
    print('This list ', lista)
    print('Is stored in a hash table, with extendable hashing in buckets of size = 3')
    print('Due to the bucket size finding an element will take O(3) instead of O(1)')
    print('The hash table is the following:')
    _dictionary = get_hash_index(lista)
    find_value(8, _dictionary)
    # Fixed: the success message used to be printed BEFORE the lookup ran.
    print('The value is successfully found!')

2 changes: 2 additions & 0 deletions miniDB/index_Btree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


72 changes: 52 additions & 20 deletions miniDB/misc.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,82 @@
import operator


def get_op(op, a, b):
    """Apply the comparison named by symbol ``op`` to ``a`` and ``b``.

    Supports >, <, >=, <=, = and != (the not-equal operator added by
    this change).  Returns the boolean result of the comparison.

    Returns False when the operands are not comparable (TypeError), e.g.
    when ``a`` or ``b`` is None because the record was deleted.
    Raises KeyError for an unknown operator symbol.
    """
    ops = {'>': operator.gt,
           '<': operator.lt,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq,
           '!=': operator.ne,
           }

    try:
        return ops[op](a, b)  # does the operation and returns a boolean result
    except TypeError:  # if a or b is None (deleted record), python3 raises TypeError
        return False


def split_condition(condition):
'''
Added not equal operator
'''
ops = {'>=': operator.ge,
'<=': operator.le,
'=': operator.eq,
'>': operator.gt,
'<': operator.lt}
'<': operator.lt,
'!=': operator.ne
}

for op_key in ops.keys():
splt=condition.split(op_key)
if len(splt)>1:
if 'between' in condition:
splt = condition.split('between')
left, right = splt[0].strip(), splt[1].strip()
return left, 'between', right
if 'and' in condition:
splt = condition.split('and')
print('and found ', splt[0], splt[1])
return splt
if 'or_condition' in condition:
splt = condition.split('or_condition')
print('or found ', splt[0], splt[1])
return splt
else:
splt = condition.split(op_key)
if len(splt) > 1:
left, right = splt[0].strip(), splt[1].strip()

if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
right = right.strip('"')
elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
raise ValueError(
f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')

if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
right = right.strip('"')
elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')
if right.find(
'"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
raise ValueError(
f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')

if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')
return left, op_key, right

return left, op_key, right

def reverse_op(op):
    """Return the operator expressing the same condition with swapped operands.

    ``a OP b`` is equivalent to ``b reverse_op(OP) a``: '>' becomes '<',
    '>=' becomes '<=', and so on.  Equality and inequality are symmetric,
    so '=' maps to '=' and '!=' maps to '!=' — NOT to each other (mapping
    '=' to '!=' would negate the condition instead of reversing it).

    Returns None for an unknown operator symbol.
    """
    return {
        '>': '<',
        '>=': '<=',
        '<': '>',
        '<=': '>=',
        '=': '=',
        '!=': '!=',
    }.get(op)
Loading