8 changes: 7 additions & 1 deletion mdb.py
@@ -105,7 +105,13 @@ def create_query_plan(query, keywords, action):
            dic['primary key'] = arglist[arglist.index('primary')-2]
        else:
            dic['primary key'] = None

        if 'unique key' in args:
            arglist = args[1:-1].split(' ')
            dic['unique key'] = arglist[arglist.index('unique')-2]
        else:
            dic['unique key'] = None

    if action=='import':
        dic = {'import table' if key=='import' else key: val for key, val in dic.items()}
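As a quick sanity check of the argument parsing above (a minimal sketch; the exact args string that create_query_plan receives here is an assumption):

    args = '(id int primary key, email str unique key)'
    arglist = args[1:-1].split(' ')
    arglist[arglist.index('primary') - 2]   # -> 'id'
    arglist[arglist.index('unique') - 2]    # -> 'email'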

82 changes: 65 additions & 17 deletions miniDB/database.py
@@ -4,7 +4,6 @@
import os,sys
import logging
import warnings
import readline
from tabulate import tabulate

sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB')
@@ -54,7 +53,7 @@ def __init__(self, name, load=True, verbose = True):
    self.create_table('meta_length', 'table_name,no_of_rows', 'str,int')
    self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str')
    self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list')
    self.create_table('meta_indexes', 'table_name,index_name', 'str,str')
    self.create_table('meta_indexes', 'table_name,column_name,index_name', 'str,str,str')
    self.save_database()

def save_database(self):
@@ -101,7 +100,7 @@ def _update(self):
    self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types, primary_key=None, unique_key=None, load=None):
    '''
    This method creates a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

@@ -110,11 +109,11 @@ def create_table(self, name, column_names, column_types, primary_key=None, load=
        column_names: list. Names of columns.
        column_types: list. Types of columns.
        primary_key: string. The primary key (if it exists).
        unique_key: string. Unique key column name (if it exists).
        load: boolean. Defines table object parameters as the name of the table and the column names.
    '''
    # print('here -> ', column_names.split(','))
    self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})
    # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
    self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique_key=unique_key, load=load), }) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
    # check that new dynamic var doesnt exist already
    # self.no_of_tables += 1
    self._update()
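A hypothetical call exercising the new parameter (database, table and column names are made up; import path omitted):

    db = Database('testdb', load=False)
    db.create_table('users', 'id,email,name', 'int,str,str', primary_key='id', unique_key='email')
    db.tables['users']._insert([1, 'a@example.com', 'Alice'])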
@@ -160,14 +159,15 @@ def drop_table(self, table_name):
    self.save_database()


def import_table(self, table_name, filename, column_types=None, primary_key=None):
def import_table(self, table_name, filename, column_types=None, primary_key=None, unique_key=None):
    '''
    Creates table from CSV file.

    Args:
        filename: string. CSV filename. If not specified, filename's name will be used.
        column_types: list. Types of columns. If not specified, all will be set to type str.
        primary_key: string. The primary key (if it exists).
        unique_key: string. Unique key column name (if it exists).
    '''
    file = open(filename, 'r')

@@ -177,7 +177,7 @@ def import_table(self, table_name, filename, column_types=None, primary_key=None
            colnames = line.strip('\n')
            if column_types is None:
                column_types = ",".join(['str' for _ in colnames.split(',')])
            self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key)
            self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key, unique_key=unique_key)
            lock_ownership = self.lock_table(table_name, mode='x')
            first_line = False
            continue
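Likewise for CSV imports, a sketch (the CSV path and header row are hypothetical; the file is assumed to contain an id and an email column):

    db.import_table('users', 'users.csv', column_types='int,str,str', primary_key='id', unique_key='email')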
@@ -358,15 +358,22 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
        return table_name._select_where(columns, condition, distinct, order_by, desc, limit)

    if condition is not None:
        condition_column = split_condition(condition)[0]
        if "between" in condition.split():
            conditionColumn = condition.split(" ")[0]
        elif "or" in condition.split():
            conditionColumn = condition.split(" ")[0]
        elif "and" in condition.split():
            conditionColumn = condition.split(" ")[0]
        else:
            conditionColumn = split_condition(condition)[0]
    else:
        condition_column = ''
        conditionColumn = ''


    # self.lock_table(table_name, mode='x')
    if self.is_locked(table_name):
        return
    if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
    if self._has_index(table_name) and conditionColumn==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
        index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
        bt = self._load_idx(index_name)
        table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
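For reference, the column token the branches above end up with for a few sample conditions (a sketch; plain str.split is used, so spacing matters):

    # 'salary between 1000 and 2000'  ->  conditionColumn == 'salary'
    # 'id >= 3 and id <= 7'           ->  conditionColumn == 'id'
    # 'id=3'                          ->  falls through to split_condition, giving 'id'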
@@ -659,37 +666,78 @@ def create_index(self, index_name, table_name, index_type='btree'):
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
    '''

    if table_name.count('(') != 0:
        split_text = table_name.split('(')[1].split(')')
        table_name = table_name.split('(')[0].strip()
        table_column = split_text[0].strip()
        if table_column=='': table_column=self.tables[table_name].column_names[self.tables[table_name].pk_idx]
    else:
        table_column = self.tables[table_name].column_names[self.tables[table_name].pk_idx]

    if self.tables[table_name].pk_idx is None: # if no primary key, no index
        raise Exception('Cannot create index. Table has no primary key.')
    if index_name not in self.tables['meta_indexes'].column_by_name('index_name'):
        # btree and hash indexes are supported; more types can be added with another elif.
        self.tables['meta_indexes']._insert([table_name, table_column, index_name])
        if index_type=='btree':
            logging.info('Creating Btree index.')
            # insert a record with the name of the index and the table on which it's created to the meta_indexes table
            self.tables['meta_indexes']._insert([table_name, index_name])
            # create the actual index
            self._construct_index(table_name, index_name, table_column)
            self.save_database()
        elif index_type=='hash':
            logging.info('Creating Hash index.')
            # create the actual index
            self._construct_hash_index(table_name, index_name, table_column)
            self.save_database()
    else:
        raise Exception('Cannot create index. Another index with the same name already exists.')
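Hypothetical calls showing both forms the parsing above accepts (index, table and column names are made up):

    db.create_index('users_pk_idx', 'users')                               # no column given: defaults to the primary key, btree
    db.create_index('users_email_idx', 'users(email)', index_type='hash')  # explicit column, hash index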

def _construct_index(self, table_name, index_name):
def _construct_index(self, table_name, index_name, column_name):
    '''
    Construct a btree on a table and save.

    Args:
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
        column_name: string. Name of the column to index (must be the table's primary or unique key).
    '''
    bt = Btree(3) # 3 is arbitrary

    # for each record in the primary key of the table, insert its value and index to the btree
    for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)):
        if key is None:
            continue
        bt.insert(key, idx)
    # save the btree
    self._save_index(index_name, bt)
    if (column_name == self.tables[table_name].pk) or (column_name == self.tables[table_name].uk):
        # for each record in the indexed (primary or unique key) column, insert its value and row index into the btree
        for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)):
            if key is None:
                continue
            bt.insert(key, idx)
        # save the btree
        self._save_index(index_name, bt)
    else:
        raise Exception(f'Column "{column_name}" must be a primary or unique key in "{table_name}" table.')
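To make the mapping concrete, a minimal sketch of what the loop builds (Btree import path omitted; only the insert(key, row_index) interface used above is assumed):

    bt = Btree(3)
    for idx, key in enumerate(['ca', 'cb', 'cc']):   # e.g. a tiny key column
        bt.insert(key, idx)                          # key value -> row position in self.data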

def _construct_hash_index(self, table_name, index_name, column_name):
    '''
    Construct a hash tree index on a table and save.

    Args:
        table_name: string. Table name (must be part of database).
        index_name: string. Name of the created index.
        column_name: string. Name of the column to index (must be the table's primary or unique key).
    '''
    ht = HashTree(4) # 4 is arbitrary

    if (column_name == self.tables[table_name].pk) or (column_name == self.tables[table_name].uk):
        # for each record in the indexed (primary or unique key) column, insert its value and row index into the hash tree
        for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)):
            if key is None:
                continue
            ht.insert(key, idx)
        # save the hash tree
        self._save_index(index_name, ht)
    else:
        raise Exception(f'Column "{column_name}" must be a primary or unique key in "{table_name}" table.')
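The HashTree class itself is not part of this excerpt. Purely as a hypothetical stand-in (not the class this PR uses) to show the key -> row-index idea behind a bucketed hash index:

    class TinyHashIndex:
        def __init__(self, n_buckets):
            self.buckets = [[] for _ in range(n_buckets)]

        def insert(self, key, idx):
            self.buckets[hash(key) % len(self.buckets)].append((key, idx))

        def find(self, key):
            return [i for k, i in self.buckets[hash(key) % len(self.buckets)] if k == key]

    ht = TinyHashIndex(4)
    ht.insert('a@example.com', 0)
    ht.find('a@example.com')   # -> [0]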


def _has_index(self, table_name):
6 changes: 4 additions & 2 deletions miniDB/misc.py
@@ -8,7 +8,8 @@ def get_op(op, a, b):
           '<': operator.lt,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq}
           '=': operator.eq,
           'not': operator.ne}

    try:
        return ops[op](a,b)
@@ -20,7 +21,8 @@ def split_condition(condition):
           '<=': operator.le,
           '=': operator.eq,
           '>': operator.gt,
           '<': operator.lt}
           '<': operator.lt,
           'not': operator.ne}

    for op_key in ops.keys():
        splt=condition.split(op_key)
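A quick illustration of the new operator (the (left, op, right) return shape of split_condition is assumed from its usual use elsewhere in miniDB):

    get_op('not', 'dev', 'sales')            # True, i.e. operator.ne('dev', 'sales')
    split_condition('department not sales')  # expected to yield ('department', 'not', 'sales')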
75 changes: 66 additions & 9 deletions miniDB/table.py
@@ -26,7 +26,7 @@ class Table:
        - a dictionary that includes the appropriate info (all the attributes in __init__)

    '''
    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
    def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique_key=None, load=None):

        if load is not None:
            # if load is a dict, replace the object dict with it (replaces the object with the specified one)
@@ -68,6 +68,12 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
            self.pk_idx = None

        self.pk = primary_key
        if unique_key is not None:
            self.uk_idx = self.column_names.index(unique_key)
        else:
            self.uk_idx = None

        self.uk = unique_key
        # self._update()

        # if any of the name, column_names and column_types are None, return an empty table object
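A hypothetical construction showing the new attributes (names are made up; column_types given in the string form create_table passes down):

    t = Table(name='users', column_names=['id', 'email', 'name'], column_types=['int', 'str', 'str'], primary_key='id', unique_key='email')
    # t.pk_idx == 0, t.uk_idx == 1, t.uk == 'email'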
@@ -129,7 +135,11 @@ def _insert(self, row, insert_stack=[]):
                raise ValueError(f'## ERROR -> Value {row[i]} already exists in primary key column.')
            elif i==self.pk_idx and row[i] is None:
                raise ValueError(f'ERROR -> The value of the primary key cannot be None.')

            # if value is to be appended to a unique_key column, check that it doesn't already exist (no duplicate unique keys)
            if i==self.uk_idx and row[i] in self.column_by_name(self.uk):
                raise ValueError(f'## ERROR -> Value {row[i]} already exists in unique key column "{self.uk}".')
            elif i==self.uk_idx and row[i] is None:
                raise ValueError(f'ERROR -> The value in unique key column "{self.uk}" cannot be None.')
        # if insert_stack is not empty, append to its last index
        if insert_stack != []:
            self.data[insert_stack[-1]] = row
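Continuing the sketch from the constructor example above, a duplicate in the unique-key column is now rejected (values are hypothetical):

    t._insert([1, 'a@example.com', 'Alice'])
    t._insert([2, 'a@example.com', 'Bob'])   # raises ValueError: value already exists in unique key column "email"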
@@ -233,9 +243,53 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
        # if condition is None, return all rows
        # if not, return the rows with values where condition is met for value
        if condition is not None:
            column_name, operator, value = self._parse_condition(condition)
            column = self.column_by_name(column_name)
            rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
            if "between" in condition.split():
                splitCondition = condition.split()
                rows = []  # defined up front so a malformed between-condition cannot leave rows unbound
                if(splitCondition[3]=='and'):
                    low = splitCondition[2]
                    high = splitCondition[4]
                    name = splitCondition[0]
                    column = self.column_by_name(name)

                    if(low.isdigit() and high.isdigit()):
                        x = int(low)
                        y = int(high)
                        for i,j in enumerate(column):
                            z = int(j)
                            if z>=x and z<=y:
                                rows.append(i)
                    else:
                        print('Please use integers.')
elif "or" in condition.split():
conditions = condition.split("OR")
conditions = conditions[0].split("or")
row_lists =[]
for i in conditions:
name, operator, value = self._parse_condition(i)
column = self.column_by_name(name)
row_lists.append([ind for ind, x in enumerate(column) if get_op(operator, x, value)])

rows = []
for k in row_lists:
for row in k:
if not(row in rows):
rows.append(row)
elif "and" in condition.split():
conditions = condition.split("AND")
conditions = conditions[0].split("and")
row_lists =[]
for i in conditions:
name, operator, value = self._parse_condition(i)
column = self.column_by_name(name)
row_lists.append([ind for ind, x in enumerate(column) if get_op(operator, x, value)])
rows = set(row_lists[0]).intersection(*row_lists)

else:
name, operator, value = self._parse_condition(condition)
column = self.column_by_name(name)
rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]

        else:
            rows = [i for i in range(len(self.data))]
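For orientation, a few calls that exercise the new condition forms end to end (table, columns and values are hypothetical):

    db.select('*', 'employees', 'salary between 1000 and 2000')    # inclusive integer range
    db.select('*', 'employees', 'department=dev or department=hr') # union of the two matches
    db.select('*', 'employees', 'salary>=1000 and salary<=2000')   # intersection of the two matches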

@@ -281,10 +335,9 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False

        column_name, operator, value = self._parse_condition(condition)

        # if the column in condition is not a primary key, abort the select
        if column_name != self.column_names[self.pk_idx]:
            print('Column is not PK. Aborting')

        # if the column in condition is not a primary or unique key, abort the select
        if (column_name != self.column_names[self.pk_idx]) and (self.uk_idx is None or column_name != self.column_names[self.uk_idx]):
            print('Column is not PK or UK. Aborting')
        # here we run the same select twice, sequentially and using the btree.
        # we then check the results match and compare performance (number of operations)
        column = self.column_by_name(column_name)
@@ -534,6 +587,10 @@ def show(self, no_of_rows=None, is_locked=False):
            # table has a primary key, add PK next to the appropriate column
            headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
        # detect the rows that are no tfull of nones (these rows have been deleted)
        if self.uk_idx is not None:
            # table has unique key, add UK next to the appropriate column
            headers[self.uk_idx] = headers[self.uk_idx]+' #UK#'
        # detect the rows that are not full of nones (these rows have been deleted)
        # if we dont skip these rows, the returning table has empty rows at the deleted positions
        non_none_rows = [row for row in self.data if any(row)]
        # print using tabulate