dondi
diff --git a/‎database/README.md‎
Lines changed: 2 additions & 2 deletions b/‎database/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎database/expression-database/README.md‎
Lines changed: 2 additions & 2 deletions b/‎database/expression-database/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎database/expression-database/schema.sql‎
Lines changed: 14 additions & 14 deletions b/‎database/expression-database/schema.sql‎
Lines changed: 14 additions & 14 deletions
diff --git a/‎database/expression-database/scripts/loader.py‎
Lines changed: 6 additions & 6 deletions b/‎database/expression-database/scripts/loader.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎database/network-database/README.md‎
Lines changed: 36 additions & 4 deletions b/‎database/network-database/README.md‎
Lines changed: 36 additions & 4 deletions
diff --git a/‎database/network-database/schema.sql‎
Lines changed: 7 additions & 7 deletions b/‎database/network-database/schema.sql‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎database/network-database/scripts/filter_genes.py‎
Lines changed: 1 addition & 1 deletion b/‎database/network-database/scripts/filter_genes.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎database/network-database/scripts/generate_network.py‎
Lines changed: 1 addition & 1 deletion b/‎database/network-database/scripts/generate_network.py‎
Lines changed: 1 addition & 1 deletion
@@ -28,11 +28,11 @@ Here are the files pertaining to both the network and expression databases. Look
            From there, create the schemas using the following commands:
             
             ```
-            CREATE SCHEMA spring2022_network;
+            CREATE SCHEMA gene_regulatory_network;
             ```
             
             ```
-            CREATE SCHEMA fall2021;
+            CREATE SCHEMA gene_expression;
             ```
             
            Once they are created you can exit your database using the command `\q`.
 
@@ -6,9 +6,9 @@ All files pertaining the expression database live within this directory.
 
 #### Schema
 
-All network data is stored within the fall2021 schema on our Postgres database.
+All expression data is stored within the gene_expression schema on our Postgres database.
 
-The schema is located within this directory at the top level in the file `schema.sql`. It defines the tables located within the fall2021 schema. 
+The schema is located within this directory at the top level in the file `schema.sql`. It defines the tables located within the gene_expression schema. 
 
 Usage:
 To load to local database
 
@@ -1,4 +1,4 @@
-CREATE TABLE fall2021.ref (
+CREATE TABLE gene_expression.ref (
   pubmed_id VARCHAR,
   authors VARCHAR,
   publication_year VARCHAR,
@@ -8,18 +8,18 @@ CREATE TABLE fall2021.ref (
   PRIMARY KEY(ncbi_geo_id, pubmed_id)
 );
 
-CREATE TABLE fall2021.gene (
+CREATE TABLE gene_expression.gene (
   gene_id VARCHAR, -- systematic like name
   display_gene_id VARCHAR, -- standard like name
   species VARCHAR,
   taxon_id VARCHAR,
   PRIMARY KEY(gene_id, taxon_id)
 ); 
 
-CREATE TABLE fall2021.expression_metadata (
+CREATE TABLE gene_expression.expression_metadata (
   ncbi_geo_id VARCHAR,
   pubmed_id VARCHAR,
-  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES fall2021.ref(ncbi_geo_id, pubmed_id),
+  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
   control_yeast_strain VARCHAR,
   treatment_yeast_strain VARCHAR,
   control VARCHAR,
@@ -33,10 +33,10 @@ CREATE TABLE fall2021.expression_metadata (
   display_expression_table VARCHAR,
   PRIMARY KEY(ncbi_geo_id, pubmed_id, time_value)
 );
-CREATE TABLE fall2021.expression (
+CREATE TABLE gene_expression.expression (
   gene_id VARCHAR,
   taxon_id VARCHAR,
-  FOREIGN KEY (gene_id, taxon_id) REFERENCES fall2021.gene(gene_id, taxon_id),
+  FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
   -- ncbi_geo_id VARCHAR,
   -- pubmed_id VARCHAR,
   sort_index INT,
@@ -45,27 +45,27 @@ CREATE TABLE fall2021.expression (
   time_point FLOAT,
   dataset VARCHAR, 
   PRIMARY KEY(gene_id, sample_id)
-  -- FOREIGN KEY (ncbi_geo_id, pubmed_id, time_point) REFERENCES fall2021.expression_metadata(ncbi_geo_id, pubmed_id, time_value)
+  -- FOREIGN KEY (ncbi_geo_id, pubmed_id, time_point) REFERENCES gene_expression.expression_metadata(ncbi_geo_id, pubmed_id, time_value)
 ); 
-CREATE TABLE fall2021.degradation_rate (
+CREATE TABLE gene_expression.degradation_rate (
   gene_id VARCHAR,
   taxon_id VARCHAR,
-  FOREIGN KEY (gene_id, taxon_id) REFERENCES fall2021.gene(gene_id, taxon_id),
+  FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
   ncbi_geo_id VARCHAR,
   pubmed_id VARCHAR,
-  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES fall2021.ref(ncbi_geo_id, pubmed_id),
+  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
   PRIMARY KEY(gene_id, ncbi_geo_id, pubmed_id),
   degradation_rate FLOAT
 );
 
-CREATE TABLE fall2021.production_rate (
+CREATE TABLE gene_expression.production_rate (
   gene_id VARCHAR,
   taxon_id VARCHAR,
-  FOREIGN KEY (gene_id, taxon_id) REFERENCES fall2021.gene(gene_id, taxon_id),
+  FOREIGN KEY (gene_id, taxon_id) REFERENCES gene_expression.gene(gene_id, taxon_id),
   ncbi_geo_id VARCHAR,
   pubmed_id VARCHAR,
-  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES fall2021.ref(ncbi_geo_id, pubmed_id),
+  FOREIGN KEY (ncbi_geo_id, pubmed_id) REFERENCES gene_expression.ref(ncbi_geo_id, pubmed_id),
   PRIMARY KEY(gene_id, ncbi_geo_id, pubmed_id),
   production_rate FLOAT
-  -- FOREIGN KEY (gene_id, ncbi_geo_id, pubmed_id) REFERENCES fall2021.degradation_rate(gene_id, ncbi_geo_id, pubmed_id) -- not sure if we want to link the generated production rate to it's original degradation rate
+  -- FOREIGN KEY (gene_id, ncbi_geo_id, pubmed_id) REFERENCES gene_expression.degradation_rate(gene_id, ncbi_geo_id, pubmed_id) -- not sure if we want to link the generated production rate to its original degradation rate
 );
@@ -45,7 +45,7 @@ def convert_int(potential_int):
 This program Loads Refs into the database
 """
 def LOAD_REFS():
-    print('COPY fall2021.ref (pubmed_id, authors, publication_year, title, doi, ncbi_geo_id) FROM stdin;')
+    print('COPY gene_expression.ref (pubmed_id, authors, publication_year, title, doi, ncbi_geo_id) FROM stdin;')
     REFS_SOURCE = '../script-results/processed-expression/refs.csv'
     with open(REFS_SOURCE, 'r+') as f:
         reader = csv.reader(f)
@@ -67,7 +67,7 @@ def LOAD_REFS():
 This program Loads ID Mapping into the database
 """
 def LOAD_GENES():
-    print('COPY fall2021.gene (gene_id, display_gene_id, species, taxon_id) FROM stdin;')
+    print('COPY gene_expression.gene (gene_id, display_gene_id, species, taxon_id) FROM stdin;')
     GENE_SOURCE = '../script-results/processed-expression/genes.csv'
     with open(GENE_SOURCE, 'r+') as f:
         reader = csv.reader(f)
@@ -87,7 +87,7 @@ def LOAD_GENES():
 This program Loads Expression Metadata into the database
 """
 def LOAD_EXPRESSION_METADATA():
-    print('COPY fall2021.expression_metadata (ncbi_geo_id, pubmed_id, control_yeast_strain, treatment_yeast_strain, control, treatment, concentration_value, concentration_unit, time_value, time_unit, number_of_replicates, expression_table) FROM stdin;')
+    print('COPY gene_expression.expression_metadata (ncbi_geo_id, pubmed_id, control_yeast_strain, treatment_yeast_strain, control, treatment, concentration_value, concentration_unit, time_value, time_unit, number_of_replicates, expression_table) FROM stdin;')
     EXPRESSION_METADATA_SOURCE = '../script-results/processed-expression/expression-metadata.csv'
     with open(EXPRESSION_METADATA_SOURCE, 'r+') as f:
         reader = csv.reader(f)
@@ -116,7 +116,7 @@ def LOAD_EXPRESSION_METADATA():
 This program Loads Expression Data into the database
 """
 def LOAD_EXPRESSION_DATA():
-    print('COPY fall2021.expression (gene_id, taxon_id, sort_index, sample_id, expression, time_point, dataset) FROM stdin;')
+    print('COPY gene_expression.expression (gene_id, taxon_id, sort_index, sample_id, expression, time_point, dataset) FROM stdin;')
     EXPRESSION_DATA_SOURCE = '../script-results/processed-expression/expression-data.csv'
     with open(EXPRESSION_DATA_SOURCE, 'r+') as f:
         reader = csv.reader(f)
@@ -140,7 +140,7 @@ def LOAD_EXPRESSION_DATA():
 This program Loads Production Rates into the database
 """
 def LOAD_PRODUCTION_RATES():
-    print('COPY fall2021.production_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, production_rate) FROM stdin;')
+    print('COPY gene_expression.production_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, production_rate) FROM stdin;')
     PRODUCTION_RATES_SOURCE = '../script-results/processed-expression/production-rates.csv'
     with open(PRODUCTION_RATES_SOURCE, 'r+') as f:
         reader = csv.reader(f)
@@ -161,7 +161,7 @@ def LOAD_PRODUCTION_RATES():
 This program Loads Degradation Rates into the database
 """
 def LOAD_DEGRADATION_RATES():
-    print('COPY fall2021.degradation_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, degradation_rate) FROM stdin;')
+    print('COPY gene_expression.degradation_rate (gene_id, taxon_id, ncbi_geo_id, pubmed_id, degradation_rate) FROM stdin;')
     DEGRADATION_RATES_SOURCE = '../script-results/processed-expression/degradation-rates.csv'
     with open(DEGRADATION_RATES_SOURCE, 'r+') as f:
         reader = csv.reader(f)
 
@@ -6,9 +6,9 @@ All files pertaining the network database live within this directory.
 
 ### Schema
 
-All network data is stored within the spring2022_network schema on our Postgres database.
+All network data is stored within the gene_regulatory_network schema on our Postgres database.
 
-The schema is located within this directory at the top level in the file `schema.sql`. It defines the tables located within the spring2022_network schema. 
+The schema is located within this directory at the top level in the file `schema.sql`. It defines the tables located within the gene_regulatory_network schema. 
 
 Usage:
 To load to local database
@@ -32,10 +32,13 @@ Within the scripts directory, there are the following files:
 
 - `generate_network.py`
 - `loader.py`
+- `generate_new_network_version.py`
+- `loader_updates.py`
 - `filter_genes.py`
 - `generate_sgd_network_from_yeastract_network.py`
 
-#### Network Generator (and data preprocessor)
+
+#### Network Generator (and data preprocessor) (FOR FRESH DATABASE INSTALLS ONLY)
 
 This script (`generate_network.py`) is a two-for-one. It first uses the yeastmine service from the SGD database to query for all regulator genes relating to Saccharomyces cerevisiae. From there it gets all of the targets for each regulator gene. We then construct two networks from these connections (a regulator by regulator matrix as well as a regulator by target matrix). We also construct the processed loader files, so that they are ready to load using `loader.py`.
 
@@ -47,7 +50,7 @@ Usage:
 ```
 python3 generate_network.py
 ```
-#### Database Loader
+#### Database Loader (FOR FRESH DATABASE INSTALLS ONLY)
 
 This script (`loader.py`) is to be used to load your preprocessed genes into the database. 
 
@@ -62,6 +65,35 @@ To load to production database
 ```
 python3 loader.py | psql <address to database>
 ```
+#### Network Generator (and data preprocessor) (FOR UPDATES TO EXISTING DATABASE ONLY)
+
+This script (`generate_new_network_version.py`) is similar to its counterpart `generate_network.py`. It gets all existing genes in the database using the environment variable 'DB_URL'. You can set this environment variable on the terminal right before the command. It uses the yeastmine service from the SGD database to query for all regulator genes relating to Saccharomyces cerevisiae. From there it gets all of the targets for each regulator gene. We then construct two networks from these connections (a regulator by regulator matrix as well as a regulator by target matrix). We then see if the genes in the newly constructed network have any updates (i.e., a gene's standard name was set or a new gene was added to the database). We also construct the processed loader files, so that they are ready to load using `loader_updates.py`.
+
+The resulting network matrices are located in `script-results/networks` and the resulting processed loader files are located within `script-results/processed-loader-files`
+
+Make sure to have all dependencies installed beforehand or you will receive errors (pip3 install intermine, tzlocal, etc.; see the file for all imports).
+
+Usage: 
+```
+DB_URL="postgresql://[<db_user>:<password>]@<address to database>/<database name>" python3 generate_new_network_version.py
+```
+#### Database Loader (FOR UPDATES TO EXISTING DATABASE ONLY)
+
+This script (`loader_updates.py`) is to be used to load your preprocessed genes into the database. 
+
+This program generates direct SQL statements from the source files generated by the network generator in order to populate a relational database with those files’ data as well as make any needed updates to existing genes within the database. If necessary you will be prompted to enter a password.
+
+Usage: 
+To load to local database
+```
+python3 loader_updates.py | psql postgresql://localhost/postgres
+```
+To load to production database
+```
+python3 loader_updates.py | psql -h <grnsight database link> -U <user> <database name>
+
+```
+
 
 #### Filter Genes (beta functionality, not tested)
 
 
@@ -1,25 +1,25 @@
-CREATE TABLE spring2022_network.source (
+CREATE TABLE gene_regulatory_network.source (
   time_stamp TIMESTAMP WITH TIME ZONE,
   source VARCHAR,
-  source_display_name VARCHAR,
+  display_name VARCHAR,
   PRIMARY KEY(time_stamp, source)
 );
 
-CREATE TABLE spring2022_network.gene (
+CREATE TABLE gene_regulatory_network.gene (
   gene_id VARCHAR, -- systematic like name
   display_gene_id VARCHAR, -- standard like name
   species VARCHAR,
   taxon_id VARCHAR,
   regulator BOOLEAN,
   PRIMARY KEY(gene_id, taxon_id)
 ); 
-CREATE TABLE spring2022_network.network (
+CREATE TABLE gene_regulatory_network.network (
   regulator_gene_id VARCHAR,
   target_gene_id VARCHAR,
   taxon_id VARCHAR,
   time_stamp TIMESTAMP WITH TIME ZONE,
   source VARCHAR,
-  FOREIGN KEY (regulator_gene_id, taxon_id) REFERENCES spring2022_network.gene(gene_id, taxon_id),
-  FOREIGN KEY (target_gene_id, taxon_id) REFERENCES spring2022_network.gene(gene_id, taxon_id),
-  FOREIGN KEY (time_stamp, source) REFERENCES spring2022_network.source(time_stamp, source)
+  FOREIGN KEY (regulator_gene_id, taxon_id) REFERENCES gene_regulatory_network.gene(gene_id, taxon_id),
+  FOREIGN KEY (target_gene_id, taxon_id) REFERENCES gene_regulatory_network.gene(gene_id, taxon_id),
+  FOREIGN KEY (time_stamp, source) REFERENCES gene_regulatory_network.source(time_stamp, source)
 ); 
@@ -13,7 +13,7 @@
                                   port="5432",
                                   database="postgres")
     cursor = connection.cursor()
-    postgreSQL_select_Query = "select * from spring2022_network.gene"
+    postgreSQL_select_Query = "select * from gene_regulatory_network.gene"
 
     cursor.execute(postgreSQL_select_Query)
     print("Selecting rows from gene table using cursor.fetchall")
 
@@ -140,7 +140,7 @@ def create_regulator_to_target_row(target, all_regulators):
 # Source Table
 
 SOURCE_DESTINATION = '../script-results/processed-loader-files/source.csv'
-timestamp = datetime.datetime.now(datetime.timezone.utc)
+timestamp = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0)
 
 source = "YeastMine - Saccharomyces Genome Database"
 display_name = "Yeastmine - SGD"