diff --git a/3D clustering.R b/3D clustering.R
new file mode 100644
index 0000000..7785ed8
--- /dev/null
+++ b/3D clustering.R	
@@ -0,0 +1,275 @@
+# modeling Yadav et al. 2012 cluster analysis; tailored towards clustering of attachment points
+# initially coded by S. Johnson- updates and 3D analysis of spine heads added by K. Nett
+
+
+library(dplyr)          # to install these , enter install.packages("package name") into the console in RStudio. replace package name with each package name in quotes, ie install.packages("dplyr")
+library(ggplot2)
+library(lattice)
+library(latticeExtra)
+library(cluster)
+library(stats)
+library(readr)
+library(tidyr)
+library(bio3d) #install.packages("bio3d", dependencies = TRUE)
+library(DECIPHER)
+#how to install DECIPHER - a is enter in response to prompt for "all"
+#source("https://bioconductor.org/biocLite.R")
+#biocLite("DECIPHER")
+#a
+
+
+fileChosen <- file.choose()     # opens a file dialog; pick any one file inside the folder that holds the data
+filePath <- dirname(fileChosen) # directory that contains the chosen file
+setwd(filePath)                 # the (currently commented-out) save steps at the end write relative to this directory
+fileList <- list.files(filePath)  # every file name in that directory
+fileList <- grep("\\.csv$", fileList, value = TRUE) # keep only names ending in ".csv"; the dot is escaped because a bare "." matches any character
+data_all <- data.frame()  # accumulates the per-spine rows of every file processed below
+
+
+
+
+for(i in seq_along(fileList)){  # one pass per .csv file; seq_along() gives zero passes for an empty folder, where 1:length() would yield c(1, 0)
+  fileName <- unlist(strsplit(fileList[i], "[.]"))[1]   # text before the first "." of the file name (drops ".csv")
+  fileLoc <- file.path(filePath, fileList[i])           # full path of the file handled in this pass
+  df <- read_csv(fileLoc, col_types = cols()) # read the csv; col_types = cols() suppresses the column-specification message
+  colnames(df) <- gsub("-", "_", colnames(df)) # "-" is not allowed in a syntactic R name, so every dash in a column name becomes "_"
+  df <- df[complete.cases(df$SOMA_DISTANCE),] # drop spines with no soma distance
+  df <- df[complete.cases(df$RAYBURST_VOLUME),] # drop spines with no rayburst volume
+  df <- df[complete.cases(df$MAX_DTS),] # drop spines with no MAX_DTS value
+  df$file <- fileName # tag every spine (row) with the file it came from
+  total_length <- df %>% group_by(SECTION_NUMBER) %>% summarise(section_length=max(SECTION_LENGTH)) %>% ungroup() %>% summarise(total_length=sum(section_length)) %>% as.double() # per section take max(SECTION_LENGTH), add the sections together, and turn the 1x1 result into a plain number
+  total_spines <- as.numeric(nrow(df)) # one row per spine
+  density_overall <- total_spines/total_length  # spines per unit length of the segment
+  density_mushroom <- sum(as.numeric(df$TYPE=="mushroom"), na.rm = TRUE)/total_length  # "mushroom" rows per unit length; na.rm keeps one missing TYPE from turning the density into NA
+  density_thin <- sum(as.numeric(df$TYPE=="thin"), na.rm = TRUE)/total_length  # same, for "thin"
+  density_stubby <- sum(as.numeric(df$TYPE=="stubby"), na.rm = TRUE)/total_length # same, for "stubby"
+  df <- df[order(df$SOMA_DISTANCE),]  # sort by SOMA_DISTANCE so the coordinates pulled next are in attachment order
+  
+  data_coords <- data.frame(df$X, df$Y, df$Z) # spine head coordinates, one row per spine
+
+# Shane's original clustering analysis using spine attachment distance from soma  
+    
+  agn <- agnes(df$SOMA_DISTANCE,metric = "euclidean", method = "average") # agglomerative hierarchical clustering of the soma distances; "average" linkage = UPGMA
+  dist_ac <- as.matrix(dist(df$SOMA_DISTANCE))  # square matrix of pairwise differences in soma distance
+  df$nn_dist_ac <- apply(dist_ac,2, function(x) sort(x)[2])   # nearest-neighbour distance per spine: 2nd smallest entry of its column (the smallest is the 0 distance to itself)
+  df$nn2_dist_ac <- apply(dist_ac,2, function(x) sort(x)[3])  # second nearest neighbour (3rd smallest entry)
+  df$nn3_dist_ac <- apply(dist_ac,2, function(x) sort(x)[4])  # third nearest neighbour (4th smallest entry); NA when the file has fewer than 4 spines
+  dendrite_ac <- agn$ac # agglomerative coefficient reported by agnes for this dendrite
+  
+  ac_test <- list() # would collect the coefficients of random samples; only the commented-out loop below refers to it  
+  possible_dist <- seq(1,total_length, by=0.01) # candidate soma distances from 1 to total_length in 0.01 steps; only the commented-out code below refers to it
+  
+ #  for(k in 1:5000){
+  #  test_data <- sample(possible_dist, total_spines) # take a sample from all possible locations on dendrite to match total number of spines
+  #  test_dist <- as.matrix(dist(test_data)) #creates distance matrix for random sample
+  #  assign("test_cluster", agnes(test_data,metric = "euclidean", method = "average")) #runs UPGMA on sample and labels the agn output "test_cluster"
+   # ac_test <- rbind(ac_test,test_cluster$ac) # add row to ac using the 'test_cluster' ac
+ # } 
+  
+#  cScore_ac_1D <- sum(as.numeric(ac_test<dendrite_ac))/5000 # average (divide by 1000 samples) how many times random ac is smaller than dendrite_ac, the smaller the value, the more "clustering"
+ # df$c_score_ac_1D <- cScore_ac_1D #add a row to df with the cScore of the dendrite
+#end Shane's original analysis
+  
+
+  
+#1D clustering (from spine attachment points)
+#  dist_1D <- as.matrix(dist(df$SOMA_DISTANCE)) # creates a distance matrix using distance from soma, same as dist_ac, but keeping separate for different analysis for now
+#  UPGMA_obsv_1D <- IdClusters(dist_1D, method = "UPGMA", cutoff=0.75, showPlot=TRUE)  #runs cluster analysis with cutoff from Yadav paper on observed spines
+#  UPGMA_obsv_1D$rn <- rownames(UPGMA_obsv_1D) # adds a column with row names to keep spine ID, not sure if this step is necessary
+#  cluster_all_1D <- cbind(UPGMA_obsv_1D, df$SOMA_DISTANCE)  # adds cluster number, spine ID(row number) and corresponding distance from soma together
+#  cluster_all_1D <- cluster_all_1D[order(cluster_all_1D$cluster),] #orders data by cluster number
+#  cluster_freq_1D <- table(cluster_all_1D$cluster) #creates a table with how many spines are in each cluster
+#  cluster_freq_1D <- as.data.frame(cluster_freq_1D) #converts above table to a data frame
+#  cluster_freq_1D$Freq <- as.numeric(cluster_freq_1D$Freq) # turns the "cluster" column to a number
+#  cluster_freq_1D$is_clustered <- as.numeric(cluster_freq_1D$Freq > 1) #returns 1 if there is >1 spine in a cluster and 0 if not-- therefore all 1s reflect a true "cluster" since it has more than 1 spine in ity 
+#  num_clusters_1D <- sum(cluster_freq_1D$is_clustered) #count how many clusters are on this segment
+  
+#  spines_clustered_1D <- sum(cluster_freq_1D$Freq>1)
+
+#  spines_clustered_1D <- as.matrix(spines_clustered_1D) #converts # of spines clustered to matrix
+#  spines_clustered_1D <- as.numeric(spines_clustered_1D) #converts to numerical form
+#  spines_clustered_1D[is.na(spines_clustered_1D)] <- 0
+#  spines_not_1D <- as.numeric(total_spines - spines_clustered_1D) #define and calculate number of spines not clustered (total spines minus number of spines clustered)
+#  
+#  test_spines_clustered_all_1D <- data.frame() #initialize dataframe to contain # of spines in a cluster for all random samples
+#  test_num_clusters_all_1D <- data.frame()
+  
+# 1D random spines for-loop
+#  for(j in 1:10000){
+#    random_spines_1D <- sample(possible_dist, total_spines) # take a sample from all possible locations on dendrite to match total number of spines
+#    test_dist_1D <- as.matrix(dist(random_spines_1D)) #creates distance matrix for random sample
+#    UPGMA_test_1D <- IdClusters(test_dist_1D, method="UPGMA", cutoff=0.75, showPlot=FALSE) #runs UPGMA with cutoff (same as with obsv but with random sample)
+#   UPGMA_test_1D$rn <- rownames(UPGMA_test_1D) #add row name (spine ID) to cluster number  
+#    cluster_all_test_1D <- cbind(UPGMA_test_1D, random_spines_1D) #add cluster number/spine ID to randomly generated soma distances
+#   cluster_all_test_1D <- cluster_all_test_1D[order(cluster_all_test_1D$cluster),] #order by cluster number
+#    cluster_freq_test_1D <- table(cluster_all_test_1D$cluster) #generate table with number of spines in each cluster  
+#    cluster_freq_test_1D <- as.data.frame(cluster_freq_test_1D) #change table to data frame 
+#    cluster_freq_test_1D$is_clustered <- as.numeric(cluster_freq_test_1D$Freq > 1) #ask whether cluster has >1 spines in it (1 for yes, 0 for no)
+#   num_clusters_test_1D <- sum(cluster_freq_test_1D$is_clustered) # add how many 1s (or how many clusters have >1 spines) to get true number of clusters
+#    num_clusters_test_1D[is.na(num_clusters_test_1D)] <- 0 #changes possible NA from 0 clusters to 0
+#    test_num_clusters_all_1D <- rbind(test_num_clusters_all_1D, num_clusters_test_1D) #adds total number of clusters to running list
+#    spines_clustered_test_1D <- sum(cluster_freq_test_1D$Freq>1)
+#
+ #   spines_not_test_1D <- as.numeric(total_spines - spines_clustered_test_1D) # calculate how many spines are not clustered
+#    spines_clustered_test_1D[is.na(spines_clustered_test_1D)] <- 0  #returns 0 instead of Na if no spines are clustered in the random sample
+#    test_spines_clustered_all_1D <- rbind(test_spines_clustered_all_1D, spines_clustered_test_1D) #add number of clustered spines to running list 
+#    
+#  } # end of random spines 1D for-loop
+  
+#  test_spines_clustered_all_1D <- as.matrix(test_spines_clustered_all_1D) #changes data.frame to matrix (not sure if this is neccesary but it works)
+#  test_spines_clustered_all_1D <- as.numeric(test_spines_clustered_all_1D) #changes all vales to numeric (again, not sure if neccessary)
+#  
+  
+#  std_test_1D <- sd(test_spines_clustered_all_1D) #generates STD of total number of spines clustered in entire random sample
+#  mean_test_1D <- mean(test_spines_clustered_all_1D) #generates average number of spines in a cluster in random data
+# 
+#  curve_dnorm_1D <- dnorm(test_spines_clustered_all_1D, mean_test_1D, std_test_1D) #gives probability density function, or height of probability distribution at each point(height = frequency)
+#  std_curve_1D <- sd(curve_dnorm_1D)
+#  mean_curve_1D <- mean(curve_dnorm_1D)
+  
+#  Cscore_1D <- pnorm(spines_clustered_1D, mean_test_1D, std_test_1D) #calculates the probability that given the random sample distribution (curve mean+SD) the total number of spines observed is higher
+  #therefore, the closer to 1, higher probability that a given number has more clustered spines than the random normal distribution and vice versa
+  
+ # add all 1D data to all data 
+#  cluster_data_1D <- data.frame()  
+#  cluster_data_1D <- rbind(cluster_data_1D, num_clusters_1D)
+#  cluster_data_1D <- cbind(cluster_data_1D, spines_clustered_1D)
+#  cluster_data_1D <- cbind(cluster_data_1D, spines_not_1D)
+#  cluster_data_1D <- cbind(cluster_data_1D, Cscore_1D)
+#  cluster_data_1D[is.na(cluster_data_1D)] <- 0
+#  colnames(cluster_data_1D) <- c("# of clusters - 1D", "spines clustered - 1D", "spines not clustered - 1D", "Cscore - 1D")
+  
+  
+# 3D clustering analysis  
+  dist_3D <- as.matrix(dist.xyz(data_coords)) # pairwise distances between the spine head coordinates
+  UPGMA_obsv_3D <- IdClusters(dist_3D, method = "UPGMA", cutoff=0.75, showPlot=TRUE) # UPGMA clustering of the observed spines, cut at 0.75 (cutoff used in the Yadav paper)
+  # the result has one row per spine and a `cluster` column (e.g. spines 25 and 26 both in cluster 1)
+  UPGMA_obsv_3D$rn <- rownames(UPGMA_obsv_3D)   # keep the row name as an explicit spine-ID column
+  cluster_all_3D <- cbind(UPGMA_obsv_3D, data_coords)   # rows are still in spine order, so each coordinate lines up with its cluster number
+  cluster_all_3D <- cluster_all_3D[order(cluster_all_3D$cluster),] # sort by cluster number
+  cluster_freq_3D <- table(cluster_all_3D$cluster) # how many spines carry each cluster number
+  cluster_freq_3D <- as.data.frame(cluster_freq_3D) # one row per cluster, spine count in Freq
+  cluster_freq_3D$Freq <- as.numeric(cluster_freq_3D$Freq)
+  cluster_freq_3D$is_clustered <- as.numeric(cluster_freq_3D$Freq >1) # 1 = real cluster (more than one spine), 0 = a spine on its own
+  num_clusters_3D <- sum(cluster_freq_3D$is_clustered) # number of clusters holding more than one spine
+  # Number of spines that sit inside a real cluster. A plain sum() over the selected
+  # rows is 0 when no row qualifies, so the "no clusters at all" case needs no separate
+  # guard and no scalar ifelse() with an assignment inside it.
+  # The result is a plain number, ready for the subtraction and pnorm() further down.
+  spines_clustered_3D <- sum(cluster_freq_3D$Freq[cluster_freq_3D$is_clustered == 1])
+  spines_not_3D <- as.numeric(total_spines - spines_clustered_3D) # spines left outside every cluster
+  
+  # accumulators for the random-sample loop that follows: one row is appended per random sample
+  test_spines_clustered_all_3D <- data.frame()
+  test_num_clusters_all_3D <- data.frame()
+
+  
+# 3D random spines for loop
+  for(j in 1:10){  # each pass builds one random arrangement and clusters it like the observed data. NOTE(review): only 10 samples; the commented-out 1D loop used 10000
+    test_data_X <- data.frame(sample(df$X), df$Y, df$Z) # shuffle only the X values; Y and Z stay paired (keeps positions "biologically plausible")
+    colnames(test_data_X) <- c( "x", "Y", "Z")
+    test_data_Y <- data.frame(df$X, sample(df$Y), df$Z) # shuffle only the Y values
+    colnames(test_data_Y) <- c( "x", "Y", "Z")
+    test_data_Z <- data.frame(df$X, df$Y, sample(df$Z)) # shuffle only the Z values
+    colnames(test_data_Z) <- c( "x", "Y", "Z")
+    test_data <- rbind(test_data_X, test_data_Y, test_data_Z) # pool of 3 * total_spines candidate positions
+    test_data_final <-data.frame(sample_n(test_data, total_spines)) # draw as many positions from the pool as there are observed spines
+    test_dist_3D <- as.matrix(dist(test_data_final)) # Euclidean distance matrix of the random sample
+
+    # showPlot = FALSE: a dendrogram per random sample is never looked at and slows the loop (the 1D version of this loop also disabled it)
+    UPGMA_test_3D <- IdClusters(test_dist_3D, method = "UPGMA", cutoff=0.75, showPlot=FALSE) # same method and cutoff as for the observed spines
+    UPGMA_test_3D$rn <- rownames(UPGMA_test_3D)   # keep the row name as an explicit spine-ID column
+    cluster_all_test_3D <- cbind(UPGMA_test_3D, test_data_final)   # attach the random coordinates to their cluster numbers
+    cluster_all_test_3D <- cluster_all_test_3D[order(cluster_all_test_3D$cluster),] # sort by cluster number
+    cluster_freq_test_3D <- as.data.frame(table(cluster_all_test_3D$cluster)) # one row per cluster, spine count in Freq
+    cluster_freq_test_3D$is_clustered <- as.numeric(cluster_freq_test_3D$Freq >1) # 1 = more than one spine in the cluster
+    num_clusters_test_3D <- sum(cluster_freq_test_3D$is_clustered) # real clusters in this random sample (0 when there are none)
+    test_num_clusters_all_3D <- rbind(test_num_clusters_all_3D, num_clusters_test_3D)
+
+    # Clustered-spine count of this sample as ONE plain number. A grouped summarise() here
+    # would return two columns (the is_clustered flag and the sum) and no row at all for a
+    # sample without clusters; flattening the accumulator afterwards would then mix the
+    # flag values into the random distribution and silently leave out every zero.
+    spines_clustered_test_3D <- sum(cluster_freq_test_3D$Freq[cluster_freq_test_3D$is_clustered == 1])
+    spines_not_test_3D <- as.numeric(total_spines - spines_clustered_test_3D) # spines outside every cluster in this sample
+    test_spines_clustered_all_3D <- rbind(test_spines_clustered_all_3D, spines_clustered_test_3D) # append exactly one value per sample
+    
+  } # end of random spines 3D for-loop
+  
+  
+  test_spines_clustered_all_3D <- as.matrix(test_spines_clustered_all_3D)  # accumulator data frame -> matrix
+  test_spines_clustered_all_3D <- as.numeric(test_spines_clustered_all_3D) # matrix -> plain numeric vector
+  
+  mean_test_3D <- mean(test_spines_clustered_all_3D) # centre of the random-sample distribution
+  std_test_3D <- sd(test_spines_clustered_all_3D)    # spread of the random-sample distribution
+  curve_dnorm_3D <- dnorm(x = test_spines_clustered_all_3D, mean = mean_test_3D, sd = std_test_3D) # normal density height at each random-sample value
+  mean_curve_3D <- mean(curve_dnorm_3D)
+  std_curve_3D <- sd(curve_dnorm_3D)
+  
+  Cscore_3D <- pnorm(q = spines_clustered_3D, mean = mean_test_3D, sd = std_test_3D) # normal CDF at the observed count: near 1 = more clustered spines than most random samples
+  
+  # the per-file results below are single numbers, so each one is repeated on every spine row of df
+  df$num_clusters_3D <- num_clusters_3D         # clusters with more than one spine
+  df$spines_clustered_3D <- spines_clustered_3D # spines inside such clusters
+  df$spines_not_3D <- spines_not_3D             # spines outside every cluster
+  df$Cscore_3D <- Cscore_3D                     # clustering score of this file
+  df$density_overall <- density_overall         # densities computed at the top of the loop
+  df$density_mushroom <- density_mushroom
+  df$density_thin <- density_thin
+  df$density_stubby <- density_stubby
+  data_all <- rbind(data_all, df) # append this file's spine rows to the experiment-wide table
+  
+  
+#  data_all <- cbind(data_all, cluster_data_1D)
+#  data_all <- cbind(data_all, cluster_data_3D)
+
+  
+} # end of file for-loop 
+
+data_all$animal_num <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[2]) # second "-"-separated field of the file name: the animal ID with its trailing label letter still attached (list column for now)
+data_all$retro_label <- substring(data_all$animal_num, nchar(data_all$animal_num), nchar(data_all$animal_num))  # last character of that field: 'L' or 'N', i.e. whether the cell was retrogradely labeled
+#data_all$PDB <- substring(data_all$animal_num, nchar(data_all$animal_num), nchar(data_all$animal_num))
+data_all$PDB <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[3]) # third field of the file name: the dendrite-location code
+data_all$PDB <- replace(data_all$PDB, data_all$PDB=="p", "prox") # spell the one-letter location codes out: p -> prox
+data_all$PDB <- replace(data_all$PDB, data_all$PDB=="d", "dist") # d -> dist
+data_all$PDB <- replace(data_all$PDB, data_all$PDB=="b", "basal") # b -> basal
+data_all$retro_label <- replace(data_all$retro_label, data_all$retro_label=="L", "labeled") # L -> labeled
+data_all$retro_label <- replace(data_all$retro_label, data_all$retro_label=="N", "not labeled") # N -> not labeled
+data_all$stack <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[4]) # fourth field of the file name: letter ID of the imaging day
+data_all$stack <- unlist(data_all$stack) # list column -> plain vector
+data_all$animal_num <- lapply(data_all$animal_num, function(x) unlist(strsplit(x, "L"))[1]) # keep only the text before the first "L" ...
+data_all$animal_num <- lapply(data_all$animal_num, function(x) unlist(strsplit(x, "N"))[1]) # ... and before the first "N", which leaves the bare animal ID
+data_all$RAYBURST_VOLUME <- as.numeric(data_all$RAYBURST_VOLUME) # make sure both measurement columns are numeric before they are averaged
+data_all$MAX_DTS <- as.numeric(data_all$MAX_DTS)
+
+n <- readline(prompt="Enter group name:") # asks on the console for the animal (treatment) group
+data_all$group <- n # the same group name is written to every row of the master table
+
+data_all$animal_num <- unlist(data_all$animal_num) # list column -> plain vector, needed before the column can be used for grouping
+data_all$PDB <- unlist(data_all$PDB) # same as above
+
+# Per-group means shared by every summary table below. Only columns that the file loop really adds
+# to data_all are used: nn*_dist_ac (nearest-neighbour distances along SOMA_DISTANCE), Cscore_3D and
+# density_*. A 1D C-score and 3D nearest-neighbour distances are never computed by this script, and
+# naming a column that does not exist makes summarise() stop with an "object not found" error.
+summarise_spines <- function(grouped) summarise(grouped, nn_1D = mean(nn_dist_ac), nn2_1D = mean(nn2_dist_ac), nn3_1D = mean(nn3_dist_ac), Cscore_3D = mean(Cscore_3D), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
+
+TYPE_ave <- data_all %>% group_by(group, animal_num, retro_label, TYPE) %>% summarise_spines() # per group, animal, retro label and spine type
+PDB_TYPE_ave <- data_all %>% group_by(group, animal_num, stack, retro_label, PDB, TYPE) %>% summarise_spines() # additionally split by imaging day (stack) and dendrite location
+PDB_ave <- data_all %>% group_by(group, animal_num, retro_label, PDB) %>% summarise_spines() # per group, animal, retro label and dendrite location
+animal_ave <- data_all %>% group_by(group, animal_num) %>% summarise_spines() # per group and animal
+labeled_ave <- data_all %>% group_by(group, animal_num, retro_label) %>% summarise_spines() # per group, animal and retro label
+
+# mean 3D C-score per animal and retro label, on its own
+Cscore3D_avg <- data_all %>% group_by(animal_num, retro_label) %>% summarise(Cscore_3D = mean(Cscore_3D))
+
+#dir.create("analysis")  #creates a directory to create a file on the computer
+#savePath <- paste(filePath,"/", "analysis", collapse = "/", sep="") # creates the path where files can be saved
+#setwd(savePath) # sets working directory to the path created above
+#write.csv(data_all, "data_all.csv")
+#write.csv(PDB_ave, "PDB averages.csv")
+#write.csv(animal_ave, "Animal Averages.csv")
+#write.csv(TYPE_ave, "spine_type_averages.csv")
+#write.csv(PDB_TYPE_ave, "PDB_spine_type_averages.csv")
+#write.csv(labeled_ave, "labeled_averages.csv")
+
diff --git a/3D-update b/3D-update
new file mode 100644
index 0000000..038d718
--- /dev/null
+++ b/3D-update
@@ -0,0 +1 @@
+testing
diff --git a/folder_loop.R b/folder_loop.R
new file mode 100644
index 0000000..77f8ac4
--- /dev/null
+++ b/folder_loop.R
@@ -0,0 +1,683 @@
+#loop attempt with raw_data_cleanup.R again
+
+library(dplyr)           
+library(ggplot2)        
+library(lattice)
+library(latticeExtra)
+library(cluster)
+library(stats)
+library(readr)
+library(tidyr)
+library(bio3d)
+library(DECIPHER)
+library(svDialogs)
+library(rowr)
+library(tcltk)
+
+
+parent.folder <- tk_choose.dir(caption = "select folder", default = "")  # folder-picker dialog for the top-level folder
+sub.folders <- list.dirs(path = parent.folder, recursive = TRUE)[-1]     # every nested folder; [-1] drops the first entry, which is parent.folder itself
+saveFolder <- dirname(path = parent.folder)                              # directory one level above the chosen folder
+setwd(dir = parent.folder)                                               # start out in the chosen folder
+
+for(i in 1:length(sub.folders)) {
+setwd(parent.folder)
+  fileChosen <- sub.folders[i]
+
+
+
+
+#for(i in 1:length(sub.folders)) {
+#fileChosen <-  sub.folders[i]
+
+
+# Initialize Code: Select file(s) and define group
+
+#fileChosen <- file.choose()                                             # opens a folder dialog box to select first file in folder containing data from 1 treatment group 
+filePath   <- dirname(fileChosen)                                       # parent directory of the sub-folder handled in this pass
+setwd(filePath)                                                         # work from that parent directory
+fileList   <- list.files(fileChosen)                                    # every file name inside the sub-folder
+fileList   <- grep("\\.csv$", fileList, value = TRUE)                   # keep only names ending in ".csv"; the dot is escaped because a bare "." matches any character
+data_all   <- data.frame()                                              # fresh accumulator for the spines of this sub-folder
+#rat_ID     <- dlgInput("Enter Rat ID #", Sys.info()["user"])$res       # prompt given in console to enter the animal treatment group (i.e. Sal-D0)
+
+
+
+# File list for-loop: Apply code to each file in fileList
+
+for(file_idx in seq_along(fileList))  {                                 # one pass per .csv file; its own index name leaves the enclosing sub-folder loop's `i` untouched
+  fileName     <- unlist(strsplit(fileList[file_idx], "[.]"))[1]        # text before the first "." of the file name (drops '.csv')
+  fileLoc      <- file.path(fileChosen, fileList[file_idx])             # full path of the file handled in this pass
+  df           <- read_csv(fileLoc, col_types = readr::cols())          # read the csv quietly; cols() is namespace-qualified so a later-attached package exporting its own `cols` cannot mask it
+  colnames(df) <- gsub("-", "_", colnames(df))                          # replaces dashes ('-') w/ underscores ('_') in column names (a dash is not valid in a syntactic R name)
+  df           <- df[complete.cases(df$SOMA_DISTANCE) ,]                # removes spines with no soma distance data
+  df           <- df[complete.cases(df$RAYBURST_VOLUME), ]              # " " spine volume data
+  df           <- df[complete.cases(df$MAX_DTS), ]                      # " " length data
+  df$file      <- fileName                                              # add a column identifying the file name for each spine (row)
+  ## Current NeuronStudio data should only have 1 section/dendrite; however, it is possible
+  ## to have multiple if they are not linked together. The next 6 lines calculate total
+  ## length when there is more than 1 section
+  
+  total_length <- df %>%                                                # dplyr package: define total length of dendritic segment
+    group_by(SECTION_NUMBER)  %>%                                       # group data frame by section number
+    summarise(section_length = max(SECTION_LENGTH)) %>%                 # find the length of each section
+    ungroup() %>%                                                       # ungroup previous grouping by section number
+    summarise(total_length = sum(section_length)) %>%                   # add the section lengths together
+    as.double()                                                         # turn the 1x1 result into a plain number
+  
+  # measurement clean-up: zeros and values above the cut-offs below are treated as missing
+  df$RAYBURST_VOLUME[df$RAYBURST_VOLUME == 0]      <- NA       #if no data, make NA
+  df$HEAD_DIAMETER[df$HEAD_DIAMETER == 0]          <- NA
+  df$MAX_DTS[df$MAX_DTS == 0]                      <- NA
+  
+  df$HEAD_DIAMETER[df$HEAD_DIAMETER > 1.5]                        <- NA      #mushroom HD cutoff
+  df$MAX_DTS[df$MAX_DTS > 3.00]                                   <- NA      #mushroom/thin length cutoff
+  df$MAX_DTS[df$TYPE == "stubby"       & df$MAX_DTS > 0.80]       <- NA      #stubby length cutoff
+  df$HEAD_DIAMETER[df$TYPE == "stubby" & df$HEAD_DIAMETER > 0.97] <- NA      #stubby HD cutoff
+  df$HEAD_DIAMETER[df$TYPE == "thin"   & df$HEAD_DIAMETER > 1.22] <- NA      #thin HD cutoff
+
+  df$RAYBURST_VOLUME[df$TYPE == "mushroom" & df$RAYBURST_VOLUME > 0.60] <- NA   #cut-offs from 2SD about mean in Harris et al. 1992 paper, pyramidal CA1 neurons
+  df$RAYBURST_VOLUME[df$TYPE == "thin"     & df$RAYBURST_VOLUME > 0.10] <- NA   
+  df$RAYBURST_VOLUME[df$TYPE == "stubby"   & df$RAYBURST_VOLUME > 0.05] <- NA
+
+  total_spines     <- sum(!is.na(df$ID))                                # number of spines = rows with an ID; count() tallies data-frame rows and cannot be applied to a bare vector
+  density_overall  <- total_spines/total_length                         # calculate density of dendritic segment
+  density_mushroom <- sum(as.numeric(df$TYPE == "mushroom"),            # find total number of mushroom spines and
+                          na.rm = TRUE)/total_length                    # divide by total length to find mushroom density
+  density_thin     <- sum(as.numeric(df$TYPE == "thin"),                # as above for thin spines
+                          na.rm = TRUE)/total_length                    # ""
+  density_stubby   <- sum(as.numeric(df$TYPE == "stubby"),              # as above for stubby spines
+                          na.rm = TRUE)/total_length                    # ""
+  
+  
+  # add the per-file values and the fields parsed from the file name to every spine row
+  df$density_overall  <- density_overall # add these data to df
+  df$density_mushroom <- density_mushroom
+  df$density_thin     <- density_thin
+  df$density_stubby   <- density_stubby
+  df$animal_num       <- lapply(df$file, function(x) unlist(strsplit(x, "-"))[2])             # second "-" field of the file name: animal ID with the retro-label letter attached
+  df$retro_label      <- substring(df$animal_num, nchar(df$animal_num), nchar(df$animal_num)) # last character of that field: 'L' or 'N'
+  df$retro_label      <- replace(df$retro_label, df$retro_label == "L", 1)                    # recode: L (labeled) -> 1
+  df$retro_label      <- replace(df$retro_label, df$retro_label == "N", 0)                    # recode: N (not labeled) -> 0
+  df$retro_label      <- as.numeric(df$retro_label)
+  df$location         <- lapply(df$file, function(x) unlist(strsplit(x, "-"))[3])             # third "-" field of the file name
+  df$location         <- as.character(df$location)
+  df$animal_num       <- lapply(df$animal_num, function(x) unlist(strsplit(x, "L"))[1])       # keep the text before the first "L" ...
+  df$animal_num       <- lapply(df$animal_num, function(x) unlist(strsplit(x, "N"))[1])       # ... and before the first "N", leaving the bare number
+  df$animal_num       <- as.numeric(df$animal_num)
+  
+  
+  # densities above these limits are treated as missing
+  df$density_overall[df$density_overall > 4.00]   <- NA
+  df$density_mushroom[df$density_mushroom > 1.00] <- NA
+  df$density_thin[df$density_thin > 4.00]         <- NA
+  df$density_stubby[df$density_stubby > 1.00]     <- NA
+  
+  
+
+  
+  
+  #df[!complete.cases(df),]                         <- NA
+  
+    data_all            <- rbind(data_all, df)                                                  # append this file's spine rows to the table for the sub-folder
+  
+}                                                                                             # end of file for-loop 
+
+
+rat_ID <- data_all$animal_num[1]  # animal number taken from the first accumulated row
+data_all$MAX_DTS<- as.numeric(data_all$MAX_DTS)  # make sure the three measurement columns are numeric before averaging
+data_all$RAYBURST_VOLUME <- as.numeric(data_all$RAYBURST_VOLUME)
+data_all$HEAD_DIAMETER <- as.numeric(data_all$HEAD_DIAMETER)
+# A_A: averages over all files together, with no split by retro label (presumably "all labels, all locations" -- TODO confirm the naming)
+# Step 1: one row per file with that file's mean densities, volume, length and head diameter.
+data_mast_A_A <- data_all %>%
+  group_by(file) %>%
+  summarise(den_ov_A_A   = mean(density_overall, na.rm = TRUE),
+            den_mush_A_A = mean(density_mushroom, na.rm = TRUE),
+            den_thin_A_A = mean(density_thin, na.rm = TRUE),
+            den_stub_A_A = mean(density_stubby, na.rm = TRUE),
+            vol_ov_A_A   = mean(RAYBURST_VOLUME, na.rm = TRUE),
+            len_ov_A_A   = mean(MAX_DTS, na.rm = TRUE),
+            hd_ov_A_A    = mean(HEAD_DIAMETER, na.rm = TRUE)) %>%
+  ungroup()
+# Step 2: average the per-file rows into a single row, so every file carries the same weight.
+data_mast_A_A <- data_mast_A_A %>%
+  summarise(den_ov   = mean(den_ov_A_A, na.rm = TRUE),
+            den_mush = mean(den_mush_A_A, na.rm = TRUE),
+            den_thin = mean(den_thin_A_A, na.rm = TRUE),
+            den_stub = mean(den_stub_A_A, na.rm = TRUE),
+            vol_ov   = mean(vol_ov_A_A, na.rm = TRUE),
+            len_ov   = mean(len_ov_A_A, na.rm = TRUE),
+            hd_ov    = mean(hd_ov_A_A, na.rm = TRUE)) %>%
+  ungroup()
+# Same two steps per spine TYPE: first the mean of each file/TYPE pair ...
+data_TYPE_A_A <- data_all %>%
+  group_by(file, TYPE) %>%
+  summarise(vol_TYPE_A_A = mean(RAYBURST_VOLUME, na.rm = TRUE),
+            len_TYPE_A_A = mean(MAX_DTS, na.rm = TRUE),
+            hd_TYPE_A_A  = mean(HEAD_DIAMETER, na.rm = TRUE)) %>%
+  ungroup()
+# ... then the mean of those per-file values within each TYPE (one row per TYPE).
+data_TYPE_A_A <- data_TYPE_A_A %>%
+  group_by(TYPE)               %>%
+  summarise(vol_TYPE = mean(vol_TYPE_A_A, na.rm = TRUE),
+            len_TYPE = mean(len_TYPE_A_A, na.rm = TRUE),
+            hd_TYPE  = mean(hd_TYPE_A_A, na.rm = TRUE))  %>%
+  ungroup()
+# Pull out one row per spine type; select = 2:4 keeps vol_TYPE, len_TYPE, hd_TYPE and drops the TYPE column.
+data_mush_A_A <- subset(data_TYPE_A_A, data_TYPE_A_A$TYPE == 'mushroom', select = 2:4)
+data_thin_A_A <- subset(data_TYPE_A_A, data_TYPE_A_A$TYPE == 'thin',     select = 2:4)
+data_stub_A_A <- subset(data_TYPE_A_A, data_TYPE_A_A$TYPE == 'stubby',   select = 2:4)
+
+label_location <- c('A_A')
+# Glue label, overall means and per-type means side by side (rowr's cbind.fill; presumably pads shorter/empty pieces with `fill` -- confirm against the rowr docs).
+data_mast_A_A <- cbind.fill(label_location, 
+                            data_mast_A_A, 
+                            data_mush_A_A, 
+                            data_thin_A_A, 
+                            data_stub_A_A, 
+                            fill = 0)
+
+
+# N_A: retro_label == 0, all locations pooled.
+# The by-retro_label tables built here (data_mast_NL_A, data_TYPE_NL_A) are
+# read again by the L_A section, so they are kept under their original names.
+data_mast_NL_A <- data_all %>%
+  group_by(file, retro_label) %>%
+  summarise(
+    den_ov_NL_A   = mean(density_overall, na.rm = TRUE),
+    den_mush_NL_A = mean(density_mushroom, na.rm = TRUE),
+    den_thin_NL_A = mean(density_thin, na.rm = TRUE),
+    den_stub_NL_A = mean(density_stubby, na.rm = TRUE),
+    vol_ov_NL_A   = mean(RAYBURST_VOLUME, na.rm = TRUE),
+    len_ov_NL_A   = mean(MAX_DTS, na.rm = TRUE),
+    hd_ov_NL_A    = mean(HEAD_DIAMETER, na.rm = TRUE)
+  ) %>%
+  ungroup() %>%
+  group_by(retro_label) %>%
+  summarise(
+    den_ov   = mean(den_ov_NL_A, na.rm = TRUE),
+    den_mush = mean(den_mush_NL_A, na.rm = TRUE),
+    den_thin = mean(den_thin_NL_A, na.rm = TRUE),
+    den_stub = mean(den_stub_NL_A, na.rm = TRUE),
+    vol_ov   = mean(vol_ov_NL_A, na.rm = TRUE),
+    len_ov   = mean(len_ov_NL_A, na.rm = TRUE),
+    hd_ov    = mean(hd_ov_NL_A, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Columns 2:8 are the seven summary columns (column 1 is retro_label).
+data_mast_N_A <- data_mast_NL_A[which(data_mast_NL_A$retro_label == 0), 2:8]
+
+# Per-TYPE means: per file first, then across files.
+data_TYPE_NL_A <- data_all %>%
+  group_by(file, retro_label, TYPE) %>%
+  summarise(
+    vol_TYPE_NL_A = mean(RAYBURST_VOLUME, na.rm = TRUE),
+    len_TYPE_NL_A = mean(MAX_DTS, na.rm = TRUE),
+    hd_TYPE_NL_A  = mean(HEAD_DIAMETER, na.rm = TRUE)
+  ) %>%
+  ungroup() %>%
+  group_by(retro_label, TYPE) %>%
+  summarise(
+    vol_TYPE = mean(vol_TYPE_NL_A, na.rm = TRUE),
+    len_TYPE = mean(len_TYPE_NL_A, na.rm = TRUE),
+    hd_TYPE  = mean(hd_TYPE_NL_A, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Columns 3:5 are vol_TYPE, len_TYPE and hd_TYPE.
+data_mush_N_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 0 & TYPE == 'mushroom')),
+  3:5
+]
+data_thin_N_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 0 & TYPE == 'thin')),
+  3:5
+]
+data_stub_N_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 0 & TYPE == 'stubby')),
+  3:5
+]
+
+label_location <- 'N_A'
+data_mast_N_A <- cbind.fill(
+  label_location, data_mast_N_A,
+  data_mush_N_A, data_thin_N_A, data_stub_N_A,
+  fill = 0
+)
+
+
+# L_A: retro_label == 1, all locations pooled.
+# Reuses the by-retro_label tables data_mast_NL_A and data_TYPE_NL_A.
+data_mast_L_A <- data_mast_NL_A[which(data_mast_NL_A$retro_label == 1), 2:8]
+
+data_mush_L_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 1 & TYPE == 'mushroom')),
+  3:5
+]
+data_thin_L_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 1 & TYPE == 'thin')),
+  3:5
+]
+data_stub_L_A <- data_TYPE_NL_A[
+  with(data_TYPE_NL_A, which(retro_label == 1 & TYPE == 'stubby')),
+  3:5
+]
+
+label_location <- 'L_A'
+data_mast_L_A <- cbind.fill(
+  label_location, data_mast_L_A,
+  data_mush_L_A, data_thin_L_A, data_stub_L_A,
+  fill = 0
+)
+
+# A_b: location == 'b', retro_label values pooled.
+# The by-location tables built here (data_mast_A_bpd, data_TYPE_A_bpd) are
+# read again by the A_p and A_d sections.
+data_mast_A_bpd <- data_all %>%
+  group_by(file, location) %>%
+  summarise(
+    den_ov_A_b   = mean(density_overall, na.rm = TRUE),
+    den_mush_A_b = mean(density_mushroom, na.rm = TRUE),
+    den_thin_A_b = mean(density_thin, na.rm = TRUE),
+    den_stub_A_b = mean(density_stubby, na.rm = TRUE),
+    vol_ov_A_b   = mean(RAYBURST_VOLUME, na.rm = TRUE),
+    len_ov_A_b   = mean(MAX_DTS, na.rm = TRUE),
+    hd_ov_A_b    = mean(HEAD_DIAMETER, na.rm = TRUE)
+  ) %>%
+  ungroup() %>%
+  group_by(location) %>%
+  summarise(
+    den_ov   = mean(den_ov_A_b, na.rm = TRUE),
+    den_mush = mean(den_mush_A_b, na.rm = TRUE),
+    den_thin = mean(den_thin_A_b, na.rm = TRUE),
+    den_stub = mean(den_stub_A_b, na.rm = TRUE),
+    vol_ov   = mean(vol_ov_A_b, na.rm = TRUE),
+    len_ov   = mean(len_ov_A_b, na.rm = TRUE),
+    hd_ov    = mean(hd_ov_A_b, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Columns 2:8 are the seven summary columns (column 1 is location).
+data_mast_A_b <- data_mast_A_bpd[which(data_mast_A_bpd$location == 'b'), 2:8]
+
+# Per-TYPE means: per file first, then across files.
+data_TYPE_A_bpd <- data_all %>%
+  group_by(file, location, TYPE) %>%
+  summarise(
+    vol_TYPE_A_b = mean(RAYBURST_VOLUME, na.rm = TRUE),
+    len_TYPE_A_b = mean(MAX_DTS, na.rm = TRUE),
+    hd_TYPE_A_b  = mean(HEAD_DIAMETER, na.rm = TRUE)
+  ) %>%
+  ungroup() %>%
+  group_by(location, TYPE) %>%
+  summarise(
+    vol_TYPE = mean(vol_TYPE_A_b, na.rm = TRUE),
+    len_TYPE = mean(len_TYPE_A_b, na.rm = TRUE),
+    hd_TYPE  = mean(hd_TYPE_A_b, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Columns 3:5 are vol_TYPE, len_TYPE and hd_TYPE.
+data_mush_A_b <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'b' & TYPE == 'mushroom')),
+  3:5
+]
+data_thin_A_b <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'b' & TYPE == 'thin')),
+  3:5
+]
+data_stub_A_b <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'b' & TYPE == 'stubby')),
+  3:5
+]
+
+label_location <- 'A_b'
+data_mast_A_b <- cbind.fill(
+  label_location, data_mast_A_b,
+  data_mush_A_b, data_thin_A_b, data_stub_A_b,
+  fill = 0
+)
+
+# A_p: location == 'p', retro_label values pooled.
+# Reuses the by-location tables data_mast_A_bpd and data_TYPE_A_bpd.
+data_mast_A_p <- data_mast_A_bpd[which(data_mast_A_bpd$location == 'p'), 2:8]
+
+data_mush_A_p <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'p' & TYPE == 'mushroom')),
+  3:5
+]
+data_thin_A_p <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'p' & TYPE == 'thin')),
+  3:5
+]
+data_stub_A_p <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'p' & TYPE == 'stubby')),
+  3:5
+]
+
+label_location <- 'A_p'
+data_mast_A_p <- cbind.fill(
+  label_location, data_mast_A_p,
+  data_mush_A_p, data_thin_A_p, data_stub_A_p,
+  fill = 0
+)
+
+# A_d: location == 'd', retro_label values pooled.
+# Reuses the by-location tables data_mast_A_bpd and data_TYPE_A_bpd.
+data_mast_A_d <- data_mast_A_bpd[which(data_mast_A_bpd$location == 'd'), 2:8]
+
+data_mush_A_d <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'd' & TYPE == 'mushroom')),
+  3:5
+]
+data_thin_A_d <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'd' & TYPE == 'thin')),
+  3:5
+]
+data_stub_A_d <- data_TYPE_A_bpd[
+  with(data_TYPE_A_bpd, which(location == 'd' & TYPE == 'stubby')),
+  3:5
+]
+
+label_location <- 'A_d'
+data_mast_A_d <- cbind.fill(
+  label_location, data_mast_A_d,
+  data_mush_A_d, data_thin_A_d, data_stub_A_d,
+  fill = 0
+)
+
+# N_b / counts: per-file means for every retro_label x location combination.
+# This file-level table is kept so the combinations present can be counted.
+data_mast_NL_bpd <- data_all %>%
+  group_by(file, retro_label, location) %>%
+  summarise(den_ov_NL_bpd   = mean(density_overall, na.rm = TRUE),
+            den_mush_NL_bpd = mean(density_mushroom, na.rm = TRUE),
+            den_thin_NL_bpd = mean(density_thin, na.rm = TRUE),
+            den_stub_NL_bpd = mean(density_stubby, na.rm = TRUE),
+            vol_ov_NL_bpd   = mean(RAYBURST_VOLUME, na.rm = TRUE),
+            len_ov_NL_bpd   = mean(MAX_DTS, na.rm = TRUE),
+            hd_ov_NL_bpd    = mean(HEAD_DIAMETER, na.rm = TRUE)) %>%
+  ungroup()
+# First three columns are the grouping keys (file, retro_label, location).
+data_count_master <- data_mast_NL_bpd[,1:3]
+# Number of rows (files) with retro_label == 0 at each location, and in total.
+count_N_b <- data.frame(nrow(subset(data_count_master, retro_label == 0 & location =='b')))
+count_N_p <- data.frame(nrow(subset(data_count_master, retro_label == 0 & location =='p')))
+count_N_d <- data.frame(nrow(subset(data_count_master, retro_label == 0 & location =='d')))
+count_N_A <- sum(count_N_b, count_N_p, count_N_d)
+# Same counts for retro_label == 1.
+count_L_b <- data.frame(nrow(subset(data_count_master, retro_label == 1 & location =='b')))
+count_L_p <- data.frame(nrow(subset(data_count_master, retro_label == 1 & location =='p')))
+count_L_d <- data.frame(nrow(subset(data_count_master, retro_label == 1 & location =='d')))
+count_L_A <- sum(count_L_b, count_L_p, count_L_d)
+# Totals pooled over retro_label: overall (A_A) and per location (A_b/p/d).
+count_A_A <- sum(count_N_A, count_L_A)
+count_A_b <- sum(count_N_b, count_L_b)
+count_A_p <- sum(count_N_p, count_L_p)
+count_A_d <- sum(count_N_d, count_L_d)
+# Bind the twelve counts in the same group order that data_master is stacked
+# in (A_A, A_b, A_p, A_d, then N_*, then L_*). cbind.fill is defined outside
+# this section; its exact padding behaviour is not visible here.
+data_count_master <- cbind.fill(count_A_A,
+                                count_A_b,
+                                count_A_p,
+                                count_A_d,
+                                count_N_A,
+                                count_N_b, 
+                                count_N_p, 
+                                count_N_d,
+                                count_L_A,
+                                count_L_b, 
+                                count_L_p, 
+                                count_L_d, 
+                                fill = 0)
+# Transpose and drop row names; presumably one count per row -- TODO confirm.
+data_count_master            <- t(data_count_master)
+row.names(data_count_master) <- NULL
+
+
+# Collapse the file-level retro_label x location table across files.
+# The result (and data_TYPE_NL_bpd below) is read by every N_* / L_* section.
+data_mast_NL_bpd <- data_mast_NL_bpd %>%
+  group_by(retro_label, location) %>%
+  summarise(
+    den_ov   = mean(den_ov_NL_bpd, na.rm = TRUE),
+    den_mush = mean(den_mush_NL_bpd, na.rm = TRUE),
+    den_thin = mean(den_thin_NL_bpd, na.rm = TRUE),
+    den_stub = mean(den_stub_NL_bpd, na.rm = TRUE),
+    vol_ov   = mean(vol_ov_NL_bpd, na.rm = TRUE),
+    len_ov   = mean(len_ov_NL_bpd, na.rm = TRUE),
+    hd_ov    = mean(hd_ov_NL_bpd, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# N_b: retro_label == 0, location == 'b'.
+# Columns 3:9 are the seven summary columns (1:2 are the grouping keys).
+data_mast_N_b <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 0 & location == 'b')),
+  3:9
+]
+
+# Per-TYPE means: per file first, then across files.
+data_TYPE_NL_bpd <- data_all %>%
+  group_by(file, retro_label, location, TYPE) %>%
+  summarise(
+    vol_TYPE_NL_bpd = mean(RAYBURST_VOLUME, na.rm = TRUE),
+    len_TYPE_NL_bpd = mean(MAX_DTS, na.rm = TRUE),
+    hd_TYPE_NL_bpd  = mean(HEAD_DIAMETER, na.rm = TRUE)
+  ) %>%
+  ungroup() %>%
+  group_by(retro_label, location, TYPE) %>%
+  summarise(
+    vol_TYPE = mean(vol_TYPE_NL_bpd, na.rm = TRUE),
+    len_TYPE = mean(len_TYPE_NL_bpd, na.rm = TRUE),
+    hd_TYPE  = mean(hd_TYPE_NL_bpd, na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Columns 4:6 are vol_TYPE, len_TYPE and hd_TYPE.
+data_mush_N_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'b' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_N_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'b' & TYPE == 'thin')),
+  4:6
+]
+data_stub_N_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'b' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'N_b'
+data_mast_N_b <- cbind.fill(
+  label_location, data_mast_N_b,
+  data_mush_N_b, data_thin_N_b, data_stub_N_b,
+  fill = 0
+)
+
+# N_p: retro_label == 0, location == 'p'.
+# Reuses the collapsed tables data_mast_NL_bpd and data_TYPE_NL_bpd.
+data_mast_N_p <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 0 & location == 'p')),
+  3:9
+]
+
+data_mush_N_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'p' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_N_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'p' & TYPE == 'thin')),
+  4:6
+]
+data_stub_N_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'p' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'N_p'
+data_mast_N_p <- cbind.fill(
+  label_location, data_mast_N_p,
+  data_mush_N_p, data_thin_N_p, data_stub_N_p,
+  fill = 0
+)
+
+# N_d: retro_label == 0, location == 'd'.
+# Reuses the collapsed tables data_mast_NL_bpd and data_TYPE_NL_bpd.
+data_mast_N_d <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 0 & location == 'd')),
+  3:9
+]
+
+data_mush_N_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'd' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_N_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'd' & TYPE == 'thin')),
+  4:6
+]
+data_stub_N_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 0 & location == 'd' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'N_d'
+data_mast_N_d <- cbind.fill(
+  label_location, data_mast_N_d,
+  data_mush_N_d, data_thin_N_d, data_stub_N_d,
+  fill = 0
+)
+
+# L_b: retro_label == 1, location == 'b'.
+# Reuses the collapsed tables data_mast_NL_bpd and data_TYPE_NL_bpd.
+data_mast_L_b <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 1 & location == 'b')),
+  3:9
+]
+
+data_mush_L_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'b' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_L_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'b' & TYPE == 'thin')),
+  4:6
+]
+data_stub_L_b <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'b' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'L_b'
+data_mast_L_b <- cbind.fill(
+  label_location, data_mast_L_b,
+  data_mush_L_b, data_thin_L_b, data_stub_L_b,
+  fill = 0
+)
+
+# L_p: retro_label == 1, location == 'p'.
+# Reuses the collapsed tables data_mast_NL_bpd and data_TYPE_NL_bpd.
+data_mast_L_p <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 1 & location == 'p')),
+  3:9
+]
+
+data_mush_L_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'p' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_L_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'p' & TYPE == 'thin')),
+  4:6
+]
+data_stub_L_p <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'p' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'L_p'
+data_mast_L_p <- cbind.fill(
+  label_location, data_mast_L_p,
+  data_mush_L_p, data_thin_L_p, data_stub_L_p,
+  fill = 0
+)
+
+# L_d: retro_label == 1, location == 'd'.
+# Reuses the collapsed tables data_mast_NL_bpd and data_TYPE_NL_bpd.
+data_mast_L_d <- data_mast_NL_bpd[
+  with(data_mast_NL_bpd, which(retro_label == 1 & location == 'd')),
+  3:9
+]
+
+data_mush_L_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'd' & TYPE == 'mushroom')),
+  4:6
+]
+data_thin_L_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'd' & TYPE == 'thin')),
+  4:6
+]
+data_stub_L_d <- data_TYPE_NL_bpd[
+  with(data_TYPE_NL_bpd,
+       which(retro_label == 1 & location == 'd' & TYPE == 'stubby')),
+  4:6
+]
+
+label_location <- 'L_d'
+data_mast_L_d <- cbind.fill(
+  label_location, data_mast_L_d,
+  data_mush_L_d, data_thin_L_d, data_stub_L_d,
+  fill = 0
+)
+
+
+
+
+# Stack the twelve group summaries. The order (A_*, N_*, L_*; each as
+# A, b, p, d) matches the order in which the counts were bound into
+# data_count_master.
+data_master <- rbind(
+  data_mast_A_A, data_mast_A_b, data_mast_A_p, data_mast_A_d,
+  data_mast_N_A, data_mast_N_b, data_mast_N_p, data_mast_N_d,
+  data_mast_L_A, data_mast_L_b, data_mast_L_p, data_mast_L_d
+)
+
+# Prepend rat_ID and the per-group counts (no fill value is passed here).
+data_master <- cbind.fill(rat_ID, data_count_master, data_master)
+
+# Final column names: identifiers, overall means, then vol/len/hd per TYPE.
+colnames(data_master) <- c(
+  'rat_ID', 'count', 'group',
+  'den_ov', 'den_mush', 'den_thin', 'den_stub',
+  'vol_ov', 'len_ov', 'hd_ov',
+  'vol_mush', 'len_mush', 'hd_mush',
+  'vol_thin', 'len_thin', 'hd_thin',
+  'vol_stub', 'len_stub', 'hd_stub'
+)
+
+
+
+# Save data_all and data_master as CSV files named after rat_ID, under
+# <saveFolder>/data_all and <saveFolder>/data_master respectively.
+# The output directories are created when missing (the original setwd() calls
+# failed if they did not exist), and files are written through full paths so
+# the working directory is no longer changed on every loop iteration.
+savePath_all <- file.path(saveFolder, "data_all")
+dir.create(savePath_all, showWarnings = FALSE, recursive = TRUE)
+write.csv(data_all, file.path(savePath_all, paste0(rat_ID, "_data_all.csv")))
+
+savePath_master <- file.path(saveFolder, "data_master")
+dir.create(savePath_master, showWarnings = FALSE, recursive = TRUE)
+write.csv(
+  data_master,
+  file.path(savePath_master, paste0(rat_ID, "_data_master.csv"))
+)
+
+}
+
+
+# Tell the user the run has finished. winDialog() exists only in Windows
+# builds of R, so fall back to a console message on other platforms instead
+# of erroring on the script's last line.
+if (.Platform$OS.type == "windows") {
+  winDialog("ok", "Code complete")
+} else {
+  message("Code complete")
+}