Skip to content
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/ipr2viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ getTopAccByLinDomArch <- function(infile_full,
cln_domarch <- cln %>% select(domarch_cols)
col_counts <- colSums(is.na(cln_domarch))
DA_sym <- sym(names(which.min(col_counts)))
showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
# showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
## Group by Lineage, DomArch and reverse sort by group counts
grouped <- cln %>%
group_by({{ DA_sym }}, {{ lin_sym }}) %>%
arrange(desc(PcPositive)) %>%
summarise(count = n(), AccNum = dplyr::first(AccNum)) %>%
arrange(-count) %>%
filter({{ lin_sym }} != "" && {{ DA_sym }} != "")
filter({{ lin_sym }} != "" & {{ DA_sym }} != "")
top_acc <- grouped$AccNum[1:n]
top_acc <- na.omit(top_acc)
return(top_acc)
Expand Down Expand Up @@ -180,7 +180,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
analysis_cols <- paste0("DomArch.", analysis)
infile_full <- infile_full %>% select(.data$analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
infile_full <- infile_full %>% select(analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
## To filter by Analysis
analysis <- paste(analysis, collapse = "|")
## @SAM: This can't be set in stone since the analysis may change!
Expand Down Expand Up @@ -212,7 +212,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
analysis_labeler <- analyses %>%
pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)

lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
lookup_tbl_path <- "~/awasyn/new_trial/cln_lookup_tbl.tsv"
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
Expand Down
20 changes: 10 additions & 10 deletions R/networks_domarch.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@
#' A network of domains is returned based on shared domain architectures.
#'
#' @param prot A data frame that contains the column 'DomArch'.
#' @param column Name of column containing Domain architecture from which nodes
#' @param column Name of column containing Domain architecture from which nodes
#' and edges are generated.
#' @param domains_of_interest Character vector specifying domains of interest.
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' total counts if cutoff_type is "Total Count".
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Lineage.
#' @param layout Character. Layout type to be used for the network. Options are:
#' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
#' @param query_color Character. Color to represent the queried domain in the
#' @param query_color Character. Color to represent the queried domain in the
#' network.
#'
#' @importFrom dplyr across add_row all_of distinct filter mutate pull select
Expand Down Expand Up @@ -211,7 +211,7 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
visOptions(highlightNearest = TRUE)
},
error = function(e) {
showNotification(toString(e))
# showNotification(toString(e))
vis_g <- "error"
},
finally = {
Expand All @@ -231,18 +231,18 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
#'
#'
#' @param prot A data frame that contains the column 'DomArch'.
#' @param column Name of column containing Domain architecture from which nodes
#' @param column Name of column containing Domain architecture from which nodes
#' and edges are generated.
#' @param domains_of_interest Character vector specifying the domains of interest.
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' total counts if cutoff_type is "Total Count".
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Lineage.
#' @param layout Character. Layout type to be used for the network. Options are:
#' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
#' @param query_color Color that the nodes of the domains in the
#' @param query_color Color that the nodes of the domains in the
#' domains_of_interest vector are colored
#' @param partner_color Color that the nodes that are not part of the
#' @param partner_color Color that the nodes that are not part of the
#' domains_of_interest vector are colored
#' @param border_color Color for the borders of the nodes.
#' @param IsDirected Is the network directed? Set to false to eliminate arrows
Expand Down
70 changes: 35 additions & 35 deletions R/pre-msa-tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @param y Delimiter. Default is space (" ").
#'
#' @importFrom rlang abort
#'
#'
#' @return A character vector in title case.
#' @export
#'
Expand Down Expand Up @@ -112,21 +112,21 @@ addLeaves2Alignment <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
# lin_file="data/rawdata_tsv/PspA.txt",
reduced = FALSE) {

# Check if the alignment file is provided and exists
if (nchar(aln_file) == 0) {
abort("Error: Alignment file path must be provided.")
}

if (!file.exists(aln_file)) {
abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
}

# Check that the 'reduced' parameter is logical
if (!is.logical(reduced) || length(reduced) != 1) {
abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
Expand Down Expand Up @@ -249,15 +249,15 @@ addName <- function(data,
if (!is.data.frame(data)) {
abort("Error: The input 'data' must be a data frame")
}

# Check that the specified columns exist in the data
required_cols <- c(accnum_col, spec_col, lin_col)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
abort(paste("Error: The following columns are missing from the data:",
abort(paste("Error: The following columns are missing from the data:",
paste(missing_cols, collapse = ", ")))
}

cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
split_data <- data %>%
separate(
Expand Down Expand Up @@ -347,16 +347,16 @@ convertAlignment2FA <- function(aln_file = "",
if (nchar(aln_file) == 0) {
abort("Error: Alignment file path must be provided.")
}

if (!file.exists(aln_file)) {
abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
}

# Check that the 'reduced' parameter is logical
if (!is.logical(reduced) || length(reduced) != 1) {
abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
Expand Down Expand Up @@ -424,14 +424,14 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
if (!is.data.frame(acc2name)) {
abort("Error: acc2name must be a data frame.")
}

# Check if the specified columns exist in the data frame
if (!(acc_col %in% colnames(acc2name))) {
abort("Error: The specified acc_col '", acc_col, "' does not exist in
abort("Error: The specified acc_col '", acc_col, "' does not exist in
acc2name.")
}
if (!(name_col %in% colnames(acc2name))) {
abort("Error: The specified name_col '", name_col, "' does not exist in
abort("Error: The specified name_col '", name_col, "' does not exist in
acc2name.")
}

Expand Down Expand Up @@ -475,7 +475,7 @@ rename_fasta <- function(fa_path, outpath,
abort("Error: The input FASTA file does not exist at the specified
path: ", fa_path)
}

# Check if the output path is writable
outdir <- dirname(outpath)
if (!dir.exists(outdir)) {
Expand Down Expand Up @@ -541,20 +541,20 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
reduced = F) {
# Check if the alignment path exists
if (!dir.exists(aln_path)) {
abort("Error: The alignment directory does not exist at the specified
abort("Error: The alignment directory does not exist at the specified
path: ", aln_path)
}

# Check if the output path exists; if not, attempt to create it
if (!dir.exists(fa_outpath)) {
dir.create(fa_outpath, recursive = TRUE)
message("Note: The output directory did not exist and has been created: ",
message("Note: The output directory did not exist and has been created: ",
fa_outpath)
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort("Error: The linear file does not exist at the specified path: ",
abort("Error: The linear file does not exist at the specified path: ",
lin_file)
}
# library(here)
Expand Down Expand Up @@ -626,7 +626,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
if (!is.character(accessions) || length(accessions) == 0) {
abort("Error: 'accessions' must be a non-empty character vector.")
}

if (!dir.exists(dirname(outpath))) {
abort("Error: The output directory does not exist: ", dirname(outpath))
}
Expand Down Expand Up @@ -676,7 +676,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
id = accessions_partitioned[[x]],
db = "protein",
rettype = "fasta",
api_key = Sys.getenv("ENTREZ_API_KEY")
#api_key = Sys.getenv("ENTREZ_API_KEY")
)
)
})
Expand Down Expand Up @@ -732,21 +732,21 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
createRepresentativeAccNum <- function(prot_data,
reduced = "Lineage",
accnum_col = "AccNum") {

# Validate input
if (!is.data.frame(prot_data)) {
abort("Error: 'prot_data' must be a data frame.")
}

# Check if the reduced column exists in prot_data
if (!(reduced %in% colnames(prot_data))) {
abort("Error: The specified reduced column '", reduced, "' does not
abort("Error: The specified reduced column '", reduced, "' does not
exist in the data frame.")
}

# Check if the accnum_col exists in prot_data
if (!(accnum_col %in% colnames(prot_data))) {
abort("Error: The specified accession number column '", accnum_col, "'
abort("Error: The specified accession number column '", accnum_col, "'
does not exist in the data frame.")
}
# Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
Expand Down Expand Up @@ -808,10 +808,10 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
if (!file.exists(fasta_file)) {
abort("Error: The FASTA file does not exist: ", fasta_file)
}
if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
}

# if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
# abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
# }
fasta <- readAAStringSet(fasta_file)

aligned <- switch(tool,
Expand Down Expand Up @@ -857,23 +857,23 @@ writeMSA_AA2FA <- function(alignment, outpath) {
if (!inherits(alignment, "AAMultipleAlignment")) {
abort("Error: The alignment must be of type 'AAMultipleAlignment'.")
}

# Check the output path is a character string
if (!is.character(outpath) || nchar(outpath) == 0) {
abort("Error: Invalid output path specified.")
}

# Check if the output directory exists
outdir <- dirname(outpath)
if (!dir.exists(outdir)) {
abort("Error: The output directory does not exist: ", outdir)
}

l <- length(rownames(alignment))
l <- length(names(unmasked(alignment)))
fasta <- ""
for (i in 1:l)
{
fasta <- paste0(fasta, paste(">", rownames(alignment)[i]), "\n")
fasta <- paste0(fasta, paste(">", names(unmasked(alignment)[i])), "\n")
seq <- toString(unmasked(alignment)[[i]])
fasta <- paste0(fasta, seq, "\n")
}
Expand Down
Loading