Skip to content
Draft
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.Rproj.user
docs
common_data
.Rhistory
.DS_Store
.DS_Store
8 changes: 4 additions & 4 deletions R/ipr2viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ getTopAccByLinDomArch <- function(infile_full,
cln_domarch <- cln %>% select(domarch_cols)
col_counts <- colSums(is.na(cln_domarch))
DA_sym <- sym(names(which.min(col_counts)))
showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
# showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
## Group by Lineage, DomArch and reverse sort by group counts
grouped <- cln %>%
group_by({{ DA_sym }}, {{ lin_sym }}) %>%
arrange(desc(PcPositive)) %>%
summarise(count = n(), AccNum = dplyr::first(AccNum)) %>%
arrange(-count) %>%
filter({{ lin_sym }} != "" && {{ DA_sym }} != "")
filter({{ lin_sym }} != "" & {{ DA_sym }} != "")
top_acc <- grouped$AccNum[1:n]
top_acc <- na.omit(top_acc)
return(top_acc)
Expand Down Expand Up @@ -180,7 +180,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
analysis_cols <- paste0("DomArch.", analysis)
infile_full <- infile_full %>% select(.data$analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
infile_full <- infile_full %>% select(analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
## To filter by Analysis
analysis <- paste(analysis, collapse = "|")
## @SAM: This can't be set in stone since the analysis may change!
Expand Down Expand Up @@ -212,7 +212,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
analysis_labeler <- analyses %>%
pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)

lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
system.file("common_data", "cln_lookup_tbl.tsv", package = "MolEvolvR", mustWork = TRUE)
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
Expand Down
4 changes: 2 additions & 2 deletions R/networks_domarch.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
visOptions(highlightNearest = TRUE)
},
error = function(e) {
showNotification(toString(e))
# showNotification(toString(e))
vis_g <- "error"
},
finally = {
Expand Down Expand Up @@ -364,4 +364,4 @@ createBinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_inter
"auto" = visIgraphLayout(vg, "layout.auto")
)
vg
}
}
14 changes: 7 additions & 7 deletions R/pre-msa-tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
id = accessions_partitioned[[x]],
db = "protein",
rettype = "fasta",
api_key = Sys.getenv("ENTREZ_API_KEY")
# api_key = Sys.getenv("ENTREZ_API_KEY")
)
)
})
Expand Down Expand Up @@ -809,9 +809,9 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
abort("Error: The FASTA file does not exist: ", fasta_file)
}

if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
}
# if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
# abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
# }
fasta <- readAAStringSet(fasta_file)

aligned <- switch(tool,
Expand Down Expand Up @@ -869,11 +869,11 @@ writeMSA_AA2FA <- function(alignment, outpath) {
abort("Error: The output directory does not exist: ", outdir)
}

l <- length(rownames(alignment))
l <- length(names(unmasked((alignment))))
fasta <- ""
for (i in 1:l)
{
fasta <- paste0(fasta, paste(">", rownames(alignment)[i]), "\n")
fasta <- paste0(fasta, paste(">", names(unmasked((alignment)))[i]), "\n")
seq <- toString(unmasked(alignment)[[i]])
fasta <- paste0(fasta, seq, "\n")
}
Expand Down Expand Up @@ -955,4 +955,4 @@ getAccNumFromFA <- function(fasta_file) {
# cfile <- read_delim("data/alignments/pspc.gismo.aln", delim=" ")
# cfile <- as.data.frame(map(cfile,function(x) gsub("\\s+", "",x)))
# colnames(cfile) <- c("AccNum", "Alignment")
# }
# }
8 changes: 4 additions & 4 deletions R/summarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -690,18 +690,18 @@ totalGenContextOrDomArchCounts <- function(prot, column = "DomArch", lineage_col
abort("Error: 'digits' must be a non-negative integer.")
}

column <- sym(column)
# column <- sym(column)

prot <- select(prot, {{ column }}, {{ lineage_col }}) %>%
filter(!is.na({{ column }}) & !is.na({{ lineage_col }})) %>%
filter({{ column }} != "")

prot <- summarizeByLineage(prot, column, by = lineage_col, query = "all")
col_count <- prot %>%
group_by({{ column }}) %>%
group_by(!!sym(column)) %>%
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@the-mayer, do we prefer curly curly or is this switch going to be consistent with the rest of our code? @awasyn, is there any particular reason for this switch? Did it ({{}}) throw a warning or error?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it threw an error. For example, column <- "DomArch" is passed as a string, but earlier in the code there was a column <- sym(column), which changes "DomArch" to a symbol, i.e., "DomArch"; the code then tries to find the column name in the data frame as "DomArch", which doesn't exist.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. {{column}} should work similarly to sym(column), so I'm not sure what's going on. @the-mayer?

summarise(totalcount = sum(count))

total <- left_join(prot, col_count, by = as_string(column))
total <- left_join(prot, col_count, by = column)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@the-mayer, could you make sure this switch causes no errors?


sum_count <- sum(total$count)
total <- total %>%
Expand Down Expand Up @@ -901,4 +901,4 @@ findParalogs <- function(prot) {
# cat("Word counts for broken up domains from DAs and DAs from GCs.
# \nFor e.g.:
# DA.doms.wc <- query.sub$DA.doms %>%
# words2WordCounts()")
# words2WordCounts()")
Loading