@@ -64,7 +64,7 @@ We have detailed the various steps in a differential expression analysis workflo
6464Load data and metadata
6565
6666``` {r}
67- data <- read_table("../Data/Mov10_counts_traditional.txt ")
67+ data <- read_table("../Data/Vampirium_counts_traditional.tsv ")
6868
6969meta <- read_csv("../Data/samplesheet.csv")
7070```
@@ -73,14 +73,14 @@ Check that the row names of the metadata equal the column names of the **raw cou
7373
7474``` {r}
7575### Check that sample names match in both files
76- all(colnames(data)[-1 ] %in% meta$sample)
77- all(colnames(data)[-1 ] == meta$sample)
76+ all(colnames(data)[-c(1,2) ] %in% meta$sample)
77+ all(colnames(data)[-c(1,2) ] == meta$sample)
7878```
7979
8080Reorder meta rows so it matches count data colnames
8181
8282``` {r}
83- reorder <- match(colnames(data)[-1 ],meta$sample)
83+ reorder <- match(colnames(data)[-c(1,2) ],meta$sample)
8484reorder
8585
8686meta <- meta[reorder,]
@@ -89,7 +89,7 @@ meta <- meta[reorder,]
8989Create DESeq2Dataset object
9090
9191``` {r}
92- dds <- DESeqDataSetFromMatrix(countData = data %>% column_to_rownames("GeneSymbol" ),
92+ dds <- DESeqDataSetFromMatrix(countData = data %>% select(-gene_name) %>% column_to_rownames("gene_id") %>% mutate_all(as.integer ),
9393 colData = meta %>% column_to_rownames("sample"),
9494 design = ~ condition)
9595```
@@ -164,7 +164,7 @@ Extract the rlog matrix from the object
164164
165165``` {r}
166166rld_mat <- assay(rld)
167- rld_cor <- cor(rld_mat) # Pearson correlation betweeen samples
167+ rld_cor <- cor(rld_mat) # Pearson correlation between samples
168168rld_dist <- as.matrix(dist(t(assay(rld)))) #distances are computed by rows, so we need to transponse the matrix
169169```
170170
@@ -221,7 +221,7 @@ Formal LFC calculation
221221
222222``` {r}
223223# Specify contrast for comparison of interest
224- contrast <- c("condition", "MOV10_overexpression ", "control ")
224+ contrast <- c("condition", "control ", "vampirium ")
225225
226226# Output results of Wald test for contrast
227227res <- results(dds,
@@ -237,7 +237,7 @@ resultsNames(dds)
237237
238238# Shrink the log2 fold changes to be more accurate
239239res <- lfcShrink(dds,
240- coef = "condition_MOV10_overexpression_vs_control ",
240+ coef = "condition_vampirium_vs_control ",
241241 type = "apeglm")
242242```
243243
@@ -269,13 +269,13 @@ lookup <- function(gene_name, tx2gene, dds){
269269 return(hits)
270270}
271271
272- lookup(gene_name = "MOV10 ", tx2gene = tx2gene, dds = dds)
272+ lookup(gene_name = "TSPAN7 ", tx2gene = tx2gene, dds = dds)
273273```
274274
275275Plot expression for single gene
276276
277277``` {r counts_plot}
278- plotCounts(dds, gene="ENSG00000155363 ", intgroup="condition")
278+ plotCounts(dds, gene="ENSG00000156298 ", intgroup="condition")
279279```
280280
281281Function to annotate all your gene results
@@ -285,7 +285,6 @@ res_tbl <- merge(res_tbl, tx2gene %>% select(-transcript_ID) %>% distinct(),
285285 by.x = "gene", by.y = "gene_ID", all.x = T)
286286
287287res_tbl
288-
289288```
290289
291290### MAplot
@@ -319,7 +318,7 @@ head(res_tbl)
319318ggplot(res_tbl, aes(x = log2FoldChange, y = -log10(padj))) +
320319 geom_point(aes(colour = threshold)) +
321320 geom_text_repel(aes(label = genelabels)) +
322- ggtitle("Mov10 overexpression ") +
321+ ggtitle("Vampirium vs Control ") +
323322 xlab("log2 fold change") +
324323 ylab("-log10 adjusted p-value") +
325324 theme(legend.position = "none",
@@ -350,7 +349,7 @@ pheatmap(norm_sig,
350349### Annotate with ` annotables `
351350
352351``` {r}
353- ids <- grch37 %>% dplyr::filter(ensgene %in% res_tbl$gene)
352+ ids <- grch38 %>% dplyr::filter(ensgene %in% res_tbl$gene)
354353res_ids <- inner_join(res_tbl, ids, by=c("gene"="ensgene"))
355354```
356355
0 commit comments