These exercises cover the sections of the "Plotting in R" course (PlottingInR).
Use the example from the course ("data/DiffEx.csv"), and
- use padj < 0.01 and abs(log2FC) > 1 as thresholds to define DE (differentially expressed) genes
- change the colour codes for the volcano plot to c("Up.DE","Down.DE","non.DE") = c("#B2182B","#2166AC","grey") and generate the volcano plot below
- show the gene symbol (GeneSym; the mgi_symbol column in the solution code) for the top 10 DE genes based on padj
Hints:
- Use read.csv() to import DiffEx.csv into R.
- If importing DiffEx.csv fails, check that your working directory is set correctly using getwd() and setwd().
# install.packages("ggplot2")
library(ggplot2)
# install.packages("ggrepel")
library(ggrepel)
# This is the work directory I am using and yours will be different
# setwd("/Volumes/bioinfomatics$/yfwang/BioinformaticsCore/GitHub/LMS_PlottingInR/course/")
# Read the differential-expression results table. The path is relative to the
# current working directory.
df4ggplot <- read.csv("data/DiffEx.csv")

# A gene is called DE when padj < 0.01 and |log2FoldChange| > 1.
# The comparison evaluates to NA when a needed value is missing; such genes are
# counted as not DE, so no NA category reaches the steps below (an NA in a
# logical row index makes data-frame sub-assignment fail).
is_de <- df4ggplot$padj < 0.01 & abs(df4ggplot$log2FoldChange) > 1
is_de[is.na(is_de)] <- FALSE

# Keep the flag as a factor with "TRUE" listed first, as in the table below.
df4ggplot$DE.cat <- factor(
  ifelse(is_de, "TRUE", "FALSE"),
  levels = c("TRUE", "FALSE")
)
table(df4ggplot$DE.cat)
##
## TRUE FALSE
## 1237 17120

# Step 1: two-way split, DE versus not DE.
df4ggplot$newDE.cat <- ifelse(is_de, "all.DE", "non.DE")
table(df4ggplot$newDE.cat)
##
## all.DE non.DE
## 1237 17120

# Step 2: split the DE genes by direction of change.
# Assigning into the column vector (rather than df[rows, ]$col <- value) keeps
# the update a plain vector replacement driven by the NA-free mask.
df4ggplot$newDE.cat[is_de] <- ifelse(
  df4ggplot$log2FoldChange[is_de] > 0, "Up.DE", "Down.DE"
)
table(df4ggplot$newDE.cat)
##
## Down.DE non.DE Up.DE
## 539 17120 698

# Sort by adjusted p-value, smallest first (order() places NA last), so the
# leading rows are the most significant genes.
df4ggplot <- df4ggplot[order(df4ggplot$padj), ]

# Fix the level order so legends and colour scales list Up, Down, non-DE.
df4ggplot$newDE.cat <- factor(
  df4ggplot$newDE.cat,
  levels = c("Up.DE", "Down.DE", "non.DE")
)
# Volcano plot: log2 fold change (x) against -log10 p-value (y), coloured by
# DE category.

# Colours are keyed by category name rather than by position, so each category
# keeps its colour even if one of them has no genes.
de_colours <- c("Up.DE" = "#B2182B", "Down.DE" = "#2166AC", "non.DE" = "grey")

# Genes to label: the 10 DE genes with the smallest padj. df4ggplot is already
# sorted by padj, so the first DE rows are the top hits. %in% never returns NA,
# and head() copes with fewer than 10 DE genes.
top_de <- head(
  df4ggplot[df4ggplot$newDE.cat %in% c("Up.DE", "Down.DE"), ],
  10
)

vol_col <- ggplot(
  data = df4ggplot,
  aes(x = log2FoldChange, y = -log10(pvalue), col = newDE.cat)
) +
  geom_point(size = 0.75) +
  xlab("log2FoldChange") +
  ylab("-log10 PValue") +
  scale_color_manual(values = de_colours) +
  # Labels inherit x, y and colour from the main aesthetics; only the label
  # text is mapped here.
  geom_label_repel(
    data = top_de,
    mapping = aes(label = mgi_symbol),
    box.padding = 0.5,
    max.overlaps = 20
  ) +
  theme_classic() +
  ggtitle("Volcano plot")
vol_col
An MA plot visualizes the differences between measurements taken in two samples by transforming the data onto M (log fold-change ratio) and A (mean average) scales and then plotting these values.
Please use the 'log2FoldChange' column and the log2-transformed 'baseMean' column (log2(baseMean)) to create the MA plot.
# MA plot: log2 of the mean normalised expression (A, x-axis) against log2 fold
# change (M, y-axis), coloured by DE category.

# Colours are keyed by category name rather than by position, so each category
# keeps its colour even if one of them has no genes.
de_colours <- c("Up.DE" = "#B2182B", "Down.DE" = "#2166AC", "non.DE" = "grey")

# Genes to label: the 10 DE genes with the smallest padj. df4ggplot is already
# sorted by padj, so the first DE rows are the top hits. %in% never returns NA,
# and head() copes with fewer than 10 DE genes.
top_de <- head(
  df4ggplot[df4ggplot$newDE.cat %in% c("Up.DE", "Down.DE"), ],
  10
)

MA_col <- ggplot(
  data = df4ggplot,
  aes(x = log2(baseMean), y = log2FoldChange, col = newDE.cat)
) +
  # Semi-transparent points keep dense regions readable.
  geom_point(size = 0.75, alpha = 0.5) +
  xlab("log2(Averaged.Norm.Expression)") +
  ylab("log2(FC)") +
  scale_color_manual(values = de_colours) +
  # Labels inherit x, y and colour from the main aesthetics; only the label
  # text is mapped here.
  geom_label_repel(
    data = top_de,
    mapping = aes(label = mgi_symbol),
    box.padding = 0.5,
    max.overlaps = 20
  ) +
  theme_classic() +
  ggtitle("MA plot")
MA_col