These exercises cover the sections on Plotting in R (PlottingInR).
Please load the dataset SubsetRlog_7D.csv from "data/SubsetRlog_7D.csv".
Hint: use the read.csv()
function to load SubsetRlog_7D.csv. If you fail to import SubsetRlog_7D.csv into R, please check whether you have set up your working directory properly by using getwd()
and setwd().
# This is the work directory I am using and yours will be different
# setwd("/Volumes/bioinfomatics$/yfwang/BioinformaticsCore/GitHub/LMS_PlottingInR/course/")
# Import the expression table; the path is relative to the working directory.
exp_Data <- read.csv(file = "data/SubsetRlog_7D.csv")
# Show the first six rows to confirm the import.
head(exp_Data, n = 6L)
## X DOX24H_1 DOX24H_2 DOX24H_3 DOX7D_1 DOX7D_2
## 1 ENSMUSG00000050711 12.109594 12.155740 12.089761 13.891323 13.856755
## 2 ENSMUSG00000026463 12.288243 12.069297 11.934777 11.971843 11.908514
## 3 ENSMUSG00000051079 7.177476 7.282678 7.415341 8.097972 8.558204
## 4 ENSMUSG00000037995 9.178713 9.370469 9.047744 7.834046 7.667787
## 5 ENSMUSG00000039910 9.024315 8.790573 8.878026 9.497840 9.751352
## 6 ENSMUSG00000019880 8.308681 8.701553 8.403844 9.026944 9.351475
## DOX7D_3 VEH24H_1 VEH24H_2 VEH24H_3 VEH7D_1 VEH7D_2 VEH7D_3
## 1 14.095029 12.361107 12.560950 12.404746 13.640879 13.552177 13.578203
## 2 11.748345 12.543844 12.334998 12.195093 12.357483 12.181816 12.315383
## 3 8.653715 6.761927 6.861818 6.899967 7.445987 7.440482 7.938914
## 4 7.540055 9.222387 9.152175 9.299462 8.064490 7.994608 8.025586
## 5 9.731984 9.257527 9.246675 9.442477 9.346837 9.238912 9.323775
## 6 9.373127 8.305506 8.296453 8.429132 8.762056 8.791535 8.877247
# The first column is read in as "X" (see the head() output); give it a
# descriptive name.
names(exp_Data)[1L] <- "EnsemblID"
Convert the wide
data.frame to the long
data.frame using the melt() function from reshape2
# Load reshape2, which provides melt() for wide-to-long reshaping.
library(reshape2)
# Keep EnsemblID as the identifier column; every other column is stacked into
# the `variable` / `value` pair. The argument is written out in full as
# `id.vars` instead of relying on partial matching of `id`.
exp_Data_long <- melt(exp_Data, id.vars = "EnsemblID")
head(exp_Data_long)
## EnsemblID variable value
## 1 ENSMUSG00000050711 DOX24H_1 12.109594
## 2 ENSMUSG00000026463 DOX24H_1 12.288243
## 3 ENSMUSG00000051079 DOX24H_1 7.177476
## 4 ENSMUSG00000037995 DOX24H_1 9.178713
## 5 ENSMUSG00000039910 DOX24H_1 9.024315
## 6 ENSMUSG00000019880 DOX24H_1 8.308681
Add the columns 'Treatment'
, 'TimePoint'
and 'Rep'
based on the 'variable'
column information. Hint: see ?sub
# List the distinct sample labels that need to be split into their parts.
unique(exp_Data_long[["variable"]])
## [1] DOX24H_1 DOX24H_2 DOX24H_3 DOX7D_1 DOX7D_2 DOX7D_3 VEH24H_1 VEH24H_2
## [9] VEH24H_3 VEH7D_1 VEH7D_2 VEH7D_3
## 12 Levels: DOX24H_1 DOX24H_2 DOX24H_3 DOX7D_1 DOX7D_2 DOX7D_3 ... VEH7D_3
# Split sample labels such as "DOX24H_1" into three parts:
#   \\1 = treatment (e.g. "DOX"), \\2 = time point (e.g. "24H"),
#   \\3 = replicate suffix (e.g. "_1").
# `[27]` matches the leading digit of the time point. Inside a character
# class `|` is a literal character, not alternation, so it must not be written
# as `[2|7]`. The pattern is anchored so that only whole labels are rewritten.
sample_pattern <- "^(.+)([27].+)(_\\d)$"
# `variable` is a factor after melt(); convert once instead of on every call.
sample_label <- as.character(exp_Data_long$variable)
exp_Data_long$Treatment <- sub(sample_pattern, "\\1", sample_label)
exp_Data_long$TimePoint <- sub(sample_pattern, "\\2", sample_label)
exp_Data_long$Rep <- sub(sample_pattern, "Rep\\3", sample_label)
head(exp_Data_long)
## EnsemblID variable value Treatment TimePoint Rep
## 1 ENSMUSG00000050711 DOX24H_1 12.109594 DOX 24H Rep_1
## 2 ENSMUSG00000026463 DOX24H_1 12.288243 DOX 24H Rep_1
## 3 ENSMUSG00000051079 DOX24H_1 7.177476 DOX 24H Rep_1
## 4 ENSMUSG00000037995 DOX24H_1 9.178713 DOX 24H Rep_1
## 5 ENSMUSG00000039910 DOX24H_1 9.024315 DOX 24H Rep_1
## 6 ENSMUSG00000019880 DOX24H_1 8.308681 DOX 24H Rep_1
Add the column 'Group'
based on the 'Treatment'
and 'TimePoint'
columns. Hint: see ?paste
# Build a combined "Treatment.TimePoint" label for every row.
exp_Data_long[["Group"]] <- paste(
  exp_Data_long[["Treatment"]],
  exp_Data_long[["TimePoint"]],
  sep = "."
)
head(exp_Data_long, n = 6L)
## EnsemblID variable value Treatment TimePoint Rep Group
## 1 ENSMUSG00000050711 DOX24H_1 12.109594 DOX 24H Rep_1 DOX.24H
## 2 ENSMUSG00000026463 DOX24H_1 12.288243 DOX 24H Rep_1 DOX.24H
## 3 ENSMUSG00000051079 DOX24H_1 7.177476 DOX 24H Rep_1 DOX.24H
## 4 ENSMUSG00000037995 DOX24H_1 9.178713 DOX 24H Rep_1 DOX.24H
## 5 ENSMUSG00000039910 DOX24H_1 9.024315 DOX 24H Rep_1 DOX.24H
## 6 ENSMUSG00000019880 DOX24H_1 8.308681 DOX 24H Rep_1 DOX.24H
Draw a boxplot overlaid with the data points using geom_boxplot()
+ geom_point(), and rotate the x-axis labels with
theme(axis.text.x = element_text(angle=45, hjust=1))
library(ggplot2)
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
# One box per sample with the individual values drawn on top.
# outlier.shape = NA suppresses the boxplot's own outlier markers, since the
# point layer already draws every observation.
ggplot(data = exp_Data_long, mapping = aes(x = variable, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_dodge2(width = 0.5), alpha = 0.5) +
  labs(y = "rlog") +
  # Rotate the sample labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Draw a violin plot overlaid with a boxplot using geom_violin()
+ geom_boxplot(). Compare the result of using
geom_boxplot()
and geom_boxplot(position = position_dodge(0.9),width=0.4)
library(ggplot2)
# Variant 1: violins with geom_boxplot() at its default position and width.
plotv1 <- ggplot(
  data = exp_Data_long,
  mapping = aes(x = Rep, y = value, fill = Group)
) +
  geom_violin() +
  geom_boxplot() +
  labs(y = "rlog")
plotv1
# Variant 2: violins with narrower boxplots (width = 0.4) positioned with
# position_dodge(0.9).
plotv2 <- ggplot(
  data = exp_Data_long,
  mapping = aes(x = Rep, y = value, fill = Group)
) +
  geom_violin() +
  geom_boxplot(width = 0.4, position = position_dodge(0.9)) +
  labs(y = "rlog")
plotv2