In this practical we will read in two replicate sets of peaks from the Myc Encode dataset and annotate these to gene using the ChIPseeker packge. We will visualise the results using ChIPseeker plotting functions and see how to combine samples into one plot.

In the second half of the practicals we will perform an enrichment analysis on the genes overlapping peak sets using the GOseq package.

library(GenomicRanges)
library(ChIPseeker)
library(org.Mm.eg.db)
library(TxDb.Mmusculus.UCSC.mm9.knownGene)

melPeak_Rep1 <- read.delim("data/MacsPeaks/mycmelrep1_peaks.xls",sep="\t",comment.char = "#")
melPeak_Rep2 <- read.delim("data/MacsPeaks/mycmelrep2_peaks.xls",sep="\t",comment.char = "#")

melRep1_GR <- GRanges(
                  seqnames=melPeak_Rep1[,"chr"],
                  IRanges(melPeak_Rep1[,"start"],
                  melPeak_Rep1[,"end"]
                  )
                )

mcols(melRep1_GR) <- melPeak_Rep1[,c("abs_summit", "fold_enrichment")]


melRep2_GR <- GRanges(
                  seqnames=melPeak_Rep2[,"chr"],
                  IRanges(melPeak_Rep2[,"start"],
                  melPeak_Rep2[,"end"]
                  )
                )

mcols(melRep2_GR) <- melPeak_Rep2[,c("abs_summit", "fold_enrichment")]
library(GenomeInfoDb)

seqlevelsStyle(melRep1_GR) <- "UCSC"
seqlevelsStyle(melRep2_GR) <- "UCSC"

peakAnno_MelRep1 <- annotatePeak(melRep1_GR, tssRegion=c(-1000, 1000), 
                         TxDb=TxDb.Mmusculus.UCSC.mm9.knownGene, annoDb="org.Mm.eg.db")
## >> preparing features information...      2016-03-09 20:43:46 
## >> identifying nearest features...        2016-03-09 20:43:47 
## >> calculating distance from peak to TSS...   2016-03-09 20:43:48 
## >> assigning genomic annotation...        2016-03-09 20:43:48 
## >> adding gene annotation...          2016-03-09 20:43:58
## 'select()' returned many:many mapping between keys and columns
## >> assigning chromosome lengths           2016-03-09 20:44:21 
## >> done...                    2016-03-09 20:44:21
peakAnno_MelRep2 <- annotatePeak(melRep2_GR, tssRegion=c(-1000, 1000), 
                         TxDb=TxDb.Mmusculus.UCSC.mm9.knownGene, annoDb="org.Mm.eg.db")
## >> preparing features information...      2016-03-09 20:44:22 
## >> identifying nearest features...        2016-03-09 20:44:22 
## >> calculating distance from peak to TSS...   2016-03-09 20:44:22 
## >> assigning genomic annotation...        2016-03-09 20:44:22 
## >> adding gene annotation...          2016-03-09 20:44:24
## 'select()' returned many:many mapping between keys and columns
## >> assigning chromosome lengths           2016-03-09 20:44:47 
## >> done...                    2016-03-09 20:44:47

plotAnnoBar(peakAnno_MelRep1)

plotAnnoBar(peakAnno_MelRep2)



plotDistToTSS(peakAnno_MelRep1)
## Warning: Stacking not well defined when ymin != 0

plotDistToTSS(peakAnno_MelRep2)
## Warning: Stacking not well defined when ymin != 0

Mel_Rep1_and_Rep2 <- list(peakAnno_MelRep1,peakAnno_MelRep2)
names(Mel_Rep1_and_Rep2) <- c("Rep1","Rep2")
plotAnnoBar(Mel_Rep1_and_Rep2)

peakAnno_MelRep1_GR <- as.GRanges(peakAnno_MelRep1)
peakAnno_MelRep2_GR <- as.GRanges(peakAnno_MelRep2)

MelRep1_genesWithPeakInTSS <- unique(peakAnno_MelRep1_GR[peakAnno_MelRep1_GR$annotation == "Promoter",]$SYMBOL)

MelRep1_genesWithPeakInTSS <- unique(peakAnno_MelRep1_GR[peakAnno_MelRep1_GR$annotation == "Promoter",]$SYMBOL)
melRep1_GR_InRep1AndRep2 <- melRep1_GR[melRep1_GR %over% melRep2_GR]
melRep1_GR_InRep1AndRep2
## GRanges object with 30233 ranges and 2 metadata columns:
##           seqnames                 ranges strand   | abs_summit
##              <Rle>              <IRanges>  <Rle>   |  <integer>
##       [1]     chr1     [4775337, 4775959]      *   |    4775616
##       [2]     chr1     [4847544, 4847931]      *   |    4847795
##       [3]     chr1     [5073028, 5073344]      *   |    5073202
##       [4]     chr1     [7078801, 7079170]      *   |    7078892
##       [5]     chr1     [7387587, 7388483]      *   |    7387940
##       ...      ...                    ...    ... ...        ...
##   [30229]     chrX [165737530, 165737735]      *   |  165737687
##   [30230]     chrX [165757688, 165758643]      *   |  165758296
##   [30231]     chrY [   581657,    582367]      *   |     582221
##   [30232]     chrY [   622356,    622742]      *   |     622484
##   [30233]     chrY [   622882,    623459]      *   |     623091
##           fold_enrichment
##                 <numeric>
##       [1]          9.5239
##       [2]         7.45675
##       [3]        10.20078
##       [4]         4.00702
##       [5]        19.01092
##       ...             ...
##   [30229]         6.23072
##   [30230]        17.31472
##   [30231]        18.62359
##   [30232]        10.23618
##   [30233]        14.59007
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths