Download the following BAM (aligned to hg19) and index files (*.bai) (ENCODE data - ChIP-Seq of CTCF in Ag04449 human fibroblast cells)
Use readGAlignments
to read the bam. Construct an
ScanBamParam
object that accepts only aligned reads,
passing quality control and not duplicates.
Compute genome wide coverage using coverage
function
Export coverage as bigWig using export.bw()
from
rtracklayer package and visualise it using IGV (https://software.broadinstitute.org/software/igv/download)
Compute number of reads overlapping with hg19 promoters (TSS ± 1kb) and export the results as csv file.
library("GenomicAlignments")
library("rtracklayer")
bamFile <- "wgEncodeUwTfbsAg04449CtcfStdAlnRep1.bam"
flag <- scanBamFlag()
param <- ScanBamParam(
flag=scanBamFlag(isUnmappedQuery=FALSE, isDuplicate=FALSE, isNotPassingQualityControls=FALSE)
)
# Read the BAM
CTCF <- readGAlignments(bamFile,param=param)
# Generate the genomic coverage and inspect the output
CTCFCov <- coverage(CTCF)
# Export as bigWig
export.bw(CTCFCov, "Ag04449_CTCF.bw")
# Identifying hg19 promoters
hg19Gene <- read.table("hg19Genes.txt",sep="\t",header=T)
hg19Gene.GR <- makeGRangesFromDataFrame(hg19Gene, keep.extra.columns = T)
hg19Gene.GR <- GRanges(seqnames=hg19Gene$chr,
ranges=IRanges(start=hg19Gene$start,end=hg19Gene$end),
strand=ifelse(hg19Gene$strand==1,"+","-"),
EnsemblID=hg19Gene$ensID,
Symbol=hg19Gene$GeneSym)
hg19Promoters <- promoters(hg19Gene.GR,upstream=1000,downstream=1000)
# Reads overlapping with promoters
CTCFCounts <- countOverlaps(hg19Promoters,CTCF)
# Add CTCF counts as elementMetadata to hg19Promoters object
mcols(hg19Promoters)$CTCF <- CTCFCounts
# Export the results as text file
hg19Promoters.df <- as.data.frame(hg19Promoters)
write.csv(hg19Promoters.df,"hg19Promoters_CTCF.csv",row.names=F)