methylKit: DMR 분석 #
#2025-07-28
1. Load packages #
library("methylKit")
library("genomation")
library("GenomicRanges")
#
2. Set path #
setwd("/data/home/ysh980101/2309_5-aza/Bismark/sorted_n")
getwd()
'/data1/home/ysh980101/2309_5-aza/Bismark/sorted_n'
#
3. Load data #
# Define the list containing the bismark coverage files.
covlist <- list(
"KEB1/KEB01_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz",
"KEB2/KEB02_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz",
"KEB4/KEB04_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz")
myobj_lowCov <- methRead(covlist,
sample.id=list("KEB01","KEB02","KEB04"),
pipeline = "bismarkCoverage",
assembly="hg38",
treatment=c(0,1,2),
mincov = 3
)
tiles <- tileMethylCounts(myobj_lowCov,win.size=1000,step.size=1000,cov.bases = 3
tiles.norm <- normalizeCoverage(tiles, method = "median")
meth.tiles <- unite(tiles.norm, destrand=FALSE)
meth.tiles
meth.tilesDf = getData(meth.tiles)
meth.tilesDf_filtered <- meth.tilesDf[!grepl("\\.|\\_|MT", meth.tilesDf$chr), ]
meth.tilesDf_filtered
#
4. Test: keb01 vs keb02 #
meth.tilesDf.1vs2 <- meth.tilesDf_filtered[, 1:10]
meth.tilesDf.1vs2
meth.tiles.1vs2=new("methylBase",meth.tilesDf.1vs2,sample.ids=c("KEB01","KEB02"),
assembly="hg38",treatment=c(0,1))
myDiff <- calculateDiffMeth(meth.tiles.1vs2,
overdispersion = "MN",
adjust="BH")
myDiff <- myDiff[order(myDiff$qvalue), ]
myDiff
#
5. Save #
myDiffDf = getData(myDiff)
write.table(myDiffDf, file = "/data/home/ysh980101/2310/res_total/myDiff1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
#
6. CpG filtering #
차등 메틸화된 모든 region에 대한 test 결과가 myDiff에 있는데 CpG island 부분만 남겨서 downstream 분석을 진행한다.
cpgi.df = read.table("/data/home/ysh980101/2310/data/hg38_CpGIslands.chr.bed.txt", header = FALSE, stringsAsFactors=FALSE)
cpgi.df = cpgi.df[grep("_",cpgi.df[,1],invert=TRUE),]
cpgi.gr = GRanges(seqnames=cpgi.df[,1],ranges=IRanges(start=cpgi.df[,2],end=cpgi.df[,3]))
myDiff.sbo <- selectByOverlap(myDiff,cpgi.gr)
myDiff.sboDf <- getData(myDiff.sbo)
myDiff.sboDf <- myDiff.sboDf[order(myDiff.sboDf$qvalue), ]
write.table(myDiff.sboDf, file = "/data/home/ysh980101/2310/res_total_cpgfilt/myDiff.sbo1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
#
7. Gene region annotation #
myDiff.sbo25p <- getMethylDiff(myDiff.sbo,
difference=0,
qvalue=0.05)
myDiff.sbo25p <- myDiff.sbo25p[order(myDiff.sbo25p$qvalue),]
#gene region annotation
refseq_anot <- readTranscriptFeatures("/data/home/ysh980101/2310/data/hg38_NCBI_RefSeq.chr.bed.txt")
myDiff.sbo25p.anot <- annotateWithGeneParts(target = as(myDiff.sbo25p,"GRanges"),feature = refseq_anot)
dist_tss <- getAssociationWithTSS(myDiff.sbo25p.anot)
dist_tss
plotTargetAnnotation(myDiff.sbo25p.anot, main = "Differential Methylation Annotation")
cpg_anot <- readFeatureFlank("/data/home/ysh980101/2310/data/hg38_CpGIslands.chr.bed.txt", feature.flank.name = c("CpGi", "shores"), flank=2000)
diffCpGann <- annotateWithFeatureFlank(as(myDiff.sbo25p,"GRanges"), feature = cpg_anot$CpGi, flank = cpg_anot$shores, feature.name = "CpGi", flank.name = "shores")
plotTargetAnnotation(diffCpGann, main = "Differential Methylation Annotation")
(는 cpg island만 필터링했기때문에 당연한 결과)
#
8. Save result #
write.table(dist_tss, file = "/data/home/ysh980101/2310/res_total_cpgfilt/dist_tss.1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
#
cf
- 갠적으로 재밋었던 작업이었는데 (생전첨보는툴들의 향연) 결과가 너무 별로라 논문에실리지는 못한 비운의작업 이었당
- 교수님께서 코드 달라셔서 드렸는데 “샘플수늘린것밖에 달라진게 없는데 자꾸 오류가난다 아무리봐도 문제가없다 한번봐달라"셔서 한 30분 같이 씨름했었는데
- 문제는? 없었음. 혹시나해서 터미널에서 돌리니까 돌아갔다. 그냥 jupyter에서 안돌아간거였고 교수님이 보시고 역시 r은 쓰레기라고 하셨다 .. 맞긴해..