methylKit: DMR 분석

methylKit: DMR 분석 #

#2025-07-28


1. Load packages #

library("methylKit")
library("genomation")
library("GenomicRanges")

#

2. Set path #

setwd("/data/home/ysh980101/2309_5-aza/Bismark/sorted_n")
getwd()
'/data1/home/ysh980101/2309_5-aza/Bismark/sorted_n'

#

3. Load data #

# Define the list containing the bismark coverage files.
covlist <- list(
   "KEB1/KEB01_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz",
   "KEB2/KEB02_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz",
   "KEB4/KEB04_1_bismark_bt2_pe.sorted_n.deduplicated.bismark.cov.gz")
myobj_lowCov <- methRead(covlist,
           sample.id=list("KEB01","KEB02","KEB04"),
           pipeline = "bismarkCoverage",
           assembly="hg38",
           treatment=c(0,1,2),
           mincov = 3
           )
tiles <- tileMethylCounts(myobj_lowCov,win.size=1000,step.size=1000,cov.bases = 3

tiles.norm <- normalizeCoverage(tiles, method = "median")
meth.tiles <- unite(tiles.norm, destrand=FALSE)
meth.tiles
image
meth.tilesDf = getData(meth.tiles)
meth.tilesDf_filtered <- meth.tilesDf[!grepl("\\.|\\_|MT", meth.tilesDf$chr), ]
meth.tilesDf_filtered
image

#

4. Test: keb01 vs keb02 #

meth.tilesDf.1vs2 <- meth.tilesDf_filtered[, 1:10]
meth.tilesDf.1vs2
image
meth.tiles.1vs2=new("methylBase",meth.tilesDf.1vs2,sample.ids=c("KEB01","KEB02"),
                       assembly="hg38",treatment=c(0,1))

myDiff <- calculateDiffMeth(meth.tiles.1vs2,
                            overdispersion = "MN",
                            adjust="BH")
myDiff <- myDiff[order(myDiff$qvalue), ]
myDiff
image

#

5. Save #

myDiffDf = getData(myDiff)
write.table(myDiffDf, file = "/data/home/ysh980101/2310/res_total/myDiff1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

#

6. CpG filtering #

차등 메틸화된 모든 region에 대한 test 결과가 myDiff에 있는데 CpG island 부분만 남겨서 downstream 분석을 진행한다.

cpgi.df = read.table("/data/home/ysh980101/2310/data/hg38_CpGIslands.chr.bed.txt", header = FALSE, stringsAsFactors=FALSE)
cpgi.df = cpgi.df[grep("_",cpgi.df[,1],invert=TRUE),]
cpgi.gr = GRanges(seqnames=cpgi.df[,1],ranges=IRanges(start=cpgi.df[,2],end=cpgi.df[,3]))
myDiff.sbo <- selectByOverlap(myDiff,cpgi.gr)

myDiff.sboDf <- getData(myDiff.sbo)
myDiff.sboDf <- myDiff.sboDf[order(myDiff.sboDf$qvalue), ]
write.table(myDiff.sboDf, file = "/data/home/ysh980101/2310/res_total_cpgfilt/myDiff.sbo1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

#

7. Gene region annotation #

myDiff.sbo25p <- getMethylDiff(myDiff.sbo,
                        difference=0,
                        qvalue=0.05)
myDiff.sbo25p <- myDiff.sbo25p[order(myDiff.sbo25p$qvalue),]

#gene region annotation
refseq_anot <- readTranscriptFeatures("/data/home/ysh980101/2310/data/hg38_NCBI_RefSeq.chr.bed.txt")
myDiff.sbo25p.anot <- annotateWithGeneParts(target = as(myDiff.sbo25p,"GRanges"),feature = refseq_anot)
dist_tss <- getAssociationWithTSS(myDiff.sbo25p.anot)
dist_tss
image
plotTargetAnnotation(myDiff.sbo25p.anot, main = "Differential Methylation Annotation")
image
cpg_anot <- readFeatureFlank("/data/home/ysh980101/2310/data/hg38_CpGIslands.chr.bed.txt", feature.flank.name = c("CpGi", "shores"), flank=2000)
diffCpGann <- annotateWithFeatureFlank(as(myDiff.sbo25p,"GRanges"), feature = cpg_anot$CpGi, flank = cpg_anot$shores, feature.name = "CpGi", flank.name = "shores")
plotTargetAnnotation(diffCpGann, main = "Differential Methylation Annotation")
image

(는 cpg island만 필터링했기때문에 당연한 결과)

#

8. Save result #

write.table(dist_tss, file = "/data/home/ysh980101/2310/res_total_cpgfilt/dist_tss.1vs2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

#

cf

  • 갠적으로 재밋었던 작업이었는데 (생전첨보는툴들의 향연) 결과가 너무 별로라 논문에실리지는 못한 비운의작업 이었당
  • 교수님께서 코드 달라셔서 드렸는데 “샘플수늘린것밖에 달라진게 없는데 자꾸 오류가난다 아무리봐도 문제가없다 한번봐달라"셔서 한 30분 같이 씨름했었는데
    • 문제는? 없었음. 혹시나해서 터미널에서 돌리니까 돌아갔다. 그냥 jupyter에서 안돌아간거였고 교수님이 보시고 역시 r은 쓰레기라고 하셨다 .. 맞긴해..