DESeq2: DE 분석 #

#2024-12-31

Tool
Bioconductor - DESeq2 https://bioconductor.org/packages/release/bioc/html/DESeq2.html

#

1. Load package #

suppressMessages({
    library("DESeq2")
    library(pheatmap)
    library(withr)
    #library(tidyverse)
    library(RColorBrewer)
    library(gplots)
    library(dplyr)
    })

#

2. Set path #

setwd("/data-blog/bi1")
getwd()

'/data-blog/bi1'

#

3. Run DESeq2 #

S1 <- '33'
S2 <- '150'

countdata <- read.csv("results.csv", header=TRUE, sep=',')
colnames(countdata) <- c('GeneID','150-1','150-2','150-3','33-1','33-2','33-3','con-1','con-2','con-3')
countdata <- countdata[, c('GeneID','150-1','150-2','150-3','33-1','33-2','33-3','con-1','con-2','con-3')]

selected_columns <- paste(c('GeneID',paste0(S2,"-1"), paste0(S2,"-2"), paste0(S2,"-3"),paste0(S1,"-1"), paste0(S1,"-2"), paste0(S1,"-3")), sep="")
countdata <- countdata[, selected_columns]
countdata <- countdata[rowSums(countdata[, -1]) != 0, ]

sample.names <- paste(c(paste0(S2,"-1"), paste0(S2,"-2"), paste0(S2,"-3"),paste0(S1,"-1"), paste0(S1,"-2"), paste0(S1,"-3")), sep="")
conditions <- factor(c(S2,S2,S2,S1,S1,S1))
metadata <- data.frame(Sample = sample.names, group = conditions)

metadata

N <- dim(countdata)[[2]]
cData = countdata[,2:N]
GeneID = countdata[,1]
rownames(cData) = GeneID

dds <- DESeqDataSetFromMatrix(countData = cData,
                                colData = metadata,
                                design = ~group)
dds$group <- relevel(dds$group, ref = S1)
colData(dds)

dds <- DESeq(dds)  
vsd <- vst(dds, blind=FALSE)
rld <- rlogTransformation(dds, blind=FALSE)

res <- results(dds, contrast = c("group", S2, S1))
res_tbl <- as.data.frame(res)
res_tbl$GeneID <- rownames(res_tbl)
res_tbl <- res_tbl[order(res_tbl$padj), ]

NM_no_NA <- na.omit(res)
res_cut <- NM_no_NA[NM_no_NA$padj<0.05,]
res_cut

# padj
val_str <- 'padj'
cutoff <- 0.05
cutoff_str <- as.character(cutoff)

sig_res <- dplyr::filter(res_tbl, padj < cutoff)
sig_res <- dplyr::arrange(sig_res, padj)
sig_res_file <- paste0('res_', S2, '_', S1, '_', val_str, cutoff_str, '.csv')
#write.csv(sig_res, file = sig_res_file)

print(paste0(S2, ' vs ', S1, ' | ', val_str, '<', cutoff_str))
sig_idx <- res$padj < cutoff & !is.na(res$padj)
sig_dat <- assay(rld)[sig_idx, ]
annC <- data.frame(condition = conditions)
rownames(annC) <- colnames(sig_dat)
heat_colors <- brewer.pal(6, "RdYlGn")
heat_colors_reversed <- rev(heat_colors)
ann_colors <- list(condition = setNames(c("#F7819F", "#58D3F7"), c(S2, S1)))

A data.frame: 6 x 2
Sample	group
<chr>	<fct>
150-1	150
150-2	150
150-3	150
33-1	33
33-2	33
33-3	33
DataFrame with 6 rows and 2 columns
           Sample    group
      <character> <factor>
150-1       150-1      150
150-2       150-2      150
150-3       150-3      150
33-1         33-1      33 
33-2         33-2      33 
33-3         33-3      33

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

log2 fold change (MLE): group 150 vs 33 
Wald test p-value: group 150 vs 33 
DataFrame with 205 rows and 6 columns
         baseMean log2FoldChange     lfcSE      stat      pvalue        padj
        <numeric>      <numeric> <numeric> <numeric>   <numeric>   <numeric>
ABHD2      50.721       1.352060  0.431587   3.13276 1.73168e-03 4.00143e-02
ADAM12    706.120      -0.571960  0.168494  -3.39454 6.87431e-04 2.03489e-02
ADD2     1819.643       0.868228  0.148791   5.83521 5.37230e-09 9.73246e-07
AIF1L     144.513       1.168923  0.283764   4.11935 3.79938e-05 2.07318e-03
AKAP5    1042.005      -0.637445  0.202189  -3.15271 1.61761e-03 3.81572e-02
...           ...            ...       ...       ...         ...         ...
ZNF655   774.2459      -0.910286  0.198632  -4.58277 4.58855e-06 3.52229e-04
ZNF682    59.7573      -1.382049  0.438336  -3.15295 1.61632e-03 3.81572e-02
ZNF77     76.0271      -1.231188  0.388382  -3.17004 1.52417e-03 3.71126e-02
ZRANB3   536.2301      -0.878732  0.179932  -4.88367 1.04128e-06 9.22422e-05
ZSCAN25 1257.3596      -0.460023  0.149161  -3.08408 2.04184e-03 4.57797e-02
[1] "150 vs 33 | padj<0.05"