netMHCpan #4 결과 확인 및 heatmap 시각화 #

#2025-07-23

1. netMHCpan 결과 확인 #

#data

data/
├── c315
│   └── * (*: patient id)
│       ├── peptides_HLA-I.csv
│       └── binding_affinities_HLA-I.csv
├── c442
│   └── (c315와 동일한 구조로 생성됨)
└── patient_id.txt
result/
└── (empty)

# Load package
import pandas as pd
import numpy as np
import os

# Load patient id
# The file is read as one patient ID per line. Blank lines (for example the
# empty string produced by a trailing newline) are skipped so they cannot end
# up as empty path components when the per-patient CSV paths are built.
# The context manager closes the file handle as soon as reading is done.
with open("/data/patient_id.txt", "r") as f:
    patients = [line.strip() for line in f if line.strip()]

# Merge epitope table

hotspots = ["c315", "c442"]

for hotspot in hotspots:
    # Start from an empty list for every hotspot. Sharing one list across
    # hotspots would carry the columns of earlier hotspots over into the
    # output file of later ones.
    peptide_df_list = []

    for patient in patients:
        peptide_df = pd.read_csv(f"/data/{hotspot}/{patient}/peptides_HLA-I.csv")

        # Drop peptides containing '-' or '*' (non-coding region).
        # Missing values are treated as "no match" and are therefore kept.
        peptide_df = peptide_df[~peptide_df['Peptide'].str.contains('[-*]', regex=True, na=False)]

        # Keep only the Peptide column and name it after the patient ID so
        # each patient becomes one column of the merged table.
        peptide_df = peptide_df[['Peptide']].rename(columns={'Peptide': patient})

        peptide_df_list.append(peptide_df)

    # Build and save the merged table: patients are placed side by side and
    # rows are aligned on the row index that survived the filtering above.
    merged_df = pd.concat(peptide_df_list, axis=1)
    # Prefix every row label with the hotspot name, e.g. "c315.0".
    merged_df.index = f"{hotspot}." + merged_df.index.astype(str)
    merged_df.to_csv(f"/result/epitope_{hotspot}.csv")

# Merge affinity table

for hotspot in hotspots:
    dfs = []

    # Iterate over `patients`, the list read from patient_id.txt; the name
    # `patient_ids` is not defined anywhere in this script.
    # NOTE(review): the list is expected to contain a 'reference' entry,
    # since the code below relies on a 'reference' column — confirm.
    for pid in patients:
        file_path = f"/data/{hotspot}/{pid}/binding_affinities_HLA-I.csv"
        df = pd.read_csv(file_path)

        # Tag the affinity and peptide columns with the patient ID so they
        # stay distinguishable after the column-wise concat.
        df = df.rename(columns={'Affinity': f'{pid}', 'Peptide': f'Peptide_{pid}'})

        if pid == 'reference':
            # Keep every column of the reference table (including 'Allele',
            # which becomes the index below).
            dfs.append(df)
        else:
            # Only the affinity column is kept for actual patients.
            dfs.append(df[[f'{pid}']])
            # Alternative that also keeps each patient's peptide column:
            #dfs.append(df[[f'{pid}', f'Peptide_{pid}']])

    res_df = pd.concat(dfs, axis=1)
    res_df = res_df.set_index('Allele')

    # Keep only the meaningful pairs: rows whose reference affinity is < 500.
    # .copy() makes the filtered frame independent before it is modified.
    res_df = res_df[res_df['reference'] < 500].copy()
    # Express every column after the first as a difference from the reference.
    # NOTE(review): assumes the first column is the non-numeric
    # 'Peptide_reference' and all later columns are numeric — confirm against
    # the CSV layout.
    res_df.iloc[:, 1:] = res_df.iloc[:, 1:].subtract(res_df['reference'], axis=0)

    # Save
    res_df.to_csv(f"/result/aff-table_{hotspot}.csv")

#result

data/
├── c315
│   └── * (*: patient id)
│       ├── peptides_HLA-I.csv
│       └── binding_affinities_HLA-I.csv
├── c442
│   └── (c315와 동일한 구조로 생성됨)
└── patient_id.txt
result/
├── epitope_c315.csv
├── epitope_c442.csv
├── aff-table_c315.csv
└── aff-table_c442.csv

2. Heatmap 시각화 #

#data

data/
└── meta.csv
result/
├── aff-table_c315.csv
└── aff-table_c442.csv

# Load package
if(!requireNamespace("pheatmap", quietly = TRUE)) install.packages("pheatmap")
library(pheatmap)

# Load data
# The sample metadata is shared by every hotspot, so it is read only once.
filtered_meta_df <- read.csv("/data/meta.csv", row.names = 1)

# Annotation colors
severity_colors <- c("Moderate" = "#a6d854", "Severe" = "#f56464")
cluster_colors <- c("1" = "#66c2a5", "2" = "#ffd92f", "3" = "#e78ac3", "4" = "#a6d854",
                    "5" = "#8da0cb", "6" = "#fc8d62", "7" = "#e5c494")
annotation_colors <- list(Severity = severity_colors, Clusters = cluster_colors)

# Row annotations of the heatmap; row names come from the metadata table.
annotation_df <- data.frame(Severity = filtered_meta_df$Severity,
                            Clusters = as.factor(filtered_meta_df$Clusters),
                            row.names = rownames(filtered_meta_df))

# Cluster labels are stored as character so they match the names of cluster_colors.
annotation_df$Clusters <- as.character(annotation_df$Clusters)

# Draw one heatmap per hotspot so that both aff-heatmap_c315.png and
# aff-heatmap_c442.png are produced by a single run.
for (hotspot in c("c315", "c442")) {
  aff_table_df <- read.csv(paste0("/result/aff-table_", hotspot, ".csv"))
  rownames(aff_table_df) <- paste(aff_table_df$Allele, aff_table_df$Peptide_reference, sep = "_")

  # Drop the first three columns.
  # NOTE(review): presumably Allele, Peptide_reference and reference — confirm
  # against the CSV layout.
  aff_table_df <- aff_table_df[ , -(1:3)]
  # Turn "." in the column names back into "-".
  # NOTE(review): presumably undoes read.csv's name mangling so the names
  # match the metadata row names — confirm.
  colnames(aff_table_df) <- gsub("\\.", "-", colnames(aff_table_df))

  # Data range: the lower bound is fixed at -500 instead of the observed
  # minimum (noted as -255 in the original script); the upper bound is the
  # observed maximum.
  min_val <- -500
  max_val <- max(as.matrix(aff_table_df), na.rm = TRUE)

  # Breaks: 0 is the midpoint, with five bins on each side.
  # NOTE(review): assumes max_val > 0; otherwise the breaks are not increasing.
  breaks <- c(seq(min_val, 0, length.out = 6), seq(0, max_val, length.out = 6)[-1])

  # Draw heatmap (transposed, so the columns of the affinity table become rows)
  pheatmap(
    t(as.matrix(aff_table_df)),
    annotation_row = annotation_df,
    annotation_colors = annotation_colors,
    color = colorRampPalette(c("green", "white", "red"))(length(breaks) - 1),
    breaks = breaks,
    show_rownames = FALSE,
    show_colnames = TRUE,
    annotation_legend = TRUE,
    legend = TRUE,
    width = 10,
    height = 12,
    filename = paste0("/result/aff-heatmap_", hotspot, ".png")
  )
}

#result

data/
└── meta.csv
result/
├── aff-table_c315.csv
├── aff-table_c442.csv
├── aff-heatmap_c315.png
└── aff-heatmap_c442.png

#

그리드가 있는 게 나은지 없는 게 나은지?!! 내 눈엔 있는 게 예쁜데, row랑 column이 너무 많으면 정신 사납기도 하고 그렇다.
색깔 하나하나 나름 고심해서 골랐는데 R 느낌이 낭낭하게 나와서 개인적으로 맘에 드는 플롯 ㅎㅎ

#

논문에는 요 그림으로 들어갔다. (그리드 버전)