netMHCpan #3 HLA-peptide affinity 분석 #
#2025-07-23
#data
data/
├── c315
│   ├── allprot.fasta
│   └── * (*: patient id)
│       ├── proteome.fasta
│       └── peptides_HLA-I.csv
├── c442
│   └── (c315와 동일한 구조로 생성됨)
├── patient_id.txt
└── common_mhc.txt
#
#predict_affinity.bash
#!/bin/bash
# Run netMHCpan for every patient of one cluster and merge the per-allele
# outputs into a single CSV per patient.
#
# Usage: predict_affinity.bash CLUSTER NUM_PROC
# Input:
#   1) CLUSTER   cluster name (e.g. c315)
#   2) NUM_PROC  number of parallel processes
# Output:
#   binding_affinities_HLA-I.csv for each patient
# Exit status:
#   0 if every patient was processed, 1 if any patient was skipped or
#   failed, 2 on a usage error.

if (( $# < 2 )); then
  printf 'Usage: %s CLUSTER NUM_PROC\n' "${0##*/}" >&2
  exit 2
fi

CLUSTER=$1
NUM_PROC=$2
netMHCpan="../netMHCpan-4.1/netMHCpan"
OUT_DIR="data/${CLUSTER}"
PATIENT_TXT="data/patient_id.txt"
HLA_I_ALLELES_FILE="data/common_mhc.txt"

# Move to the directory containing this script; every path above is relative
# to it, so continuing after a failed cd would touch the wrong files.
cd -- "$(dirname -- "$0")" || {
  printf 'error: cannot change to the script directory\n' >&2
  exit 1
}

for required_file in "$PATIENT_TXT" "$HLA_I_ALLELES_FILE"; do
  if [[ ! -r "$required_file" ]]; then
    printf 'error: cannot read %s\n' "$required_file" >&2
    exit 1
  fi
done

status=0

# Run the netMHCpan prediction for each patient.
# The patient list is read on file descriptor 3 so that commands in the loop
# body (e.g. jobs started by parallel) cannot consume it through stdin.
# The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
while read -r -u 3 PATIENT_ID || [[ -n "$PATIENT_ID" ]]; do
  # Ignore blank lines in the patient list.
  [[ -n "$PATIENT_ID" ]] || continue

  PATIENT_DIR="$OUT_DIR/$PATIENT_ID"
  RAW_DIR="$PATIENT_DIR/raw_predictions"
  PEPTIDES_TABLE="$PATIENT_DIR/peptides_HLA-I.csv"
  PEPTIDES_TXT="$PATIENT_DIR/peptides.txt"
  RESULT_CSV="$PATIENT_DIR/binding_affinities_HLA-I.csv"

  if [[ ! -f "$PEPTIDES_TABLE" ]]; then
    printf 'warning: %s not found; skipping patient %s\n' \
      "$PEPTIDES_TABLE" "$PATIENT_ID" >&2
    status=1
    continue
  fi

  if ! mkdir -p "$RAW_DIR"; then
    printf 'warning: cannot create %s; skipping patient %s\n' \
      "$RAW_DIR" "$PATIENT_ID" >&2
    status=1
    continue
  fi

  # Create peptides.txt (first column only, header row removed).
  tail -n +2 "$PEPTIDES_TABLE" | cut -d"," -f1 > "$PEPTIDES_TXT"

  # Run netMHCpan in parallel, one job per line of the allele file.
  if ! parallel -j "$NUM_PROC" \
    "$netMHCpan -BA -p $PATIENT_DIR/peptides.txt -a {} > $RAW_DIR/{}.txt" \
    :::: "$HLA_I_ALLELES_FILE"; then
    # Keep the raw outputs for inspection instead of merging partial results.
    printf 'warning: netMHCpan failed for patient %s; raw output kept in %s\n' \
      "$PATIENT_ID" "$RAW_DIR" >&2
    status=1
    continue
  fi

  # Merge and tidy the results. Write to a temporary name first so a failed
  # merge never leaves a truncated CSV under the final name.
  if ! python3 sc2.py "$RAW_DIR" > "$RESULT_CSV.tmp"; then
    printf 'warning: merging failed for patient %s; raw output kept in %s\n' \
      "$PATIENT_ID" "$RAW_DIR" >&2
    rm -f -- "$RESULT_CSV.tmp"
    status=1
    continue
  fi
  mv -- "$RESULT_CSV.tmp" "$RESULT_CSV"

  # Remove intermediates only after a successful merge; ":?" aborts instead
  # of expanding to an empty path.
  rm -rf -- "${RAW_DIR:?}" "$PEPTIDES_TXT"
done 3< "$PATIENT_TXT"

exit "$status"
#sc2.py
import sys
import os


def _iter_rows(path):
    """Yield (allele, peptide, affinity) for each table row of one output file.

    Parsing starts after the first line containing "Pos" (the line right
    after it is skipped) and stops at the next line that begins with "-".
    Fields are whitespace-separated: field 1 is the allele, field 2 the
    peptide and field 15 the affinity, which is truncated to an integer.
    Blank lines inside the table are ignored.
    """
    # "with" guarantees the handle is closed even if a row fails to parse.
    with open(path, "r") as handle:
        table_started = False
        for line in handle:
            if "Pos" in line:
                table_started = True
                # Skip the line that follows the column header.
                next(handle, None)
                continue
            if not table_started:
                continue
            if line.startswith("-"):
                break
            fields = line.split()
            if not fields:
                continue
            yield fields[1], fields[2], int(float(fields[15]))


def main(netMHCpan_outdir):
    """Print a CSV (Allele,Peptide,Affinity) merged from every file in a directory.

    Files are processed in sorted name order and rows are written to stdout
    in the order they appear in each file.
    """
    print("Allele,Peptide,Affinity")
    for fname in sorted(os.listdir(netMHCpan_outdir)):
        for allele, peptide, affinity in _iter_rows(os.path.join(netMHCpan_outdir, fname)):
            print("{},{},{}".format(allele, peptide, affinity))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: sc2.py NETMHCPAN_OUTPUT_DIR")
    main(sys.argv[1])
#result
data/
├── c315
│   ├── allprot.fasta
│   └── * (*: patient id)
│       ├── proteome.fasta
│       ├── peptides_HLA-I.csv
│       └── binding_affinities_HLA-I.csv
├── c442
│   └── (c315와 동일한 구조로 생성됨)
├── patient_id.txt
└── common_mhc.txt
환자별 binding_affinities_HLA-I.csv 생성.