#4 결과 검증: 임상 결과와의 연관성

#4 결과 검증: 임상 결과와의 연관성

#2025-06-20


# 1. Load packages

import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Extend the module search path before importing the project modules below.
# NOTE(review): presumably this directory contains the `Bin` package — confirm.
# `sys` is imported directly instead of being reached through `os.sys`, which
# only works because the `os` module happens to import `sys` internally.
sys.path.append("/data/home/ysh980101/2407/Mutclust")

# Project-local helpers; these must come after the sys.path update above.
# The star-import order is preserved so any name shadowing stays the same.
from Bin.Utils.utils import *
from Bin.arg_parser import *
from Bin.mlib import *

# 2. Load COVID-19 data

# --- Run selection -----------------------------------------------------------
# `i` picks which test run to analyse; `tag` is reused in both the output
# directory name and the cluster file name below.
i = 1
tag = f"test{i}"

# --- Input locations ---------------------------------------------------------
# `resdir` keeps its trailing slash because file names are appended to it
# directly in the f-string further down.
resdir = f"/data/home/ysh980101/2407/Mutclust/Testdata/Output/GISAID_{tag}/"
covid19_dir = "/data3/projects/2020_MUTCLUST/Data/Projects/COVID19/Sequence/Preprocessed/Nucleotide/Mutationinfo"
meta_path = "/data/home/ysh980101/2506/data/meta.csv"

# --- Load tables -------------------------------------------------------------
# Cluster (hotspot) table for this run, read as tab-separated text.
hotspots = pd.read_csv(
    f"{resdir}clusters_{tag}.txt",
    sep="\t",
)
# Metadata CSV; its first column becomes the DataFrame index.
metaData = pd.read_csv(
    meta_path,
    index_col=0,
)

# --- Build the signature matrix ----------------------------------------------
# Both helpers come from the project's star-imports.
mutInfo = make_mutInfo_covid19(covid19_dir)
mutSignature = make_mutSignature(
    mutInfo,
    hotspots,
    metaData,
)
# In the recorded run this printed a 477 x 387 table: rows c0..c476,
# columns are IDs such as COV-CCO-001.
print(mutSignature)
      COV-CCO-001  COV-CCO-002  COV-CCO-003  COV-CCO-004  COV-CCO-006  \
c0              0            0            0            0            0   
c1              0            0            0            0            0   
c2              0            0            0            0            0   
c3              0            0            0            0            0   
c4              0            0            0            0            0   
...           ...          ...          ...          ...          ...   
c472            0            1            0            0            0   
c473            0            0            0            0            0   
c474            0            0            0            0            0   
c475            0            0            0            0            0   
c476            0            0            0            0            0   

      COV-CCO-008  COV-CCO-009  COV-CCO-010  COV-CCO-011  COV-CCO-013  ...  \
c0              0            0            0            0            0  ...   
c1              0            0            0            0            0  ...   
c2              0            0            0            0            0  ...   
c3              0            0            0            0            0  ...   
c4              0            0            0            0            0  ...   
...           ...          ...          ...          ...          ...  ...   
c472            0            0            0            0            0  ...   
c473            0            0            0            0            0  ...   
c474            0            0            0            0            0  ...   
c475            0            0            0            0            0  ...   
c476            0            0            0            0            0  ...    

[477 rows x 387 columns]
# Run the project's hotspot selection on the signature matrix, metadata and
# cluster table. It returns two objects, unpacked here as `sig_hotspots`
# and `significance`.
sig_hotspots, significance = select_sig_hotspots(
    mutSignature,
    metaData,
    hotspots,
)
# Bare expression so the notebook shows the table as the cell output
# (columns in the recorded run: Hotspot, p-value, FDR, Significant).
significance
   Hotspot       p-value           FDR  Significant
0      c22  1.882327e-07  4.489349e-06         True
1      c90  1.158366e-03  2.051443e-02         True
2     c118  9.750940e-15  1.162800e-12         True
3     c123  8.587634e-14  6.827169e-12         True
4     c124  1.051981e-03  2.007179e-02         True
5     c198  2.827480e-10  1.123923e-08         True
6     c239  5.739929e-16  2.737946e-13         True
7     c258  1.489502e-08  4.301825e-07         True
8     c292  6.617715e-07  1.372457e-05         True
9     c298  1.205966e-04  2.396858e-03         True
10    c309  2.746212e-08  7.277461e-07         True
11    c315  7.603734e-08  1.908937e-06         True
12    c319  5.323421e-07  1.154215e-05         True
13    c334  4.989612e-10  1.830804e-08         True
14    c337  8.625002e-12  4.114126e-10         True
15    c350  4.178970e-07  9.492232e-06         True
16    c364  9.750940e-15  1.162800e-12         True
17    c385  4.112387e-13  2.802298e-11         True
18    c390  1.161194e-03  2.051443e-02         True
19    c412  5.946573e-12  3.151684e-10         True
20    c429  3.073511e-09  9.773764e-08         True
21    c431  7.755493e-14  6.827169e-12         True
22    c438  1.929048e-09  6.572544e-08         True
23    c442  5.644954e-13  3.365804e-11         True
24    c444  1.362927e-15  3.250582e-13         True
25    c455  1.928137e-03  3.171454e-02         True
26    c460  1.723333e-03  2.935821e-02         True
27    c462  1.533145e-08  4.301825e-07         True
28    c468  1.169796e-11  5.072662e-10         True
# Write the `sig_hotspots` table to CSV under the relative "result/" directory.
# `DataFrame.to_csv` does not create missing parent directories, so ensure the
# directory exists first; `exist_ok=True` makes re-running the cell harmless.
outdir = "result/"
os.makedirs(outdir, exist_ok=True)
# index=False leaves the DataFrame index out of the file.
sig_hotspots.to_csv(f"{outdir}sig_hotspots.csv", index=False)

생성한 결과(sig_hotspots)는 result/ 디렉터리에 CSV 파일로 저장한다.