MLflow #2 mlflow 파이프라인 #
#2025-08-22
1. 코드 #
#1 트래킹 서버 설정
import os
import mlflow
# 1. Choose the server/location where logs will be stored.
# The address comes from the MLFLOW_TRACKING_URI environment variable; an
# empty string is passed on when the variable is not set.
tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "")
mlflow.set_tracking_uri(uri=tracking_uri)
# Read the value back so the script shows which URI is actually in effect.
current_uri = mlflow.get_tracking_uri()
print(f"Current Tracking URI: {current_uri}")
#
#2 Experiment 생성
# 2. Create the experiment.
# Keep the object returned by set_experiment: the fields printed below
# (ID, name, artifact location, lifecycle stage) are read from it.
experiment = mlflow.set_experiment("new_experiment")
print(
    f"Experiment ID: {experiment.experiment_id}",
    f"Experiment Name: {experiment.name}",
    f"Artifact Location: {experiment.artifact_location}",
    f"Lifecycle Stage: {experiment.lifecycle_stage}",
    sep="\n",
)
Experiment ID: 2
Experiment Name: new_experiment
Artifact Location: /mlflow/mlruns/2
Lifecycle Stage: active
#
#3 information 확인, 로그 기록
# 3. Inspect the experiment's information and log runs.
# Access metadata about the experiment by looking it up again via its ID.
_experiment = mlflow.get_experiment(experiment_id=experiment.experiment_id)
print(f"_Experiment ID: {_experiment.experiment_id}")
print(f"_Experiment Name: {_experiment.name}")
print(f"_Artifact Location: {_experiment.artifact_location}")
print(f"_Lifecycle Stage: {_experiment.lifecycle_stage}")

# Log runs under the same experiment
for i in range(3):
    # Each iteration opens its own run; leaving the `with` block ends it.
    with mlflow.start_run():
        mlflow.log_param("iteration", i)  # store a parameter
        mlflow.log_metric("accuracy", 0.8 + i * 0.05)  # store a metric
        print(f"Logged run under experiment '{experiment.name}'")

        # Log an artifact (e.g., a text file)
        with open("example.txt", "w") as f:
            f.write("This is an example artifact.")
        # Logged while the run is still active so the file is attached to
        # this run; with no active run MLflow would start a new one
        # implicitly and leave it open.
        mlflow.log_artifact("example.txt")  # store an artifact
- 동일한 실험(new_experiment) 아래에서 3번의 Run을 수행
- Run이 끝나면 UI에서 iteration과 accuracy의 관계를 확인할 수 있다.
#
#4 기존 experiment 재사용
# 4. Reuse an existing experiment.
# Reuse the same experiment in another script or session
# Option A: Use the experiment name
# The returned object is kept so that its ID is available to Option B even
# when this snippet runs in a fresh session where `experiment` is not yet bound.
experiment = mlflow.set_experiment("new_experiment")
# Option B: Use the experiment ID directly
with mlflow.start_run(experiment_id=experiment.experiment_id):  # select the experiment by its ID
    mlflow.log_param("new_param", 100)
    mlflow.log_metric("new_metric", 0.95)
    print(f"Logged run under experiment ID {experiment.experiment_id}")
#
2. 코드 실행 #
$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d40c2085dd73 ghcr.io/mlflow/mlflow:v2.0.1 "/bin/sh -c 'pip ins…" 7 hours ago Up 32 seconds 0.0.0.0:5001->5000/tcp mlflow
9da59e40a1f8 mysql:8.0 "docker-entrypoint.s…" 7 hours ago Up 32 seconds 0.0.0.0:3306->3306/tcp, 33060/tcp mysql v2.0.1 35853aa42ccd 2 years ago 905MB
$ docker exec -it d40c2085dd73 /bin/bash
root@d40c2085dd73:/# cd /mlflow/mlruns
root@d40c2085dd73:/mlflow/mlruns#
시작 상태는 이렇고, 여기서 logging_functions.py를 실행한다.
root@d40c2085dd73:/mlflow/mlruns# python logging_functions.py
<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)
<jemalloc>: (This is the expected behaviour if you are running under QEMU)
Current Tracking URI: http://0.0.0.0:5000
2025/08/22 08:07:59 INFO mlflow.tracking.fluent: Experiment with name 'new_experiment' does not exist. Creating a new experiment.
Experiment ID: 2
Experiment Name: new_experiment # experiment 정보: id, name
Artifact Location: /mlflow/mlruns/2 # location, 생애주기단계
Lifecycle Stage: active
_Experiment ID: 2
_Experiment Name: new_experiment
_Artifact Location: /mlflow/mlruns/2
_Lifecycle Stage: active
Logged run under experiment 'new_experiment'
Logged run under experiment 'new_experiment'
Logged run under experiment 'new_experiment'
logging_functions.py를 보면 start_run을 써서 3번 돌렸는데
# Log runs under the same experiment
for i in range(3):
    # One run per iteration; the run ends when the `with` block exits.
    with mlflow.start_run():
        mlflow.log_param("iteration", i)
        mlflow.log_metric("accuracy", 0.8 + i * 0.05)
        print(f"Logged run under experiment '{experiment.name}'")
Current Tracking URI: http://0.0.0.0:5000
2025/08/22 08:07:59 INFO mlflow.tracking.fluent: Experiment with name 'new_experiment' does not exist. Creating a new experiment.
Experiment ID: 2
Experiment Name: new_experiment
Artifact Location: /mlflow/mlruns/2
Lifecycle Stage: active
_Experiment ID: 2
_Experiment Name: new_experiment
_Artifact Location: /mlflow/mlruns/2
_Lifecycle Stage: active
Logged run under experiment 'new_experiment'
Logged run under experiment 'new_experiment'
Logged run under experiment 'new_experiment'
그렇기 때문에 /mlflow/mlruns/2 안에 3번의 run이 저장돼 있다.