Démarrer avec MLflow
Packages
Paths & Config
# Address of the MLflow tracking server; host and port are kept separate
# and combined into the URI handed to the MLflow client.
MLFLOW_HOST = "127.0.0.1"
MLFLOW_PORT = "8080"
MLFLOW_URI = "http://{}:{}".format(MLFLOW_HOST, MLFLOW_PORT)

# Name of the MLflow experiment that runs are recorded under.
EXPERIMENT_NAME = "Get Started with MLflow"

# Remote location of the diabetes CSV file.
DATA_URL = (
    "https://raw.githubusercontent.com/joekakone/datasets/master/"
    "datasets/ml-challenges/diabetes.csv"
)
# Point the MLflow client at the tracking server.
mlflow.set_tracking_uri(uri=MLFLOW_URI)
# set_experiment() creates the experiment when it does not exist yet and simply
# activates it otherwise, so no separate create_experiment() call is made:
# that call raises once the experiment already exists (e.g. on a re-run).
mlflow.set_experiment(EXPERIMENT_NAME)
Output
<Experiment: artifact_location='mlflow-artifacts:/934956080996859228', creation_time=1707548279279, experiment_id='934956080996859228', last_update_time=1707548279279, lifecycle_stage='active', name='Get Started with MLflow', tags={}>
Split Train/Test
Train model
# Hyperparameters of the random forest (kept in a dict so the same values
# can be passed to the estimator and reused elsewhere)
params = {"n_estimators": 100, "max_depth": 6, "max_features": 3}
# Build the classifier and fit it on the training split; fit() returns the
# estimator itself, so `rf` is the trained model
rf = RandomForestClassifier(**params).fit(X_train, y_train)
# Record one MLflow run: tag, input dataset, hyperparameters, metric and the
# fitted model (which is also registered in the model registry).
with mlflow.start_run():
    # Tags: the model logged below is `rf`, a random forest, so the
    # description must not call it an "LR" model.
    mlflow.set_tag("Training Info", "Basic RandomForest model for diabete data")
    # Dataset: `dataset` is defined outside this section -- presumably the
    # DataFrame loaded from DATA_URL; verify against the loading cell.
    mlflow.log_input(
        mlflow.data.from_pandas(dataset, source=DATA_URL),
        context="training",
    )
    # Hyperparameters
    mlflow.log_params(params)
    # Infer the model signature from the training inputs and the model's
    # predictions on them
    signature = infer_signature(X_train, rf.predict(X_train))
    # Score: `score` is computed outside this section -- presumably the
    # accuracy on the test split; TODO confirm.
    mlflow.log_metric("accuracy", score)
    # Log the model and register it under "diabete-classifier"
    model_info = mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="diabete_model",
        signature=signature,
        # A handful of rows is enough as an input example; passing the whole
        # training split would store all of it alongside the model.
        input_example=X_train[:5],
        registered_model_name="diabete-classifier",
    )