Aller au contenu

Démarrer avec MLflow

Packages

import mlflow
import pandas as pd
from mlflow.models import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

Paths & Config

# Address of the MLflow tracking server that receives the runs.
MLFLOW_HOST = "127.0.0.1"
MLFLOW_PORT = "8080"
MLFLOW_URI = f"http://{MLFLOW_HOST}:{MLFLOW_PORT}"
# Name of the experiment the runs are grouped under, and the CSV to load.
EXPERIMENT_NAME = "Get Started with MLflow"
DATA_URL = "https://raw.githubusercontent.com/joekakone/datasets/master/datasets/ml-challenges/diabetes.csv"

mlflow.set_tracking_uri(uri=MLFLOW_URI)

# set_experiment() creates the experiment when it does not exist yet and then
# makes it the active one. A separate create_experiment() call is therefore
# unnecessary, and it raises when the name is already taken, which made this
# cell fail on every execution after the first.
mlflow.set_experiment(EXPERIMENT_NAME)
Output
<Experiment: artifact_location='mlflow-artifacts:/934956080996859228', creation_time=1707548279279, experiment_id='934956080996859228', last_update_time=1707548279279, lifecycle_stage='active', name='Get Started with MLflow', tags={}>

# Read the CSV behind DATA_URL into a DataFrame and preview its first rows.
dataset = pd.read_csv(DATA_URL)
dataset.head()

# Separate the "Outcome" target column from the remaining feature columns.
y = dataset["Outcome"]
X = dataset.drop(columns="Outcome")

Split Train/Test

# Hold out a test split. A fixed seed makes the split identical on every
# execution, and stratifying on y keeps the proportion of each target value
# the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

Train model

# Hyperparameters
params = {"n_estimators": 100, "max_depth": 6, "max_features": 3}

# Build the random forest and train it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
rf = RandomForestClassifier(**params).fit(X_train, y_train)

# Predict on the held-out split and print the model's score on that split.
y_pred = rf.predict(X_test)
score = rf.score(X_test, y_test)
print(score)
Output
0.765625

# Record one MLflow run: a descriptive tag, the dataset, the hyperparameters,
# the test score and the trained model (which is also registered).
with mlflow.start_run():
    # Tags
    # The estimator being logged is a RandomForestClassifier, so the tag
    # says RF rather than LR.
    mlflow.set_tag("Training Info", "Basic RF model for diabete data")

    # Dataset
    mlflow.log_input(mlflow.data.from_pandas(dataset, source=DATA_URL), context='training')

    # Hyperparameters
    mlflow.log_params(params)

    # Infer the model signature from the training inputs and the model's
    # predictions on them
    signature = infer_signature(X_train, rf.predict(X_train))

    # Score
    mlflow.log_metric("accuracy", score)

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="diabete_model",
        signature=signature,
        # A handful of rows is enough to illustrate the expected input;
        # passing the whole training frame would store all of it with the model.
        input_example=X_train.head(),
        registered_model_name="diabete-classifier",
    )
import mlflow
import pandas as pd

# Use the URI returned by log_model() rather than a hard-coded run ID: a
# literal run ID only exists on the tracking server that produced it.
logged_model = model_info.model_uri

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
loaded_model.predict(pd.DataFrame(X_test))