forked from mlflow/mlflow-export-import
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Export_Experiment.py
153 lines (105 loc) · 3.97 KB
/
Export_Experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Databricks notebook source
# MAGIC %md ### Export Experiment
# MAGIC
# MAGIC #### Overview
# MAGIC * Exports an experiment and its runs (artifacts too) to a DBFS directory.
# MAGIC * Output file `experiment.json` contains top-level experiment metadata.
# MAGIC * Each run and its artifacts are stored as a sub-directory whose name is that of the run_id.
# MAGIC * Notebooks can also be exported in several formats.
# MAGIC
# MAGIC ##### Output folder
# MAGIC ```
# MAGIC
# MAGIC +-experiment.json
# MAGIC +-d2309e6c74dc4679b576a37abf6b6af8/
# MAGIC | +-run.json
# MAGIC | +-artifacts/
# MAGIC |   +-plot.png
# MAGIC |   +-sklearn-model/
# MAGIC |   | +-model.pkl
# MAGIC |   | +-conda.yaml
# MAGIC |   | +-MLmodel
# MAGIC ```
# MAGIC
# MAGIC ##### Widgets
# MAGIC * Experiment ID or name - Either the experiment ID or experiment name.
# MAGIC * Output base directory - Base output folder for the exported experiment. All experiment data is saved under this folder, in a sub-folder named after the experiment ID.
# MAGIC * Notebook formats:
# MAGIC   * Standard Databricks notebook formats such as SOURCE, HTML, JUPYTER, DBC. See the [Databricks Export Format](https://docs.databricks.com/dev-tools/api/latest/workspace.html#notebookexportformat) documentation.
# MAGIC
# MAGIC #### Setup
# MAGIC * See Setup in [README]($./_README).
# COMMAND ----------
# MAGIC %md ### Setup
# COMMAND ----------
# MAGIC %run ./Common
# COMMAND ----------
# Declare the two text widgets (both default to an empty string), then read
# back whatever values they currently hold.
dbutils.widgets.text("1. Experiment ID or Name", "")
dbutils.widgets.text("2. Output base directory", "")
experiment_id_or_name = dbutils.widgets.get("1. Experiment ID or Name")
output_dir = dbutils.widgets.get("2. Output base directory")

# get_notebook_formats is not defined in this file (presumably supplied by
# `%run ./Common`); the 3 looks like the widget's ordinal prefix, matching the
# "1." / "2." labels above -- TODO confirm.
notebook_formats = get_notebook_formats(3)

# Echo the resolved inputs into the cell output.
print("experiment_id_or_name:", experiment_id_or_name)
print("output_dir:", output_dir)
print("notebook_formats:", notebook_formats)
# COMMAND ----------
# Validate the required widget values before touching MLflow. assert_widget is
# not defined in this file (presumably supplied by `%run ./Common`).
assert_widget(experiment_id_or_name, "1. Experiment ID or Name")

# Re-read the base directory from the widget rather than building on the
# current value of output_dir: this cell rebinds output_dir below, so deriving
# it from itself would append the experiment ID again on every re-run of the
# cell (base/ID/ID/...).
output_base_dir = dbutils.widgets.get("2. Output base directory")
assert_widget(output_base_dir, "2. Output base directory")

import mlflow
from mlflow_export_import.common import mlflow_utils

# Look the experiment up from whatever the user typed (ID or name).
client = mlflow.tracking.MlflowClient()
experiment = mlflow_utils.get_experiment(client, experiment_id_or_name)

# All exported data goes under <base>/<experiment_id>. Trailing slashes on the
# base are dropped so the joined path never contains "//".
output_dir = f"{output_base_dir.rstrip('/')}/{experiment.experiment_id}"

print("experiment_id:",experiment.experiment_id)
print("experiment_name:",experiment.name)
print("output_dir:",output_dir)
# COMMAND ----------
# MAGIC %md ### Display MLflow UI URI of Experiment
# COMMAND ----------
# Per the cell heading, this shows the experiment's MLflow UI URI.
# display_experiment_uri is not defined in this file (presumably supplied by
# `%run ./Common`); it is passed the experiment's name, not its ID.
display_experiment_uri(experiment.name)
# COMMAND ----------
# MAGIC %md ### Remove any previous exported experiment data
# MAGIC
# MAGIC Note: The removal can be unreliable because of S3 eventual consistency. If the subsequent export fails, run the remove cell again.
# COMMAND ----------
# First pass: recursively delete the export directory and everything below it.
dbutils.fs.rm(output_dir, recurse=True)
# Second pass: non-recursive delete of the same path. Presumably a follow-up
# for the eventual-consistency flakiness mentioned in the note above -- TODO
# confirm it is still needed.
dbutils.fs.rm(output_dir, recurse=False)
# COMMAND ----------
# MAGIC %md ### Export the experiment
# COMMAND ----------
# %sh ls -l /dbfs/mnt/andre-work/exim/experiments
# 12927081/12927081/b29df707abcc4d21bf7b3d5c182a12a2
# COMMAND ----------
from mlflow_export_import.experiment.export_experiment import ExperimentExporter

# Build the exporter around the MLflow client created earlier, passing along
# the notebook formats chosen in the widgets.
exporter = ExperimentExporter(client, notebook_formats=notebook_formats)

# Export the experiment, addressed by its resolved ID, into output_dir.
exporter.export_experiment(
    exp_id_or_name=experiment.experiment_id,
    output_dir=output_dir,
)
# COMMAND ----------
# MAGIC %md ### Display exported experiment files
# COMMAND ----------
import os

# Swap every "dbfs:" in the path for "/dbfs", then publish the result as the
# OUTPUT_DIR environment variable that the following %sh cells read.
# Note that output_dir itself is rebound to the rewritten path.
output_dir = output_dir.replace("dbfs:", "/dbfs")
os.environ["OUTPUT_DIR"] = output_dir

# Bare expression: the notebook shows the rewritten path as the cell result.
output_dir
# COMMAND ----------
# MAGIC %sh echo $OUTPUT_DIR
# COMMAND ----------
# MAGIC %sh ls -lR $OUTPUT_DIR
# COMMAND ----------
# MAGIC %sh cat $OUTPUT_DIR/manifest.json
# COMMAND ----------
# MAGIC %md #### List run information
# COMMAND ----------
# find_run_dir is not defined in this file (presumably supplied by
# `%run ./Common`). Judging by its arguments it locates a run directory under
# output_dir, ignoring "manifest.json", and exposes it through the RUN_DIR
# environment variable -- TODO confirm against the helper.
# NOTE(review): the overview at the top of this notebook names the metadata
# file `experiment.json`, while this call and the `%sh cat` cell above use
# `manifest.json` -- confirm which name the exporter actually writes.
find_run_dir(output_dir, "RUN_DIR", "manifest.json")
# COMMAND ----------
import glob

# Collect the run directories under the export folder. The overview states
# that each run is stored as a sub-directory, so only directories are kept;
# this skips manifest.json and any other top-level metadata file. Sorting
# makes the choice deterministic, since glob returns entries in arbitrary order.
files = sorted(f for f in glob.glob(f"{output_dir}/*") if os.path.isdir(f))

# Fail with an explicit message instead of a bare IndexError when the
# experiment has no exported runs.
if not files:
    raise RuntimeError(f"No run directories found under '{output_dir}'")

# Expose the first run directory to the following %sh cells.
# NOTE(review): the preceding cell already calls find_run_dir with "RUN_DIR";
# this cell may be redundant -- confirm.
os.environ["RUN_DIR"] = files[0]

# Bare expression: the notebook shows the selected run directory.
files[0]
# COMMAND ----------
# MAGIC %sh ls -lR $RUN_DIR
# COMMAND ----------
# MAGIC %sh cat $RUN_DIR/run.json