diff --git a/.gitignore b/.gitignore
index 69de92e5..74712a80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,17 +1,17 @@
-# folders
+# Environments
+.venv
 
-## IDEs
+# IDEs
 .idea
+.vscode
 
-## Python
-venv*
-__pycache__
-.ipynb_checkpoints
-
-## Project
-data
+# Mac OS
+.DS_Store
 
-# files
+# Python
+__pycache__
 
-## Mac OS
-.DS_Store
+# Project
+data/*
+models/*
+reports/*
diff --git a/README.md b/README.md
index cff8466b..e9f6e7a8 100644
--- a/README.md
+++ b/README.md
@@ -1,42 +1,17 @@
-# Tutorial: dvc-3-automate-experiments
+# Tutorial: Automate DVC experiments
 
-## 1. clone this repository
+## 1. Create and activate a virtual environment
 
-```bash
-git clone https://gitlab.com/7labs.ru/tutorials-dvc/dvc-3-automate-experiments.git
-cd dvc-3-automate-experiments
-```
-
-## 2. Create and activate virtual environment
-
-Install virtualenv in advance: 
+Create a virtual environment:
 
 ```bash
-pip install virtualenv
+python3 -m venv .venv
+echo "export PYTHONPATH=$PWD" >> .venv/bin/activate
+source .venv/bin/activate
 ```
 
-Create virtual environment 
-```bash
-virtualenv venv-dvc-3-automate-experiments
-source venv-dvc-3-automate-experiments/bin/activate
-```
-
-## 3. Install python libraries (including dvc)
+## 2. Install Python libraries
 
 ```bash
 pip install -r requirements.txt
 ```
-
-    
-## 4. Add Virtual Environment to Jupyter Notebook
-
-```bash
-python -m ipykernel install --user --name=venv-dvc-3-automate-experiments
-``` 
-
-## 5. Run and follow Jupyter Notebook `dvc-3-automate-experiments.ipynb` for instructions:
-
-```bash
-jupyter notebook
-```
-
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 00000000..b722e9e1
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1 @@
+!.gitignore
\ No newline at end of file
diff --git a/dvc-3-automate-experiments.ipynb b/dvc-3-automate-experiments.ipynb
deleted file mode 100644
index 74371ddd..00000000
--- a/dvc-3-automate-experiments.ipynb
+++ /dev/null
@@ -1,2972 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Install and init DVC\n",
-    "\n",
-    "Prerequisites: \n",
-    "-  DVC and requirements.txt packages installed (if not - check README.md file for instructions)\n",
-    "-  A project repository is a Git repo \n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Install with pip"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-01T07:32:18.843826Z",
-     "start_time": "2020-07-01T07:32:16.105734Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Collecting dvc==1.5.0\n",
-      "  Using cached dvc-1.5.0-py2.py3-none-any.whl (445 kB)\n",
-      "Collecting ruamel.yaml>=0.16.1\n",
-      "  Using cached ruamel.yaml-0.16.10-py2.py3-none-any.whl (111 kB)\n",
-      "Collecting shortuuid>=0.5.0\n",
-      "  Using cached shortuuid-1.0.1-py3-none-any.whl (7.5 kB)\n",
-      "Collecting shtab<2,>=1.3.0\n",
-      "  Using cached shtab-1.3.1-py2.py3-none-any.whl (12 kB)\n",
-      "Collecting pydot>=1.2.4\n",
-      "  Using cached pydot-1.4.1-py2.py3-none-any.whl (19 kB)\n",
-      "Collecting rich>=3.0.5\n",
-      "  Using cached rich-5.2.0-py3-none-any.whl (145 kB)\n",
-      "Collecting tabulate>=0.8.7\n",
-      "  Using cached tabulate-0.8.7-py3-none-any.whl (24 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/3c/33/97/805b282e129f60bb4e87cea622338f30b65f21eaf65219971f/funcy-1.14-py2.py3-none-any.whl\n",
-      "Processing /home/alex/.cache/pip/wheels/49/68/a0/8e7cb7bbf4990fc10b5a082aa0eb3ac66787ca11e8eca445b2/flufl.lock-3.2-py3-none-any.whl\n",
-      "Collecting pyasn1>=0.4.1\n",
-      "  Using cached pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)\n",
-      "Collecting appdirs>=1.4.3\n",
-      "  Using cached appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)\n",
-      "Requirement already satisfied: setuptools>=34.0.0 in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from dvc==1.5.0) (47.1.1)\n",
-      "Collecting tqdm<5,>=4.45.0\n",
-      "  Using cached tqdm-4.48.2-py2.py3-none-any.whl (68 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/bc/f8/ae/bc69cb5f61393ebf9ade4cde41d1a813d35bfe78263a26f99e/dpath-2.0.1-py3-none-any.whl\n",
-      "Collecting grandalf==0.6\n",
-      "  Using cached grandalf-0.6-py3-none-any.whl (31 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/b8/92/aa/456d462c908b4e210c3928f778d28f94049fc9e47af8b191c9/nanotime-0.5.2-py3-none-any.whl\n",
-      "Collecting flatten-json<0.1.8,>=0.1.6\n",
-      "  Using cached flatten_json-0.1.7-py3-none-any.whl (6.4 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/ce/22/5c/bcd55db68399954d13c8d3b23192a517dd59ba3ee8648fa773/pygtrie-2.3.2-py3-none-any.whl\n",
-      "Requirement already satisfied: packaging>=19.0 in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from dvc==1.5.0) (20.4)\n",
-      "Requirement already satisfied: PyYAML<5.4,>=5.1.2 in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from dvc==1.5.0) (5.3)\n",
-      "Processing /home/alex/.cache/pip/wheels/17/a2/0a/00fa5a0d6f271c82fc59be9ae47173bb6e6a462d4361224072/jsonpath_ng-1.5.1-py3-none-any.whl\n",
-      "Collecting colorama>=0.3.9\n",
-      "  Using cached colorama-0.4.3-py2.py3-none-any.whl (15 kB)\n",
-      "Collecting toml>=0.10.1\n",
-      "  Using cached toml-0.10.1-py2.py3-none-any.whl (19 kB)\n",
-      "Collecting pathspec>=0.6.0\n",
-      "  Using cached pathspec-0.8.0-py2.py3-none-any.whl (28 kB)\n",
-      "Collecting gitpython>3\n",
-      "  Using cached GitPython-3.1.7-py3-none-any.whl (158 kB)\n",
-      "Collecting networkx<2.5,>=2.1\n",
-      "  Using cached networkx-2.4-py3-none-any.whl (1.6 MB)\n",
-      "Collecting ply>=3.9\n",
-      "  Using cached ply-3.11-py2.py3-none-any.whl (49 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/0d/c4/19/13d74440f2a571841db6b6e0a273694327498884dafb9cf978/configobj-5.0.6-py3-none-any.whl\n",
-      "Collecting distro>=1.3.0\n",
-      "  Using cached distro-1.5.0-py2.py3-none-any.whl (18 kB)\n",
-      "Collecting requests>=2.22.0\n",
-      "  Using cached requests-2.24.0-py2.py3-none-any.whl (61 kB)\n",
-      "Processing /home/alex/.cache/pip/wheels/af/ee/20/047a79ba5ff692baa2f7e2e95c0cd57061a1673d59f5acf0d5/voluptuous-0.11.7-py3-none-any.whl\n",
-      "Collecting zc.lockfile>=1.2.1\n",
-      "  Using cached zc.lockfile-2.0-py2.py3-none-any.whl (9.7 kB)\n",
-      "Collecting ruamel.yaml.clib>=0.1.2; platform_python_implementation == \"CPython\" and python_version < \"3.9\"\n",
-      "  Using cached ruamel.yaml.clib-0.2.0-cp37-cp37m-manylinux1_x86_64.whl (547 kB)\n",
-      "Requirement already satisfied: pyparsing>=2.1.4 in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from pydot>=1.2.4->dvc==1.5.0) (2.4.7)\n",
-      "Collecting typing-extensions<4.0.0,>=3.7.4\n",
-      "  Using cached typing_extensions-3.7.4.2-py3-none-any.whl (22 kB)\n",
-      "Collecting commonmark<0.10.0,>=0.9.0\n",
-      "  Using cached commonmark-0.9.1-py2.py3-none-any.whl (51 kB)\n",
-      "Requirement already satisfied: pygments<3.0.0,>=2.6.0 in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from rich>=3.0.5->dvc==1.5.0) (2.6.1)\n",
-      "Processing /home/alex/.cache/pip/wheels/3e/5d/46/fa3cbde0ab8c53dbdd14658b3a4c97035b8851369ce8e79649/atpublic-2.0-py3-none-any.whl\n",
-      "Processing /home/alex/.cache/pip/wheels/8b/99/a0/81daf51dcd359a9377b110a8a886b3895921802d2fc1b2397e/future-0.18.2-cp37-none-any.whl\n",
-      "Requirement already satisfied: six in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from packaging>=19.0->dvc==1.5.0) (1.15.0)\n",
-      "Requirement already satisfied: decorator in ./venv-dvc-3-automate-experiments/lib/python3.7/site-packages (from jsonpath-ng>=1.5.1->dvc==1.5.0) (4.4.2)\n",
-      "Collecting gitdb<5,>=4.0.1\n",
-      "  Using cached gitdb-4.0.5-py3-none-any.whl (63 kB)\n",
-      "Collecting certifi>=2017.4.17\n",
-      "  Using cached certifi-2020.6.20-py2.py3-none-any.whl (156 kB)\n",
-      "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n",
-      "  Using cached urllib3-1.25.10-py2.py3-none-any.whl (127 kB)\n",
-      "Collecting chardet<4,>=3.0.2\n",
-      "  Using cached chardet-3.0.4-py2.py3-none-any.whl (133 kB)\n",
-      "Collecting idna<3,>=2.5\n",
-      "  Using cached idna-2.10-py2.py3-none-any.whl (58 kB)\n",
-      "Collecting smmap<4,>=3.0.1\n",
-      "  Using cached smmap-3.0.4-py2.py3-none-any.whl (25 kB)\n",
-      "Installing collected packages: ruamel.yaml.clib, ruamel.yaml, shortuuid, shtab, pydot, colorama, typing-extensions, commonmark, rich, tabulate, funcy, atpublic, flufl.lock, pyasn1, appdirs, tqdm, dpath, future, grandalf, nanotime, flatten-json, pygtrie, ply, jsonpath-ng, toml, pathspec, smmap, gitdb, gitpython, networkx, configobj, distro, certifi, urllib3, chardet, idna, requests, voluptuous, zc.lockfile, dvc\n",
-      "  Attempting uninstall: tqdm\n",
-      "    Found existing installation: tqdm 4.42.0\n",
-      "    Uninstalling tqdm-4.42.0:\n",
-      "      Successfully uninstalled tqdm-4.42.0\n",
-      "Successfully installed appdirs-1.4.4 atpublic-2.0 certifi-2020.6.20 chardet-3.0.4 colorama-0.4.3 commonmark-0.9.1 configobj-5.0.6 distro-1.5.0 dpath-2.0.1 dvc-1.5.0 flatten-json-0.1.7 flufl.lock-3.2 funcy-1.14 future-0.18.2 gitdb-4.0.5 gitpython-3.1.7 grandalf-0.6 idna-2.10 jsonpath-ng-1.5.1 nanotime-0.5.2 networkx-2.4 pathspec-0.8.0 ply-3.11 pyasn1-0.4.8 pydot-1.4.1 pygtrie-2.3.2 requests-2.24.0 rich-5.2.0 ruamel.yaml-0.16.10 ruamel.yaml.clib-0.2.0 shortuuid-1.0.1 shtab-1.3.1 smmap-3.0.4 tabulate-0.8.7 toml-0.10.1 tqdm-4.48.2 typing-extensions-3.7.4.2 urllib3-1.25.10 voluptuous-0.11.7 zc.lockfile-2.0\n",
-      "\u001b[33mWARNING: You are using pip version 20.1.1; however, version 20.2.2 is available.\n",
-      "You should consider upgrading via the '/home/alex/Dev/Projects/tutorials/tutorials-dvc/dvc-3-automate-experiments/venv-dvc-3-automate-experiments/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install dvc==1.5.0"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Checkout branch `tutorial`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-01T07:32:19.401395Z",
-     "start_time": "2020-07-01T07:32:19.271265Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Switched to a new branch 'dvc-tutorial'\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git checkout -b dvc-tutorial"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-06-08T11:18:29.199273Z",
-     "start_time": "2019-06-08T11:18:29.196865Z"
-    }
-   },
-   "source": [
-    "## Initialize DVC\n",
-    "\n",
-    "References: \n",
-    "- https://dvc.org/doc/get-started/initialize "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-01T07:32:22.463407Z",
-     "start_time": "2020-07-01T07:32:21.450728Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "You can now commit the changes to git.\n",
-      "\n",
-      "\u001b[31m+---------------------------------------------------------------------+\n",
-      "\u001b[39m\u001b[31m|\u001b[39m                                                                     \u001b[31m|\u001b[39m\n",
-      "\u001b[31m|\u001b[39m        DVC has enabled anonymous aggregate usage analytics.         \u001b[31m|\u001b[39m\n",
-      "\u001b[31m|\u001b[39m     Read the analytics documentation (and how to opt-out) here:     \u001b[31m|\u001b[39m\n",
-      "\u001b[31m|\u001b[39m              \u001b[34mhttps://dvc.org/doc/user-guide/analytics\u001b[39m               \u001b[31m|\u001b[39m\n",
-      "\u001b[31m|\u001b[39m                                                                     \u001b[31m|\u001b[39m\n",
-      "\u001b[31m+---------------------------------------------------------------------+\n",
-      "\u001b[39m\n",
-      "\u001b[33mWhat's next?\u001b[39m\n",
-      "\u001b[33m------------\u001b[39m\n",
-      "- Check out the documentation: \u001b[34mhttps://dvc.org/doc\u001b[39m\n",
-      "- Get help and share ideas: \u001b[34mhttps://dvc.org/chat\u001b[39m\n",
-      "- Star us on GitHub: \u001b[34mhttps://github.com/iterative/dvc\u001b[39m\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc init"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Commit changes"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-01T07:32:26.446894Z",
-     "start_time": "2020-07-01T07:32:26.392814Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[dvc-tutorial f285905] Initialize DVC\n",
-      " 6 files changed, 128 insertions(+)\n",
-      " create mode 100644 .dvc/.gitignore\n",
-      " create mode 100644 .dvc/config\n",
-      " create mode 100644 .dvc/plots/confusion.json\n",
-      " create mode 100644 .dvc/plots/default.json\n",
-      " create mode 100644 .dvc/plots/scatter.json\n",
-      " create mode 100644 .dvc/plots/smooth.json\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "\n",
-    "git add .\n",
-    "git commit -m \"Initialize DVC\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Build automated pipelines"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create `data_load` stage\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 94,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:35.023136Z",
-     "start_time": "2020-07-03T19:30:34.904974Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "!mkdir -p data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 95,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:37.406056Z",
-     "start_time": "2020-07-03T19:30:35.351794Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Restored stage 'data_load' from run-cache                                       \n",
-      "Skipping run, checking out outputs\n",
-      "Creating 'dvc.yaml'\n",
-      "Adding stage 'data_load' in 'dvc.yaml'\n",
-      "Generating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock dvc.yaml .dvc/.gitignore\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc run -n data_load \\\n",
-    "    -d src/data_load.py \\\n",
-    "    -o data/iris.csv \\\n",
-    "    -o data/classes.json \\\n",
-    "    -p data_load \\\n",
-    "    python src/data_load.py \\\n",
-    "        --config=params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 96,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:37.455211Z",
-     "start_time": "2020-07-03T19:30:37.433214Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "4.0K\tdata/classes.json\n",
-      "4.0K\tdata/cm.csv\n",
-      "4.0K\tdata/iris.csv\n",
-      "8.0K\tdata/iris_featurized.csv\n",
-      "4.0K\tdata/metrics.json\n",
-      "8.0K\tdata/model.joblib\n",
-      "4.0K\tdata/test.csv\n",
-      "8.0K\tdata/train.csv\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "\n",
-    "du -sh data/*"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 97,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:37.604922Z",
-     "start_time": "2020-07-03T19:30:37.479654Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[01;34m.\u001b[00m\r\n",
-      "├── README.md\r\n",
-      "├── \u001b[01;34mdata\u001b[00m\r\n",
-      "│   ├── classes.json\r\n",
-      "│   ├── cm.csv\r\n",
-      "│   ├── iris.csv\r\n",
-      "│   ├── iris_featurized.csv\r\n",
-      "│   ├── metrics.json\r\n",
-      "│   ├── model.joblib\r\n",
-      "│   ├── test.csv\r\n",
-      "│   └── train.csv\r\n",
-      "├── dvc-3-automate-experiments.ipynb\r\n",
-      "├── dvc.lock\r\n",
-      "├── dvc.yaml\r\n",
-      "├── params.yaml\r\n",
-      "├── requirements.txt\r\n",
-      "└── \u001b[01;34msrc\u001b[00m\r\n",
-      "    ├── __init__.py\r\n",
-      "    ├── data_load.py\r\n",
-      "    ├── evaluate.py\r\n",
-      "    ├── featurization.py\r\n",
-      "    ├── split_dataset.py\r\n",
-      "    └── train.py\r\n",
-      "\r\n",
-      "2 directories, 20 files\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!tree -I venv-dvc-3-automate-experiments"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 98,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:37.727096Z",
-     "start_time": "2020-07-03T19:30:37.609182Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "stages:\r\n",
-      "  data_load:\r\n",
-      "    cmd: python src/data_load.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - src/data_load.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    outs:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/iris.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 99,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:37.877998Z",
-     "start_time": "2020-07-03T19:30:37.755666Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "data_load:\r\n",
-      "  raw_data_path: data/iris.csv\r\n",
-      "  classes_names_path: data/classes.json\r\n",
-      "\r\n",
-      "featurize:\r\n",
-      "  features_path: data/iris_featurized.csv\r\n",
-      "  target_column: target\r\n",
-      "\r\n",
-      "\r\n",
-      "data_split:\r\n",
-      "  test_size: 0.2\r\n",
-      "  train_path: data/train.csv\r\n",
-      "  test_path: data/test.csv\r\n",
-      "\r\n",
-      "\r\n",
-      "train:\r\n",
-      "  model_path: data/model.joblib\r\n",
-      "\r\n",
-      "\r\n",
-      "evaluate:\r\n",
-      "  metrics_file: data/metrics.json\r\n",
-      "  confusion_matrix: data/cm.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat params.yaml"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Reproduce a pipeline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 100,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:39.781553Z",
-     "start_time": "2020-07-03T19:30:37.923002Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Data and pipelines are up to date.\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Change params.yaml and reproduce \n",
-    "\n",
-    "Add a new line into `data_load` section:\n",
-    "    `dummy_param: dummy_value`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 101,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:41.698409Z",
-     "start_time": "2020-07-03T19:30:39.807607Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Data and pipelines are up to date.\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Build end-to-end Machine Learning pipeline\n",
-    "Stages \n",
-    "- extract features \n",
-    "- split dataset \n",
-    "- train \n",
-    "- evaluate \n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add feature extraction stage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 103,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.387596Z",
-     "start_time": "2020-07-03T19:30:43.388868Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Restored stage 'feature_extraction' from run-cache                              \n",
-      "Skipping run, checking out outputs\n",
-      "Adding stage 'feature_extraction' in 'dvc.yaml'\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock dvc.yaml\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc run -n feature_extraction \\\n",
-    "    -d src/featurization.py \\\n",
-    "    -d data/iris.csv \\\n",
-    "    -o data/iris_featurized.csv \\\n",
-    "    -p data_load,featurize \\\n",
-    "    python src/featurization.py \\\n",
-    "        --config=params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 104,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.561869Z",
-     "start_time": "2020-07-03T19:30:45.439521Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "README.md                        params.yaml\r\n",
-      "\u001b[1m\u001b[36mdata\u001b[m\u001b[m                             requirements.txt\r\n",
-      "dvc-3-automate-experiments.ipynb \u001b[1m\u001b[36msrc\u001b[m\u001b[m\r\n",
-      "dvc.lock                         \u001b[1m\u001b[36mvenv-dvc-3-automate-experiments\u001b[m\u001b[m\r\n",
-      "dvc.yaml\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!ls "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 105,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.706627Z",
-     "start_time": "2020-07-03T19:30:45.585641Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "stages:\r\n",
-      "  data_load:\r\n",
-      "    cmd: python src/data_load.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - src/data_load.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    outs:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/iris.csv\r\n",
-      "  feature_extraction:\r\n",
-      "    cmd: python src/featurization.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris.csv\r\n",
-      "    - src/featurization.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/iris_featurized.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 106,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.745702Z",
-     "start_time": "2020-07-03T19:30:45.734321Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>sepal_length</th>\n",
-       "      <th>sepal_width</th>\n",
-       "      <th>petal_length</th>\n",
-       "      <th>petal_width</th>\n",
-       "      <th>target</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>5.1</td>\n",
-       "      <td>3.5</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4.9</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>4.7</td>\n",
-       "      <td>3.2</td>\n",
-       "      <td>1.3</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4.6</td>\n",
-       "      <td>3.1</td>\n",
-       "      <td>1.5</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5.0</td>\n",
-       "      <td>3.6</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   sepal_length  sepal_width  petal_length  petal_width  target\n",
-       "0           5.1          3.5           1.4          0.2       0\n",
-       "1           4.9          3.0           1.4          0.2       0\n",
-       "2           4.7          3.2           1.3          0.2       0\n",
-       "3           4.6          3.1           1.5          0.2       0\n",
-       "4           5.0          3.6           1.4          0.2       0"
-      ]
-     },
-     "execution_count": 106,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "features = pd.read_csv('data/iris_featurized.csv')\n",
-    "features.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 107,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.893549Z",
-     "start_time": "2020-07-03T19:30:45.763986Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[31m??\u001b[m .dvc/\r\n",
-      "\u001b[31m??\u001b[m dvc.lock\r\n",
-      "\u001b[31m??\u001b[m dvc.yaml\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git status -s"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 108,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:45.961182Z",
-     "start_time": "2020-07-03T19:30:45.916816Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[dev 0ae7569] Add stage features_extraction\n",
-      " 3 files changed, 56 insertions(+)\n",
-      " create mode 100644 .dvc/.gitignore\n",
-      " create mode 100644 dvc.lock\n",
-      " create mode 100644 dvc.yaml\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "git add .\n",
-    "git commit -m \"Add stage features_extraction\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add split train/test stage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 109,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:48.044867Z",
-     "start_time": "2020-07-03T19:30:45.984594Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Restored stage 'split_dataset' from run-cache                                   \n",
-      "Skipping run, checking out outputs\n",
-      "Adding stage 'split_dataset' in 'dvc.yaml'\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock dvc.yaml\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc run -n split_dataset \\\n",
-    "    -d src/split_dataset.py \\\n",
-    "    -d data/iris_featurized.csv \\\n",
-    "    -o data/train.csv \\\n",
-    "    -o data/test.csv \\\n",
-    "    -p featurize,data_split \\\n",
-    "        python src/split_dataset.py \\\n",
-    "            --config=params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 110,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:48.186864Z",
-     "start_time": "2020-07-03T19:30:48.068177Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "stages:\r\n",
-      "  data_load:\r\n",
-      "    cmd: python src/data_load.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - src/data_load.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    outs:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/iris.csv\r\n",
-      "  feature_extraction:\r\n",
-      "    cmd: python src/featurization.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris.csv\r\n",
-      "    - src/featurization.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "  split_dataset:\r\n",
-      "    cmd: python src/split_dataset.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "    - src/split_dataset.py\r\n",
-      "    params:\r\n",
-      "    - data_split\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/test.csv\r\n",
-      "    - data/train.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 111,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:48.250249Z",
-     "start_time": "2020-07-03T19:30:48.209429Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[dev e39a9d3] Add stage split_dataset\n",
-      " 2 files changed, 32 insertions(+)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "git add .\n",
-    "git commit -m \"Add stage split_dataset\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add train stage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 112,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:50.298161Z",
-     "start_time": "2020-07-03T19:30:48.275068Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Restored stage 'train' from run-cache                                           \n",
-      "Skipping run, checking out outputs\n",
-      "Adding stage 'train' in 'dvc.yaml'\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock dvc.yaml\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc run -n train \\\n",
-    "    -d src/train.py \\\n",
-    "    -d data/train.csv \\\n",
-    "    -o data/model.joblib \\\n",
-    "    -p data_split,train \\\n",
-    "        python src/train.py \\\n",
-    "            --config=params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 113,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:50.444828Z",
-     "start_time": "2020-07-03T19:30:50.324345Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "stages:\r\n",
-      "  data_load:\r\n",
-      "    cmd: python src/data_load.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - src/data_load.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    outs:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/iris.csv\r\n",
-      "  feature_extraction:\r\n",
-      "    cmd: python src/featurization.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris.csv\r\n",
-      "    - src/featurization.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "  split_dataset:\r\n",
-      "    cmd: python src/split_dataset.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "    - src/split_dataset.py\r\n",
-      "    params:\r\n",
-      "    - data_split\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/test.csv\r\n",
-      "    - data/train.csv\r\n",
-      "  train:\r\n",
-      "    cmd: python src/train.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/train.csv\r\n",
-      "    - src/train.py\r\n",
-      "    params:\r\n",
-      "    - data_split\r\n",
-      "    - train\r\n",
-      "    outs:\r\n",
-      "    - data/model.joblib\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 114,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:50.512656Z",
-     "start_time": "2020-07-03T19:30:50.468759Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[dev d084d1b] Add stage train\n",
-      " 2 files changed, 28 insertions(+)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "git add .\n",
-    "git commit -m \"Add stage train\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Add evaluate stage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 115,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:52.746281Z",
-     "start_time": "2020-07-03T19:30:50.546074Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Restored stage 'evaluate' from run-cache                                        \n",
-      "Skipping run, checking out outputs\n",
-      "Adding stage 'evaluate' in 'dvc.yaml'\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.yaml dvc.lock\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc run -n evaluate \\\n",
-    "    -d src/evaluate.py \\\n",
-    "    -d data/test.csv \\\n",
-    "    -d data/model.joblib \\\n",
-    "    -d data/classes.json \\\n",
-    "    -m data/metrics.json \\\n",
-    "    --plots data/cm.csv \\\n",
-    "    -p data_load,data_split,train,evaluate \\\n",
-    "        python src/evaluate.py \\\n",
-    "            --config=params.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 116,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:52.886914Z",
-     "start_time": "2020-07-03T19:30:52.769527Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "stages:\r\n",
-      "  data_load:\r\n",
-      "    cmd: python src/data_load.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - src/data_load.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    outs:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/iris.csv\r\n",
-      "  feature_extraction:\r\n",
-      "    cmd: python src/featurization.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris.csv\r\n",
-      "    - src/featurization.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "  split_dataset:\r\n",
-      "    cmd: python src/split_dataset.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/iris_featurized.csv\r\n",
-      "    - src/split_dataset.py\r\n",
-      "    params:\r\n",
-      "    - data_split\r\n",
-      "    - featurize\r\n",
-      "    outs:\r\n",
-      "    - data/test.csv\r\n",
-      "    - data/train.csv\r\n",
-      "  train:\r\n",
-      "    cmd: python src/train.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/train.csv\r\n",
-      "    - src/train.py\r\n",
-      "    params:\r\n",
-      "    - data_split\r\n",
-      "    - train\r\n",
-      "    outs:\r\n",
-      "    - data/model.joblib\r\n",
-      "  evaluate:\r\n",
-      "    cmd: python src/evaluate.py --config=params.yaml\r\n",
-      "    deps:\r\n",
-      "    - data/classes.json\r\n",
-      "    - data/model.joblib\r\n",
-      "    - data/test.csv\r",
-      "\r\n",
-      "    - src/evaluate.py\r\n",
-      "    params:\r\n",
-      "    - data_load\r\n",
-      "    - data_split\r\n",
-      "    - evaluate\r\n",
-      "    - train\r\n",
-      "    metrics:\r\n",
-      "    - data/metrics.json\r\n",
-      "    plots:\r\n",
-      "    - data/cm.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!cat dvc.yaml"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 117,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:30:52.971253Z",
-     "start_time": "2020-07-03T19:30:52.919420Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[dev ecf5bc5] Add stage evaluate\n",
-      " 2 files changed, 46 insertions(+)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "git add .\n",
-    "git commit -m \"Add stage evaluate\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-06-28T17:23:10.812463Z",
-     "start_time": "2020-06-28T17:23:09.886129Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Experimenting with reproducible pipelines"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## How to reproduce experiments?"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "> The most exciting part of DVC is reproducibility.\n",
-    ">> Reproducibility is the time you are getting benefits out of DVC instead of spending time defining the ML pipelines.\n",
-    "\n",
-    "> DVC tracks all the dependencies, which helps you iterate on ML models faster without thinking what was affected by your last change.\n",
-    ">> In order to track all the dependencies, DVC finds and reads ALL the DVC-files in a repository and builds a dependency graph (DAG) based on these files.\n",
-    "\n",
-    "> This is one of the differences between DVC reproducibility and traditional Makefile-like build automation tools (Make, Maven, Ant, Rakefile etc). It was designed in such a way to localize specification of DAG nodes.\n",
-    "If you run repro on any created DVC-file from our repository, nothing happens because nothing was changed in the defined pipeline.\n",
-    "\n",
-    "(c) dvc.org https://dvc.org/doc/tutorial/reproducibility"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 118,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:02.889684Z",
-     "start_time": "2020-07-03T19:31:00.936546Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Stage 'feature_extraction' didn't change, skipping\n",
-      "Stage 'split_dataset' didn't change, skipping\n",
-      "Stage 'train' didn't change, skipping\n",
-      "Stage 'evaluate' didn't change, skipping\n",
-      "Data and pipelines are up to date.\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Nothing to reproduce\n",
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Experiment 1: Add features\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Create new experiment branch\n",
-    "\n",
-    "Before editing the src/featurization.py file, please create and check out a new branch __exp1-ratio-features__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 119,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:05.089755Z",
-     "start_time": "2020-07-03T19:31:04.832150Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Switched to a new branch 'exp1-ratio-features'\n",
-      "  dev\u001b[m\n",
-      "  dvc-tutorial\u001b[m\n",
-      "* \u001b[32mexp1-ratio-features\u001b[m\n",
-      "  master\u001b[m\n"
-     ]
-    }
-   ],
-   "source": [
-    "# create new branch\n",
-    "\n",
-    "!git checkout -b exp1-ratio-features\n",
-    "!git branch"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Update featurization.py"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In file __featurization.py__, in function `get_features()`, after the line\n",
-    "\n",
-    "```python\n",
-    "    features = dataset.copy()\n",
-    "```\n",
-    "\n",
-    "add lines:\n",
-    "\n",
-    "```python\n",
-    "    features['sepal_length_to_sepal_width'] = features['sepal_length'] / features['sepal_width']\n",
-    "    features['petal_length_to_petal_width'] = features['petal_length'] / features['petal_width']\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Reproduce pipeline "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 120,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:28.674990Z",
-     "start_time": "2020-07-03T19:31:25.527004Z"
-    },
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Running stage 'feature_extraction' with command:\n",
-      "\tpython src/featurization.py --config=params.yaml\n",
-      "Updating lock file 'dvc.lock'                                                   \n",
-      "\n",
-      "Restored stage 'split_dataset' from run-cache\n",
-      "Skipping run, checking out outputs\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "Restored stage 'train' from run-cache\n",
-      "Skipping run, checking out outputs\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "Restored stage 'evaluate' from run-cache\n",
-      "Skipping run, checking out outputs\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 121,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:28.713726Z",
-     "start_time": "2020-07-03T19:31:28.699701Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>sepal_length</th>\n",
-       "      <th>sepal_width</th>\n",
-       "      <th>petal_length</th>\n",
-       "      <th>petal_width</th>\n",
-       "      <th>target</th>\n",
-       "      <th>sepal_length_to_sepal_width</th>\n",
-       "      <th>petal_length_to_petal_width</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>5.1</td>\n",
-       "      <td>3.5</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.457143</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4.9</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.633333</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>4.7</td>\n",
-       "      <td>3.2</td>\n",
-       "      <td>1.3</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.468750</td>\n",
-       "      <td>6.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4.6</td>\n",
-       "      <td>3.1</td>\n",
-       "      <td>1.5</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.483871</td>\n",
-       "      <td>7.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5.0</td>\n",
-       "      <td>3.6</td>\n",
-       "      <td>1.4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.388889</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   sepal_length  sepal_width  petal_length  petal_width  target  \\\n",
-       "0           5.1          3.5           1.4          0.2       0   \n",
-       "1           4.9          3.0           1.4          0.2       0   \n",
-       "2           4.7          3.2           1.3          0.2       0   \n",
-       "3           4.6          3.1           1.5          0.2       0   \n",
-       "4           5.0          3.6           1.4          0.2       0   \n",
-       "\n",
-       "   sepal_length_to_sepal_width  petal_length_to_petal_width  \n",
-       "0                     1.457143                          7.0  \n",
-       "1                     1.633333                          7.0  \n",
-       "2                     1.468750                          6.5  \n",
-       "3                     1.483871                          7.5  \n",
-       "4                     1.388889                          7.0  "
-      ]
-     },
-     "execution_count": 121,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Check features used in this pipeline\n",
-    "\n",
-    "import pandas as pd\n",
-    "\n",
-    "features = pd.read_csv('data/iris_featurized.csv')\n",
-    "features.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 122,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:28.867945Z",
-     "start_time": "2020-07-03T19:31:28.737094Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "On branch exp1-ratio-features\r\n",
-      "Changes not staged for commit:\r\n",
-      "  (use \"git add <file>...\" to update what will be committed)\r\n",
-      "  (use \"git restore <file>...\" to discard changes in working directory)\r\n",
-      "\t\u001b[31mmodified:   dvc.lock\u001b[m\r\n",
-      "\t\u001b[31mmodified:   src/featurization.py\u001b[m\r\n",
-      "\r\n",
-      "no changes added to commit (use \"git add\" and/or \"git commit -a\")\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git status"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 124,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:36.736663Z",
-     "start_time": "2020-07-03T19:31:35.023151Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path               Metric    Value    Change                                    \n",
-      "data/metrics.json  f1_score  0.15385  0.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Get difference with metric from previous pipeline\n",
-    "!dvc metrics diff --all"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 125,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:31:39.838836Z",
-     "start_time": "2020-07-03T19:31:39.445353Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[exp1-ratio-features 1fc8ec3] Experiment with new features\n",
-      " 3 files changed, 872 insertions(+), 510 deletions(-)\n",
-      "fatal: tag 'exp1_ratio_features' already exists\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git add .\n",
-    "!git commit -m \"Experiment with new features\"\n",
-    "!git tag -a \"exp1_ratio_features\" -m \"Experiment with new features\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Experiment 2: Tune Logistic Regression"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Create a new experiment branch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 127,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:32:43.387938Z",
-     "start_time": "2020-07-03T19:32:43.131917Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Switched to a new branch 'exp2-tuning-logreg'\n",
-      "  dev\u001b[m\n",
-      "  dvc-tutorial\u001b[m\n",
-      "  exp1-ratio-features\u001b[m\n",
-      "* \u001b[32mexp2-tuning-logreg\u001b[m\n",
-      "  master\u001b[m\n"
-     ]
-    }
-   ],
-   "source": [
-    "# create new branch for experiment\n",
-    "\n",
-    "!git checkout -b exp2-tuning-logreg\n",
-    "!git branch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 129,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:32:52.254763Z",
-     "start_time": "2020-07-03T19:32:50.225661Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Stage 'feature_extraction' didn't change, skipping\n",
-      "Stage 'split_dataset' didn't change, skipping\n",
-      "Stage 'train' didn't change, skipping\n",
-      "Stage 'evaluate' didn't change, skipping\n",
-      "Data and pipelines are up to date.\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Nothing to reproduce since code was checked out by `git checkout`\n",
-    "# and data files were checked out by `dvc checkout`\n",
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Tuning parameters\n",
-    "\n",
-    "in file __train.py__ :\n",
-    "\n",
-    "replace LogisticRegression params with:\n",
-    "\n",
-    "```python\n",
-    "    clf = LogisticRegression(C=0.1, solver='newton-cg', multi_class='multinomial', max_iter=100)\n",
-    "```\n",
-    "__Note__: here we changed the logistic regression hyperparameter C to 0.1\n",
-    "\n",
-    "\n",
-    "https://dvc.org/doc/tutorials/get-started/experiments#tuning-parameters"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Reproduce pipelines"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 130,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:33:22.746410Z",
-     "start_time": "2020-07-03T19:33:19.314933Z"
-    },
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Stage 'feature_extraction' didn't change, skipping\n",
-      "Stage 'split_dataset' didn't change, skipping\n",
-      "Running stage 'train' with command:\n",
-      "\tpython src/train.py --config=params.yaml\n",
-      "Updating lock file 'dvc.lock'                                                   \n",
-      "\n",
-      "Restored stage 'evaluate' from run-cache\n",
-      "Skipping run, checking out outputs\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# re-run pipeline \n",
-    "\n",
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 131,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:33:24.945534Z",
-     "start_time": "2020-07-03T19:33:24.825464Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"f1_score\": 1.0}"
-     ]
-    }
-   ],
-   "source": [
-    "# Show the metrics produced by the current pipeline run\n",
-    "!cat data/metrics.json"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 134,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:34:06.466000Z",
-     "start_time": "2020-07-03T19:34:05.328958Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\tdata/metrics.json:                                                             \n",
-      "\t\tf1_score: 1.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc metrics show"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 135,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:34:08.160934Z",
-     "start_time": "2020-07-03T19:34:06.494683Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path               Metric    Value    Change                                    \n",
-      "data/metrics.json  f1_score  1.0      0.84615\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc metrics diff --all"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Commit"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 137,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:15.808072Z",
-     "start_time": "2020-07-03T19:36:15.762972Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "On branch exp2-tuning-logreg\n",
-      "nothing to commit, working tree clean\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "\n",
-    "git add .\n",
-    "git commit -m \"Tune model. LogisticRegression. C=0.1\"\n",
-    "git tag -a \"exp2_tuning_logreg\" -m \"Tune model. LogisticRegression. C=0.1\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Experiment 3: Use SVM"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:14:01.831192Z",
-     "start_time": "2020-07-03T19:14:01.829062Z"
-    }
-   },
-   "source": [
-    "### Create a new experiment branch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 138,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:20.443851Z",
-     "start_time": "2020-07-03T19:36:20.187021Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Switched to a new branch 'exp3-svm'\n",
-      "  dev\u001b[m\n",
-      "  dvc-tutorial\u001b[m\n",
-      "  exp1-ratio-features\u001b[m\n",
-      "  exp2-tuning-logreg\u001b[m\n",
-      "* \u001b[32mexp3-svm\u001b[m\n",
-      "  master\u001b[m\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git checkout -b exp3-svm\n",
-    "!git branch"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Update train.py\n",
-    "\n",
-    "in file __train.py__ replace line\n",
-    "\n",
-    "```python\n",
-    "    clf = LogisticRegression(C=0.1, solver='newton-cg', multi_class='multinomial', max_iter=100)\n",
-    "```\n",
-    "\n",
-    "with line\n",
-    "\n",
-    "```python\n",
-    "    clf = SVC(C=0.01, kernel='linear', gamma='scale', degree=5)\n",
-    "```\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Reproduce pipeline "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 139,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:35.537208Z",
-     "start_time": "2020-07-03T19:36:32.544097Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stage 'data_load' didn't change, skipping                                       \n",
-      "Stage 'feature_extraction' didn't change, skipping\n",
-      "Stage 'split_dataset' didn't change, skipping\n",
-      "Running stage 'train' with command:\n",
-      "\tpython src/train.py --config=params.yaml\n",
-      "Updating lock file 'dvc.lock'                                                   \n",
-      "\n",
-      "Restored stage 'evaluate' from run-cache\n",
-      "Skipping run, checking out outputs\n",
-      "Updating lock file 'dvc.lock'\n",
-      "\n",
-      "To track the changes with git, run:\n",
-      "\n",
-      "\tgit add dvc.lock\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc repro"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 140,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:38.995561Z",
-     "start_time": "2020-07-03T19:36:37.831841Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\tdata/metrics.json:                                                             \n",
-      "\t\tf1_score: 1.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc metrics show"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 141,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:40.521084Z",
-     "start_time": "2020-07-03T19:36:40.392754Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "On branch exp3-svm\r\n",
-      "Changes not staged for commit:\r\n",
-      "  (use \"git add <file>...\" to update what will be committed)\r\n",
-      "  (use \"git restore <file>...\" to discard changes in working directory)\r\n",
-      "\t\u001b[31mmodified:   dvc.lock\u001b[m\r\n",
-      "\t\u001b[31mmodified:   src/train.py\u001b[m\r\n",
-      "\r\n",
-      "no changes added to commit (use \"git add\" and/or \"git commit -a\")\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git status"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 142,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:36:41.766798Z",
-     "start_time": "2020-07-03T19:36:41.377185Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[exp3-svm 1474ec0] Experiment 3 with SVM estimator\r\n",
-      " 2 files changed, 5 insertions(+), 4 deletions(-)\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git add .\n",
-    "!git commit -m \"Experiment 3 with SVM estimator\"\n",
-    "!git tag -a \"exp3_svm\" -m \"Experiment 3 with SVM estimator\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Merge best experiment into `dvc-tutorial` branch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 153,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:45:17.537969Z",
-     "start_time": "2020-07-03T19:45:17.463715Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Auto-merging src/train.py\n",
-      "CONFLICT (content): Merge conflict in src/train.py\n",
-      "Auto-merging src/featurization.py\n",
-      "CONFLICT (add/add): Merge conflict in dvc.lock\n",
-      "Auto-merging dvc.lock\n",
-      "Auto-merging dvc-3-automate-experiments.ipynb\n",
-      "CONFLICT (content): Merge conflict in dvc-3-automate-experiments.ipynb\n",
-      "Automatic merge failed; fix conflicts and then commit the result.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Previous HEAD position was 1474ec0 Experiment 3 with SVM estimator\n",
-      "Switched to branch 'dvc-tutorial'\n"
-     ]
-    },
-    {
-     "ename": "CalledProcessError",
-     "evalue": "Command 'b'\\ngit checkout dvc-tutorial \\ngit merge exp3_svm\\n'' returned non-zero exit status 1.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mCalledProcessError\u001b[0m                        Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-153-8dc7196dd41b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'bash'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'\\ngit checkout dvc-tutorial \\ngit merge exp3_svm\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m   2350\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2351\u001b[0m                 \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2352\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2353\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2354\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/IPython/core/magics/script.py\u001b[0m in \u001b[0;36mnamed_script_magic\u001b[0;34m(line, cell)\u001b[0m\n\u001b[1;32m    140\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    141\u001b[0m                 \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscript\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshebang\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    144\u001b[0m         \u001b[0;31m# write a basic docstring:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m</Users/mnrozhkov/anaconda3/lib/python3.7/site-packages/decorator.py:decorator-gen-110>\u001b[0m in \u001b[0;36mshebang\u001b[0;34m(self, line, cell)\u001b[0m\n",
-      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    186\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m         \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/IPython/core/magics/script.py\u001b[0m in \u001b[0;36mshebang\u001b[0;34m(self, line, cell)\u001b[0m\n\u001b[1;32m    243\u001b[0m             \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    244\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_error\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturncode\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mCalledProcessError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturncode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    247\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_run_script\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mto_close\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mCalledProcessError\u001b[0m: Command 'b'\\ngit checkout dvc-tutorial \\ngit merge exp3_svm\\n'' returned non-zero exit status 1."
-     ]
-    }
-   ],
-   "source": [
-    "%%bash\n",
-    "\n",
-    "git checkout dvc-tutorial \n",
-    "git merge exp3_svm"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Compare experiment"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Compare params "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 147,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:39:20.728429Z",
-     "start_time": "2020-07-03T19:39:19.065249Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0m                                                                            "
-     ]
-    }
-   ],
-   "source": [
-    "# Get params diffs \n",
-    "\n",
-    "!dvc params diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 148,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:39:29.288964Z",
-     "start_time": "2020-07-03T19:39:27.598159Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path         Param                         Old                       New        \n",
-      "params.yaml  data_load.classes_names_path  data/classes.json         data/classes.json\n",
-      "params.yaml  data_load.raw_data_path       data/iris.csv             data/iris.csv\n",
-      "params.yaml  data_split.test_path          data/test.csv             data/test.csv\n",
-      "params.yaml  data_split.test_size          0.2                       0.2\n",
-      "params.yaml  data_split.train_path         data/train.csv            data/train.csv\n",
-      "params.yaml  evaluate.confusion_matrix     data/cm.csv               data/cm.csv\n",
-      "params.yaml  evaluate.metrics_file         data/metrics.json         data/metrics.json\n",
-      "params.yaml  featurize.features_path       data/iris_featurized.csv  data/iris_featurized.csv\n",
-      "params.yaml  featurize.target_column       target                    target\n",
-      "params.yaml  train.model_path              data/model.joblib         data/model.joblib\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Compare parameters with a specific commit, a tag or any revision\n",
-    "\n",
-    "!dvc params diff --all"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:09:20.304575Z",
-     "start_time": "2020-07-03T19:09:18.649548Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"params.yaml\": {\"evaluate.metrics_file\": {\"old\": \"data/metrics.json\", \"new\": \"data/metrics.json\"}, \"featurize.features_path\": {\"old\": \"data/iris_featurized.csv\", \"new\": \"data/iris_featurized.csv\"}, \"data_load.classes_names_path\": {\"old\": \"data/classes.json\", \"new\": \"data/classes.json\"}, \"data_split.test_path\": {\"old\": \"data/test.csv\", \"new\": \"data/test.csv\"}, \"train.model_path\": {\"old\": \"data/model.joblib\", \"new\": \"data/model.joblib\"}, \"featurize.target_column\": {\"old\": \"target\", \"new\": \"target\"}, \"data_load.raw_data_path\": {\"old\": \"data/iris.csv\", \"new\": \"data/iris.csv\"}, \"evaluate.confusion_matrix\": {\"old\": \"data/cm.csv\", \"new\": \"data/cm.csv\"}, \"data_split.test_size\": {\"old\": 0.2, \"new\": 0.2, \"diff\": 0.0}, \"data_split.train_path\": {\"old\": \"data/train.csv\", \"new\": \"data/train.csv\"}}}\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc params diff --show-json --all"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:09:27.495017Z",
-     "start_time": "2020-07-03T19:09:25.848748Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "| Path        | Param                        | Old                      | New                      |\n",
-      "|-------------|------------------------------|--------------------------|--------------------------|\n",
-      "| params.yaml | data_load.classes_names_path | data/classes.json        | data/classes.json        |\n",
-      "| params.yaml | data_load.raw_data_path      | data/iris.csv            | data/iris.csv            |\n",
-      "| params.yaml | data_split.test_path         | data/test.csv            | data/test.csv            |\n",
-      "| params.yaml | data_split.test_size         | 0.2                      | 0.2                      |\n",
-      "| params.yaml | data_split.train_path        | data/train.csv           | data/train.csv           |\n",
-      "| params.yaml | evaluate.confusion_matrix    | data/cm.csv              | data/cm.csv              |\n",
-      "| params.yaml | evaluate.metrics_file        | data/metrics.json        | data/metrics.json        |\n",
-      "| params.yaml | featurize.features_path      | data/iris_featurized.csv | data/iris_featurized.csv |\n",
-      "| params.yaml | featurize.target_column      | target                   | target                   |\n",
-      "| params.yaml | train.model_path             | data/model.joblib        | data/model.joblib        |\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc params diff --show-md --all"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:00:44.847802Z",
-     "start_time": "2020-07-03T19:00:44.717758Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mcommit 736c92a6eeda6261f528d7a2e2d4db4cb306fa03\u001b[m\u001b[33m (\u001b[m\u001b[1;36mHEAD -> \u001b[m\u001b[1;32mexp2-svm\u001b[m\u001b[33m, \u001b[m\u001b[1;33mtag: exp2_svm\u001b[m\u001b[33m)\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Fri Jul 3 21:49:25 2020 +0300\r\n",
-      "\r\n",
-      "    Experiment 2 with SVM estimator\r\n",
-      "\r\n",
-      "\u001b[33mcommit 24f75fdcc9bede20cbecf88697b5d3f8ed56f58c\u001b[m\u001b[33m (\u001b[m\u001b[1;33mtag: exp1_ratio_features\u001b[m\u001b[33m, \u001b[m\u001b[1;32mexp1-ratio-features\u001b[m\u001b[33m)\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Fri Jul 3 21:48:42 2020 +0300\r\n",
-      "\r\n",
-      "    Experiment with new features\r\n",
-      "\r\n",
-      "\u001b[33mcommit 34a0bc667f86c3b5e388bef672eb598b8a6a7788\u001b[m\u001b[33m (\u001b[m\u001b[1;32mdvc-tutorial\u001b[m\u001b[33m)\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 10:35:03 2020 +0300\r\n",
-      "\r\n",
-      "    Add stage evaluate\r\n",
-      "\r\n",
-      "\u001b[33mcommit 4c45a4ff702106d78bbaf8d356e0e95ca268e05b\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 10:34:09 2020 +0300\r\n",
-      "\r\n",
-      "    Add stage train\r\n",
-      "\r\n",
-      "\u001b[33mcommit f41781d2c4855762c4405636491bc014cc00bd20\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 10:34:00 2020 +0300\r\n",
-      "\r\n",
-      "    Add stage split_dataset\r\n",
-      "\r\n",
-      "\u001b[33mcommit dbfc854a931baf57ad116f811c2cea39d4fb69a9\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 10:33:51 2020 +0300\r\n",
-      "\r\n",
-      "    Add stage features_extraction\r\n",
-      "\r\n",
-      "\u001b[33mcommit f2859056db4c53e11ba0593388fddd19018d577b\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 10:32:26 2020 +0300\r\n",
-      "\r\n",
-      "    Initialize DVC\r\n",
-      "\r\n",
-      "\u001b[33mcommit 1102dc2e3f636b2d37558f95a960c788f3de32ed\u001b[m\u001b[33m (\u001b[m\u001b[1;31morigin/dev\u001b[m\u001b[33m, \u001b[m\u001b[1;32mdev\u001b[m\u001b[33m)\u001b[m\r\n",
-      "Merge: 855c61a 92ac211\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Wed Jul 1 07:22:32 2020 +0000\r\n",
-      "\r\n",
-      "    Merge branch 'update-confusion-matrix' into 'dev'\r\n",
-      "    \r\n",
-      "    update confusion matrix\r\n",
-      "    \r\n",
-      "    See merge request 7labs.ru/tutorials-dvc/dvc-3-automate-experiments!4\r\n",
-      "\r\n",
-      "\u001b[33mcommit 92ac211f2139095965d0e26304d2d39003136def\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Tue Jun 30 13:08:30 2020 +0900\r\n",
-      "\r\n",
-      "    update confusion matrix\r\n",
-      "\r\n",
-      "\u001b[33mcommit 855c61ac3f02f8938445fe749846e20d01e0f247\u001b[m\r\n",
-      "Merge: 22aeb23 7fbf4d8\r\n",
-      "Author: Alexander Kolosov <alexckolosov@gmail.com>\r\n",
-      "Date:   Mon Jun 29 08:47:37 2020 +0000\r\n",
-      "\r\n",
-      "    Merge branch 'dev-update-pipelines' into 'dev'\r\n",
-      "    \r\n",
-      "    Dev update pipelines\r\n",
-      "    \r\n",
-      "    See merge request 7labs.ru/tutorials-dvc/dvc-3-automate-experiments!3\r\n",
-      "\r\n",
-      "\u001b[33mcommit 7fbf4d8f4e54be947f77dce09191b4f6fbb287f0\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Mon Jun 29 08:47:37 2020 +0000\r\n",
-      "\r\n",
-      "    Dev update pipelines\r\n",
-      "\r\n",
-      "\u001b[33mcommit 22aeb23eb6b54f12f11c76a5714dbf6bff5f11f9\u001b[m\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Sun Jun 28 19:02:29 2020 +0300\r\n",
-      "\r\n",
-      "    Update name of tutorial and notebook\r\n",
-      "\r\n",
-      "\u001b[33mcommit 110a584e41fa7c140bbaf8130f70d4112e58d1a4\u001b[m\r\n",
-      "Merge: 2d7e834 a8d3200\r\n",
-      "Author: Mikhail <mnrozhkov@gmail.com>\r\n",
-      "Date:   Sat Jun 27 07:49:11 2020 +0000\r\n",
-      "\r\n",
-      "    Merge branch 'update-software' into 'dev'\r\n",
-      "    \r\n",
-      "    Update software\r\n",
-      "    \r\n",
-      "    See merge request 7labs.ru/tutorials-dvc/dvc-3-automate-experiments!2\r\n",
-      "\r\n",
-      "\u001b[33mcommit a8d3200b8cbffdc4af1c7204710d217e9f685928\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Fri Jun 26 17:58:32 2020 +0900\r\n",
-      "\r\n",
-      "    intall toc for jupyter notebook\r\n",
-      "\r\n",
-      "\u001b[33mcommit 8b042ad196928f9584b4bbce058625896af78d9d\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Fri Jun 26 17:58:12 2020 +0900\r\n",
-      "\r\n",
-      "    upgrade dvc\r\n",
-      "\r\n",
-      "\u001b[33mcommit 2d7e834a6d115d1b47253377b3baaace559e3259\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Thu Jun 11 12:53:18 2020 +0900\r\n",
-      "\r\n",
-      "    add data/ to .gitignore\r\n",
-      "\r\n",
-      "\u001b[33mcommit 8817b3ed1f82ed1c4feb9122d49237b37356e70e\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Wed Jun 10 22:56:32 2020 +0900\r\n",
-      "\r\n",
-      "    update Lesson 4.ipynb: append description of dvc plots diff\r\n",
-      "\r\n",
-      "\u001b[33mcommit a8db726c3f368c39180d61d21f21bf6727db20c0\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Wed Jun 10 22:44:00 2020 +0900\r\n",
-      "\r\n",
-      "    update Lesson 4.ipynb: add section for dvc metrics diff and dvc plots\r\n",
-      "\r\n",
-      "\u001b[33mcommit 77559e316fe6b5fd0a11f27a06fbc9eed1c2b606\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Wed Jun 10 22:43:09 2020 +0900\r\n",
-      "\r\n",
-      "    update src/evaluate.py: put metric and confusion matrix in separated files\r\n",
-      "\r\n",
-      "\u001b[33mcommit a0afac2ff2dc7c5815c72ec3770888b67e5f04e7\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Wed Jun 10 12:05:51 2020 +0900\r\n",
-      "\r\n",
-      "    refactor code modules\r\n",
-      "\r\n",
-      "\u001b[33mcommit 73846297879b1f1be3868c64e73b7d8ad6966b09\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Wed Jun 10 12:04:37 2020 +0900\r\n",
-      "\r\n",
-      "    fix Lesson 4.ipynb\r\n",
-      "\r\n",
-      "\u001b[33mcommit b6ba776f8607c6481e34f8a40af4c23a5cd36990\u001b[m\r\n",
-      "Author: Alex <alexckolosov@gmail.com>\r\n",
-      "Date:   Tue Jun 9 19:27:13 2020 +0900\r\n",
-      "\r\n",
-      "    create repo structure for  lesson 4\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# To see the difference between two specific commits, both need to be specified:\n",
-    "\n",
-    "!git log"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:01:12.974894Z",
-     "start_time": "2020-07-03T19:01:11.320625Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0m                                                                            "
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "!dvc params diff 24f75fdcc9bede20cbecf88697b5d3f8ed56f58c HEAD^"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Show metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 149,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:42:07.828077Z",
-     "start_time": "2020-07-03T19:42:06.658092Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\tdata/metrics.json:                                                             \n",
-      "\t\tf1_score: 1.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# this pipeline metrics \n",
-    "\n",
-    "!dvc metrics show"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 150,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:42:10.492627Z",
-     "start_time": "2020-07-03T19:42:09.201160Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "workspace:                                                                      \n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "dev:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "dvc-tutorial:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 0.9305555555555555\n",
-      "exp1-ratio-features:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 0.15384615384615383\n",
-      "exp2-tuning-logreg:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "exp3-svm:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "exp2_tuning_logreg:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "exp3_svm:\n",
-      "\tdata/metrics.json:\n",
-      "\t\tf1_score: 1.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# show all commited pipelines metrics (all branch and tags)\n",
-    "\n",
-    "!dvc metrics show -a -T"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Compare metrics (get differences)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 151,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:43:27.774038Z",
-     "start_time": "2020-07-03T19:43:26.104962Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0m                                                                            "
-     ]
-    }
-   ],
-   "source": [
-    "!dvc metrics diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 152,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:44:46.444858Z",
-     "start_time": "2020-07-03T19:44:44.738955Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path               Metric    Value    Change                                    \n",
-      "data/metrics.json  f1_score  1.0      0.0\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# --all - list all metrics, even those without changes\n",
-    "\n",
-    "!dvc metrics diff --all"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "* чтобы сравнить текущую метрики из текущего коммита и из другого, нужно указать другой (old) коммит:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 62,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:11:04.120125Z",
-     "start_time": "2020-07-03T19:11:02.460457Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path               Metric    Value    Change                                    \n",
-      "data/metrics.json  f1_score  1.0      0.84615\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Compare old and new branches\n",
-    "\n",
-    "\n",
-    "!dvc metrics diff exp1-ratio-features exp2-svm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:10:59.357203Z",
-     "start_time": "2020-07-03T19:10:57.708759Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Path               Metric    Value    Change                                    \n",
-      "data/metrics.json  f1_score  0.93056  0.77671\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Equivalent to `!dvc metrics diff exp1-ratio-features dvc-tutorial`, because dvc-tutorial - current branch\n",
-    "\n",
-    "!dvc metrics diff exp1-ratio-features"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 157,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:50:29.269796Z",
-     "start_time": "2020-07-03T19:50:29.132897Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Switched to branch 'dev'\r\n",
-      "Your branch is ahead of 'origin/dev' by 7 commits.\r\n",
-      "  (use \"git push\" to publish your local commits)\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!git checkout dev -f"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "* чтобы выводить не только новую, но и старую метрики, нужно добавить опцию --old"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 154,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:48:02.485718Z",
-     "start_time": "2020-07-03T19:48:01.562562Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[31mERROR\u001b[39m: failed to show metrics diff - unable to read: 'dvc.lock', YAML file structure is corrupted: while scanning a simple key\n",
-      "  in \"<unicode string>\", line 22, column 1\n",
-      "could not find expected ':'\n",
-      "  in \"<unicode string>\", line 23, column 8\n",
-      "\n",
-      "\u001b[33mHaving any troubles?\u001b[39m Hit us up at \u001b[34mhttps://dvc.org/support\u001b[39m, we are always happy to help!\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Use --old to show both old and new metrics vlues \n",
-    "\n",
-    "!dvc metrics diff --old exp1-ratio-features exp2-svm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 158,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T19:50:33.253819Z",
-     "start_time": "2020-07-03T19:50:31.570404Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "| Path   | Metric   | Value   | Change   |                                      \n",
-      "|--------|----------|---------|----------|\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc metrics diff --show-md"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Build Plots\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 165,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T20:08:18.872602Z",
-     "start_time": "2020-07-03T20:08:18.869605Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from IPython.display import IFrame"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Show"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 176,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T20:10:21.387140Z",
-     "start_time": "2020-07-03T20:10:20.271263Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "file:///Users/mnrozhkov/dev/dvc/course/dvc-3-automate-experiments/data/plots-show.html\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "!dvc plots show  --template confusion \"data/cm.csv\" -x actual -y predicted -o data/plots-show.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 177,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T20:10:21.421474Z",
-     "start_time": "2020-07-03T20:10:21.416923Z"
-    },
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"500\"\n",
-       "            height=\"500\"\n",
-       "            src=\"data/plots-show.html\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x111a40438>"
-      ]
-     },
-     "execution_count": 177,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "IFrame(src='data/plots-show.html', width=500, height=500)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 192,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T20:27:04.674839Z",
-     "start_time": "2020-07-03T20:27:03.879598Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "file:///Users/mnrozhkov/dev/dvc/course/dvc-3-automate-experiments/data/plots-diff.html\n",
-      "\u001b[0m"
-     ]
-    }
-   ],
-   "source": [
-    "# Build metircs plots for all 3 experiments\n",
-    "!dvc plots diff -t confusion -o data/plots-diff.html exp1-ratio-features exp3-svm -x predicted"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 194,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-07-03T20:27:34.434387Z",
-     "start_time": "2020-07-03T20:27:34.430369Z"
-    },
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"1000\"\n",
-       "            height=\"400\"\n",
-       "            src=\"data/plots-diff.html\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x111a761d0>"
-      ]
-     },
-     "execution_count": 194,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "IFrame(src='data/plots-diff.html', width=1000, height=400)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.2"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": false,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {
-    "height": "calc(100% - 180px)",
-    "left": "10px",
-    "top": "150px",
-    "width": "230.953px"
-   },
-   "toc_section_display": true,
-   "toc_window_display": true
-  },
-  "varInspector": {
-   "cols": {
-    "lenName": 16,
-    "lenType": 16,
-    "lenVar": 40
-   },
-   "kernels_config": {
-    "python": {
-     "delete_cmd_postfix": "",
-     "delete_cmd_prefix": "del ",
-     "library": "var_list.py",
-     "varRefreshCmd": "print(var_dic_list())"
-    },
-    "r": {
-     "delete_cmd_postfix": ") ",
-     "delete_cmd_prefix": "rm(",
-     "library": "var_list.r",
-     "varRefreshCmd": "cat(var_dic_list()) "
-    }
-   },
-   "types_to_exclude": [
-    "module",
-    "function",
-    "builtin_function_or_method",
-    "instance",
-    "_Feature"
-   ],
-   "window_display": false
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/models/.gitignore b/models/.gitignore
new file mode 100644
index 00000000..b722e9e1
--- /dev/null
+++ b/models/.gitignore
@@ -0,0 +1 @@
+!.gitignore
\ No newline at end of file
diff --git a/params.yaml b/params.yaml
index 9bb859cd..933640bd 100644
--- a/params.yaml
+++ b/params.yaml
@@ -1,8 +1,8 @@
-
 data_load:
   raw_data_path: data/iris.csv
   classes_names_path: data/classes.json
 
+
 featurize:
   features_path: data/iris_featurized.csv
   target_column: target
@@ -15,9 +15,9 @@ data_split:
 
 
 train:
-  model_path: data/model.joblib
+  model_path: models/model.joblib
 
 
 evaluate:
-  metrics_file: data/metrics.json
-  confusion_matrix: data/cm.csv
+  metrics_file: reports/metrics.json
+  confusion_matrix: reports/cm.csv
diff --git a/reports/.gitignore b/reports/.gitignore
new file mode 100644
index 00000000..b722e9e1
--- /dev/null
+++ b/reports/.gitignore
@@ -0,0 +1 @@
+!.gitignore
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d470460a..79a1c05b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,10 @@
-joblib==0.15.1
-jupyter==1.0.0
-jupyter_contrib_nbextensions==0.5.1
-ipykernel==5.3.0
-matplotlib==3.1.2
-numpy==1.18.1
-pandas==1.0.0
-pyyaml==5.3
-scikit-learn==0.23.1
-scipy==1.4.1
-tqdm==4.42.0
\ No newline at end of file
+dvc==2.57.2
+joblib==1.2.0
+matplotlib==3.7.1
+numpy==1.24.3
+pandas==2.0.1
+python-box==7.0.1
+pyyaml==6.0
+scikit-learn==1.2.2
+scipy==1.10.1
+tqdm==4.65.0
\ No newline at end of file
diff --git a/src/data_load.py b/src/data_load.py
index b07a8258..04005193 100644
--- a/src/data_load.py
+++ b/src/data_load.py
@@ -2,7 +2,8 @@
 import json
 from sklearn.datasets import load_iris
 from typing import Text
-import yaml
+
+from src.utils import load_config
 
 
 def data_load(config_path: Text) -> None:
@@ -12,18 +13,16 @@ def data_load(config_path: Text) -> None:
         config_path {Text}: path to config
     """
 
-    config = yaml.safe_load(open(config_path))
-    raw_data_path = config['data_load']['raw_data_path']
-    classes_names_path = config['data_load']['classes_names_path']
+    config = load_config(config_path)
 
     data = load_iris(as_frame=True)
     classes_names = data.target_names.tolist()
 
     dataset = data.frame
     dataset.columns = [colname.strip(' (cm)').replace(' ', '_') for colname in dataset.columns.tolist()]
-    dataset.to_csv(raw_data_path, index=False)
+    dataset.to_csv(config.data_load.raw_data_path, index=False)
 
-    with open(classes_names_path, 'w') as classes_names_file:
+    with open(config.data_load.classes_names_path, 'w') as classes_names_file:
         json.dump(obj={'classes_names': classes_names}, fp=classes_names_file)
 
 
diff --git a/src/evaluate.py b/src/evaluate.py
index bc0d6098..d9c4e428 100644
--- a/src/evaluate.py
+++ b/src/evaluate.py
@@ -4,7 +4,8 @@
 import pandas as pd
 from sklearn.metrics import f1_score
 from typing import Text
-import yaml
+
+from src.utils import load_config
 
 
 def evaluate(config_path: Text) -> None:
@@ -13,40 +14,29 @@ def evaluate(config_path: Text) -> None:
        config_path {Text}: path to config
     """
 
-    config = yaml.safe_load(open(config_path))
-    classes_names_path = config['data_load']['classes_names_path']
-    test_dataset_path = config['data_split']['test_path']
-    model_path = config['train']['model_path']
-    metrics_path = config['evaluate']['metrics_file']
-    confusion_matrix_path = config['evaluate']['confusion_matrix']
+    config = load_config(config_path)
 
-    classes = json.load(open(classes_names_path))['classes_names']
+    classes = json.load(open(config.data_load.classes_names_path))['classes_names']
 
-    test_dataset = pd.read_csv(test_dataset_path)
+    test_dataset = pd.read_csv(config.data_split.test_path)
     y = test_dataset.loc[:, 'target'].values.astype('float32')
     X = test_dataset.drop('target', axis=1).values
 
-    clf = joblib.load(model_path)
+    clf = joblib.load(config.train.model_path)
 
     prediction = clf.predict(X)
     f1 = f1_score(y_true=y, y_pred=prediction, average='macro')
 
     json.dump(
         obj={'f1_score': f1},
-        fp=open(metrics_path, 'w')
+        fp=open(config.evaluate.metrics_file, 'w')
     )
 
-    # pd.DataFrame({'actual': y, 'predicted': prediction}).apply(
-    #     lambda series: series.map(
-    #         {i: cls_name for i, cls_name in enumerate(classes)}
-    #     )
-    # ).to_csv(confusion_matrix_path, index=False)
-
     mapping = {i: cls_name for i, cls_name in enumerate(classes)}
     cmdf = pd.DataFrame(
         {'actual': y, 'predicted': prediction}
     ).apply(lambda series: series.map(mapping))
-    cmdf.to_csv(confusion_matrix_path, index=False)
+    cmdf.to_csv(config.evaluate.confusion_matrix, index=False)
 
 
 if __name__ == '__main__':
@@ -56,4 +46,3 @@ def evaluate(config_path: Text) -> None:
     args = args_parser.parse_args()
 
     evaluate(config_path=args.config)
-
diff --git a/src/featurization.py b/src/featurization.py
index 2306f34f..9aea31a6 100644
--- a/src/featurization.py
+++ b/src/featurization.py
@@ -1,7 +1,8 @@
 import argparse
 import pandas as pd
 from typing import Text
-import yaml
+
+from src.utils import load_config
 
 
 def get_features(dataset):
@@ -17,13 +18,11 @@ def featurize(config_path: Text) -> None:
         config_path {Text}: path to config
     """
 
-    config = yaml.safe_load(open(config_path))
-    raw_data_path = config['data_load']['raw_data_path']
-    featurized_dataset_path = config['featurize']['features_path']
+    config = load_config(config_path)
 
-    dataset = pd.read_csv(raw_data_path)
+    dataset = pd.read_csv(config.data_load.raw_data_path)
     features = get_features(dataset)
-    features.to_csv(featurized_dataset_path, index=False)
+    features.to_csv(config.featurize.features_path, index=False)
 
 
 if __name__ == '__main__':
diff --git a/src/split_dataset.py b/src/split_dataset.py
index ffd6f119..8b8bd38a 100644
--- a/src/split_dataset.py
+++ b/src/split_dataset.py
@@ -2,7 +2,8 @@
 from sklearn.model_selection import train_test_split
 import pandas as pd
 from typing import Text
-import yaml
+
+from src.utils import load_config
 
 
 def split_train_test(config_path: Text) -> None:
@@ -11,20 +12,15 @@ def split_train_test(config_path: Text) -> None:
        config_path {Text}: path to config
     """
 
-    config = yaml.safe_load(open(config_path))
-    featurized_dataset_path = config['featurize']['features_path']
-    train_dataset_path = config['data_split']['train_path']
-    test_dataset_path = config['data_split']['test_path']
-    test_size = config['data_split']['test_size']
-
-    dataset = pd.read_csv(featurized_dataset_path)
+    config = load_config(config_path)
+    dataset = pd.read_csv(config.featurize.features_path)
 
     # Split in train/test
-
+    test_size = config.data_split.test_size
     df_train, df_test = train_test_split(dataset, test_size=test_size, random_state=42)
 
-    df_train.to_csv(train_dataset_path, index=False)
-    df_test.to_csv(test_dataset_path, index=False)
+    df_train.to_csv(config.data_split.train_path, index=False)
+    df_test.to_csv(config.data_split.test_path, index=False)
 
 
 if __name__ == '__main__':
diff --git a/src/train.py b/src/train.py
index fefd056e..56f39c73 100644
--- a/src/train.py
+++ b/src/train.py
@@ -4,7 +4,8 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC
 from typing import Text
-import yaml
+
+from src.utils import load_config
 
 
 def train(config_path: Text) -> None:
@@ -13,11 +14,9 @@ def train(config_path: Text) -> None:
        config_path {Text}: path to config
     """
 
-    config = yaml.safe_load(open(config_path))
-    train_dataset_path = config['data_split']['train_path']
-    model_path = config['train']['model_path']
+    config = load_config(config_path)
     # Load train set
-    train_dataset = pd.read_csv(train_dataset_path)
+    train_dataset = pd.read_csv(config.data_split.train_path)
 
     # Get X and Y
     y = train_dataset.loc[:, 'target'].values.astype('float32')
@@ -27,7 +26,7 @@ def train(config_path: Text) -> None:
     clf = LogisticRegression(C=0.00001, solver='lbfgs', multi_class='multinomial', max_iter=100)
     clf.fit(X, y)
 
-    joblib.dump(clf, model_path)
+    joblib.dump(clf, config.train.model_path)
 
 
 if __name__ == '__main__':
@@ -36,4 +35,4 @@ def train(config_path: Text) -> None:
     args_parser.add_argument('--config', dest='config', required=True)
     args = args_parser.parse_args()
 
-    train(config_path=args.config)
\ No newline at end of file
+    train(config_path=args.config)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 00000000..ce4b6a50
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,19 @@
+import box
+from typing import Text
+import yaml
+
+
+def load_config(config_path: Text) -> box.ConfigBox:
+    """Load a YAML config file into a box.ConfigBox.
+
+    Args:
+        config_path {Text}: path to the YAML config file
+    Returns:
+        box.ConfigBox: parsed config with attribute-style access
+            (e.g. config.train.model_path)
+    """
+
+    # Read as UTF-8 explicitly so parsing does not depend on the locale.
+    with open(config_path, encoding='utf-8') as config_file:
+        # safe_load is used so the config is parsed with PyYAML's safe loader.
+        return box.ConfigBox(yaml.safe_load(config_file))