vanishing_gradients/index.html

<!doctype html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>VisualML | Vanishing Gradients</title>
    <link rel="shortcut icon" href="../img/favicon.ico" type="image/x-icon">
    <!-- NOTE(review): the "latest" alias is not version-pinned; consider referencing an explicit plotly.js version. -->
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <!-- TensorFlow.js core is loaded first so that the global `tf` exists when tfjs-vis is evaluated. -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@1.0.0/dist/tf.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-vis@1.0.2/dist/tfjs-vis.umd.min.js"></script>
    <link href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.1.3/css/bootstrap.min.css" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css?family=Roboto&display=swap" rel="stylesheet">
    <link rel="stylesheet" href="tfjs-examples.css" />
</head>

<body>
    <style>
         :root {
            --light-color: #666;
            --background-color: #aaa;
            --foreground-color: #666;
            --hover-color: #888;
            --hot-color: #8ed081;
        }
        
        form {
            display: flex;
            max-width: 800px;
            max-height: 650px;
            margin: auto;
            margin-left: 0px;
            font-family: 'Roboto', sans-serif;
        }
        
        #activations {
            flex: 1;
            flex-basis: auto;
            flex-grow: unset;
            overflow-y: auto;
            color: var(--hot-color);
            background-color: white;
        }
        
        #activations h3 {
            font-size: 0.8em;
            text-align: center;
            background-color: var(--light-color);
            color: #fff;
            padding-top: 1em;
            padding-bottom: 1em;
            margin: 0;
        }
        
        .content {
            font-size: 24px;
            font-family: Cambria, Cochin, Georgia, Times, 'Times New Roman', serif;
        }
        
        #main {
            flex: 10;
        }
        
        #weights,
        #controls {
            display: none;
        }
        
        #activations ul {
            list-style: none;
            margin: 0;
            padding: 0;
        }
        
        #activations ul li label:hover {
            background-color: var(--hover-color);
        }
        
        #activations ul li input:checked~label {
            background-color: #fff;
            color: var(--foreground-color);
        }
        
        /* One highlight colour per activation function, applied to the label of a
           checked checkbox. The labels act as the legend for the plot traces, so
           keep these values in sync with `functionColors` in replot(). */
        #activations ul li input:checked#a_sigmoid~label {
            background-color: #8ed081;
        }
        
        #activations ul li input:checked#a_hardsigmoid~label {
            background-color: #430ade;
        }
        
        #activations ul li input:checked#a_softplus~label {
            background-color: #f33;
        }
        
        #activations ul li input:checked#a_softsign~label {
            background-color: #8af;
        }
        
        #activations ul li input:checked#a_tanh~label {
            background-color: #e2c044;
        }
        
        #activations ul li input:checked#a_softmax~label {
            background-color: #f4c095;
        }
        
        #activations ul li input:checked#a_linear~label {
            background-color: #d2d6ef;
        }
        
        #activations ul li input:checked#a_relu~label {
            background-color: #f19a3e;
        }
        
        #activations ul li input:checked#a_relu6~label {
            background-color: #b9ffb7;
        }
        
        #activations ul li input:checked#a_selu~label {
            background-color: #ba84f4;
        }
        
        #activations ul li input:checked#a_elu~label {
            background-color: #e29;
        }
        
        #activations ul li {
            margin: 0.4em;
        }
        
        #activations ul li span {
            background-color: #fff;
            padding: 0;
            border-radius: 0.5em;
            padding-left: 1em;
            padding-right: 1em;
            display: inline-block;
            min-width: 6em;
            opacity: 0.8;
        }
        
        #activations ul li input {
            display: none;
        }
        
        #activations ul li label {
            text-align: center;
            font-family: monospace;
            font-size: 1.4em;
            display: block;
            cursor: pointer;
            padding: 0.2em;
            border-radius: 0.5em;
            background-color: var(--background-color);
            color: var(--foreground-color);
        }
        
        #weights {
            border-bottom: 1px solid #888;
        }
        
        #weights>div {
            display: flex;
            padding: 2px;
            background-color: #f3f3f3;
        }
        
        #weights label {
            width: 5em;
            text-align: right;
            padding: 0.2em;
        }
        
        #weights input[type="range"] {
            display: block;
            width: 90%;
            margin-left: 5%;
            margin-right: 5%;
        }
        
        #controls {
            border-top: 1px solid #888;
            background-color: #f3f3f3;
            padding: 0.4em;
        }
        
        #plot {
            min-height: 200px;
            height: 350px;
            border-right: 1px solid #888;
        }
        
        #plot.select_function {
            text-align: center;
            display: flex;
            justify-content: center;
            align-items: center;
            border: 0;
        }
    </style>

    <style>
        input {
            width: 75px;
        }
        
        .input-div {
            padding: 5px;
            font-family: monospace;
            font-size: 16px;
        }
        
        .input-label {
            display: inline-block;
            width: 160px;
        }
        
        td {
            padding-left: 5px;
            padding-right: 5px;
            padding-bottom: 5px;
        }
        
        #predict-header {
            font-weight: bold;
        }
        
        .output-div {
            padding: 5px;
            padding-top: 20px;
            font-family: monospace;
            font-weight: bold;
        }
        
        #evaluate-table {
            display: inline-block;
        }
        
        #evaluate-table td,
        #evaluate-table th {
            font-family: monospace;
            border: 1px solid #ddd;
            padding: 8px;
        }
        
        #evaluate-table th {
            padding-top: 12px;
            padding-bottom: 12px;
            text-align: left;
            background-color: #4CAF50;
            color: white;
        }
        
        .region {
            border-left: 1px dashed #ccc;
            margin-bottom: 5px;
            padding-left: 24px;
            margin-left: -24px;
        }
        
        .load-save-section {
            padding-top: 3px;
            padding-bottom: 3px;
        }
        
        .logit-span {
            padding-right: 1em;
        }
        
        .correct-prediction {
            background-color: greenyellow
        }
        
        .wrong-prediction {
            background-color: red;
        }
        
        #parent {
            display: inline-flex;
            width: 100%;
        }
        
        #child1 {
            width: 30%;
        }
        
        #child2 {
            width: 70%;
        }
    </style>
    <script>
        // Models currently being visualised: one single-unit dense model per checked
        // activation function. Starts as an empty list so that helpers reading
        // `models` do not throw if they run before init().
        let models = [];

        /**
         * Rebuilds `models` from the currently checked activation checkboxes and
         * pushes the slider weight/bias into them (which also redraws the plot).
         *
         * Called on page start-up and from the `onchange` handler of every
         * activation checkbox.
         */
        function init() {
            // init() runs on every checkbox change; release the weight tensors of
            // the previous generation of models so they do not accumulate.
            models.forEach(previousModel => previousModel.dispose());

            models = getSelectedActivationFunctions().map(activationFunctionName => {
                // `const` keeps the model local; the original assignment created an
                // implicit global named `model`.
                const model = tf.sequential();
                model.add(tf.layers.dense({
                    units: 1,
                    useBias: true,
                    activation: activationFunctionName,
                    inputDim: 1,
                }));
                model.compile({
                    loss: 'meanSquaredError',
                    optimizer: tf.train.adam(),
                });
                // Not a real LayersModel property: ad-hoc metadata that replot()
                // reads to name the corresponding trace.
                model.activationFunction = activationFunctionName;
                return model;
            });

            updateWeights();
        }

        /**
         * Collects the `value` of every checked input inside `#activations`.
         *
         * @returns {string[]} Activation function names in document order; an
         *     empty array when nothing is checked.
         */
        function getSelectedActivationFunctions() {
            const checkedBoxes = document.querySelectorAll('#activations input:checked');
            if (!checkedBoxes) {
                return [];
            }
            return Array.from(checkedBoxes, box => box.value);
        }

        // Build the initial models exactly once, as soon as the DOM has been parsed.
        // (`onreadystatechange` fires for every readyState transition, which ran
        // init() more than once, and assigning the property could overwrite a
        // handler installed by another script.)
        document.addEventListener('DOMContentLoaded', init);

        /**
         * Copies the value typed into the numeric weight box onto the weight
         * slider, then re-applies the weights to the models.
         */
        function setWeight() {
            const typedWeight = document.getElementById('weight_value').value;
            document.getElementById('weight').value = typedWeight;
            updateWeights();
        }

        /**
         * Copies the value typed into the numeric bias box onto the bias slider,
         * then re-applies the weights to the models.
         */
        function setBias() {
            const typedBias = document.getElementById('bias_value').value;
            document.getElementById('bias').value = typedBias;
            updateWeights();
        }

        /**
         * Reads the weight and bias sliders, writes both values into the first
         * layer of every model, then redraws the plot and refreshes the inputs.
         */
        function updateWeights() {
            const readSlider = id => parseFloat(document.getElementById(id).value);
            const sliderWeight = readSlider('weight');
            const sliderBias = readSlider('bias');

            // tidy() releases the temporary tensors created for setWeights().
            tf.tidy(() => {
                for (const model of models) {
                    const kernel = tf.tensor2d([[sliderWeight]]);
                    const bias = tf.tensor1d([sliderBias]);
                    model.layers[0].setWeights([kernel, bias]);
                }
            });

            replot();
            displayWeights();
        }

        /**
         * Redraws the `#plot` chart: one trace per model, sampled at 100 evenly
         * spaced inputs between the values of the `#min` and `#max` fields.
         *
         * When no activation function is checked, a hint is shown in place of the
         * chart and the `#weights` / `#controls` panels are hidden. When the input
         * range is not a pair of finite numbers, the current chart is left as is.
         */
        function replot() {
            const SAMPLE_COUNT = 100;
            const plotElement = document.getElementById("plot");
            const activationFunctions = getSelectedActivationFunctions();

            // Handle the empty selection first so no tensors are created for it.
            if (activationFunctions.length === 0) {
                plotElement.innerHTML = "Please select one or more activation functions";
                plotElement.classList.add(`select_function`);
                document.getElementById(`weights`).style.display = "none";
                document.getElementById(`controls`).style.display = "none";
                return;
            }

            const min = parseFloat(document.getElementById(`min`).value);
            const max = parseFloat(document.getElementById(`max`).value);
            // An emptied number field parses to NaN; keep the current chart
            // instead of plotting NaN samples.
            if (!Number.isFinite(min) || !Number.isFinite(max)) {
                return;
            }

            // All tensors are created inside tidy() so they are released even if
            // predict() throws; only plain typed arrays leave the callback.
            const series = tf.tidy(() => {
                const xs = tf.linspace(min, max, SAMPLE_COUNT);
                const xValues = xs.dataSync(); // identical for every trace
                const inputs = xs.reshape([SAMPLE_COUNT, 1]);

                return models.map(model => ({
                    x: xValues,
                    y: model.predict(inputs).dataSync(),
                    type: 'scatter',
                    name: model.activationFunction,
                }));
            });

            // Trace colours mirror the checked-label colours in the stylesheet,
            // because the checkbox labels serve as the legend for the chart.
            const functionColors = {
                sigmoid: '#8ed081',
                hardSigmoid: '#430ade',
                softplus: '#f33',
                softsign: '#8af',
                tanh: '#e2c044',
                softmax: '#f4c095',
                linear: '#d2d6ef',
                relu: '#f19a3e',
                relu6: '#b9ffb7',
                selu: '#ba84f4',
                elu: '#e29',
            };
            const options = {
                colorway: activationFunctions.map(name => functionColors[name]),
                margin: {
                    l: 40,
                    r: 20,
                    b: 20,
                    t: 10,
                    pad: 0,
                },
            };

            // With a single function selected, pin the y axis to a fixed range for
            // the functions listed below; otherwise Plotly chooses the range.
            if (activationFunctions.length === 1) {
                switch (activationFunctions[0]) {
                    case "sigmoid":
                    case "hardSigmoid":
                        options.yaxis = {
                            range: [0, 1.05]
                        };
                        break;
                    case "softsign":
                    case "tanh":
                        options.yaxis = {
                            range: [-1.05, 1.05]
                        };
                        break;
                    case "relu6":
                        options.yaxis = {
                            range: [0, 6.1]
                        };
                        break;
                }
            }

            plotElement.classList.remove(`select_function`);
            // Clear the hint text (or the previous chart) before drawing.
            plotElement.innerText = "";
            Plotly.newPlot(plotElement, series, options, {
                displaylogo: false
            });
        }

        /**
         * Shows the `#weights` and `#controls` panels and writes the first model's
         * weight and bias (two decimals) into the sliders and number inputs.
         * Does nothing when there are no models.
         */
        function displayWeights() {
            if (!models.length) {
                return;
            }

            document.getElementById('weights').style.display = "block";
            document.getElementById('controls').style.display = "block";

            const [kernel, bias] = models[0].layers[0].getWeights();
            const weightText = kernel.dataSync()[0].toFixed(2);
            const biasText = bias.dataSync()[0].toFixed(2);

            document.getElementById('weight').value = weightText;
            document.getElementById('weight_value').value = weightText;
            document.getElementById('bias').value = biasText;
            document.getElementById('bias_value').value = biasText;
        }
    </script>

    <!-- Page content. The document body is already open at this point (it also holds the inline styles and script above), so no second <body> tag is needed here. -->
        <div class='container'>
            <section class='text-center'>
                <h1>Vanishing Gradients</h1><br>
                <p class='subtitle'>Comparing the activation functions</p>
            </section>

            <section>
                <p class='section-head'>Description</p>
                <p class="content">
                    This example explains the problem of vanishing gradients (which you may encounter when training a deep neural network) and shows how the choice of activation function can help to prevent it. The problem describes the situation where a deep multilayer feed-forward network or a recurrent
                    neural network is unable to propagate useful gradient information from the output end of the model back to the layers near the input end of the model.
                </p>
                <p class="content">
                    Many fixes and workarounds have been proposed and investigated, such as alternate weight initialization schemes, unsupervised pre-training, layer-wise training, and variations on gradient descent. Perhaps the most common change is the use of the rectified
                    linear activation function and its modifications.
                </p> <br>
            </section>
            <section>
                <p class='section-head'>How does the choice of activation function avoid vanishing gradients?</p><br>
                <p class="content">
                    Some activation functions, like the sigmoid function, squash a large input space into a small output range between 0 and 1. Therefore, a large change in the input of the sigmoid function will cause only a small change in the output. Hence, the derivative becomes
                    small. During backpropagation these small derivatives are multiplied together layer by layer, so the gradient shrinks rapidly as it travels back towards the input. A small gradient means that the weights and biases of the initial layers will not be updated effectively with each training session. Since these initial layers are often crucial to recognizing the core elements of the input data,
                    this can lead to overall inaccuracy of the whole network.
                </p>
                <p class="content">
                    The simplest solution is to use other activation functions, such as ReLU or LeakyReLU, which do not produce small derivatives. The really nice thing about ReLU is that its gradient is either 0 or 1, which means it never saturates for positive inputs, and so gradients can’t
                    vanish — they are passed on unchanged through the active units of a network. However, the “dead ReLU” problem may occur, i.e. a situation where the gradients become exactly 0; this problem is addressed by its modification, LeakyReLU.
                </p><br>
                <p class='subtitle'>You can visualize it by plotting these activation functions and many more by the options given below...</p>
            </section>
            <div id="parent">
                <div id="child1">
                    <form id="activations">
                        <ul>
                            <li>
                                <input type="checkbox" id="a_sigmoid" name="activation" value="sigmoid" onchange="init()" checked/>
                                <label for="a_sigmoid"><span>sigmoid</span></label>
                            </li>
                            <li>
                                <input type="checkbox" id="a_hardsigmoid" name="activation" value="hardSigmoid" onchange="init()" />
                                <label for="a_hardsigmoid"><span>hardSigmoid</span></label>
                            </li>
                            <li>
                                <input type="checkbox" id="a_softplus" name="activation" value="softplus" onchange="init()" />
                                <label for="a_softplus"><span>softplus</span></label></li>
                            <li>
                                <input type="checkbox" id="a_softsign" name="activation" value="softsign" onchange="init()" />
                                <label for="a_softsign"><span>softsign</span></label></li>
                            <li>
                                <input type="checkbox" id="a_tanh" name="activation" value="tanh" onchange="init()" checked/>
                                <label for="a_tanh"><span>tanh</span></label></li>
                            <li>
                                <input type="checkbox" id="a_softmax" name="activation" value="softmax" onchange="init()" />
                                <label for="a_softmax"><span>softmax</span></label></li>
                            <li>
                                <input type="checkbox" id="a_linear" name="activation" value="linear" onchange="init()" />
                                <label for="a_linear"><span>linear</span></label></li>
                            <li>
                                <input type="checkbox" id="a_relu" name="activation" value="relu" onchange="init()" checked/>
                                <label for="a_relu"><span>relu</span></label></li>
                            <li>
                                <input type="checkbox" id="a_relu6" name="activation" value="relu6" onchange="init()" />
                                <label for="a_relu6"><span>relu6</span></label></li>
                            <li>
                                <input type="checkbox" id="a_selu" name="activation" value="selu" onchange="init()" />
                                <label for="a_selu"><span>selu</span></label></li>
                            <li>
                                <input type="checkbox" id="a_elu" name="activation" value="elu" onchange="init()" />
                                <label for="a_elu"><span>elu</span></label></li>
                        </ul>
                    </form>
                </div>
                <div id="child2">
                    <form>
                        <div id="main">
                            <!-- Weight and bias controls: each slider is paired with a number box holding the same value. -->
                            <div id="weights">

                                <div>
                                    <label for="weight">Weight: </label>
                                    <input type="range" min="-10" max="10" step="0.1" value="1" oninput="updateWeights()" onchange="updateWeights()" class="slider" id="weight" />
                                    <input id="weight_value" type="number" step="0.1" oninput="setWeight()" value="0" aria-label="Weight value" />
                                </div>

                                <div>
                                    <label for="bias">Bias: </label>
                                    <input type="range" min="-10" max="10" step="0.1" value="1" oninput="updateWeights()" onchange="updateWeights()" class="slider" id="bias" />
                                    <input id="bias_value" type="number" step="0.1" oninput="setBias()" value="0" aria-label="Bias value" />
                                </div>

                            </div>
                            <div id="plot"></div>
                            <div id="controls">
                                <label>Inputs from <input id="min" type="number" value="-5" onchange="replot()" />
                  to <input id="max" type="number" value="5" onchange="replot()" /></label>
                            </div>
                        </div>
                    </form>
                </div>
            </div>
            <section>
                <p class='section-head'>About the dataset and model</p>
                <p class="content">
                    This example uses a fully connected neural network. The data used for each flower are the petal length and width as well as the sepal length and width. The data comes from the famous
                    <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set">Iris flower</a> data set.
                </p>
            </section>
            <section>
                <p class='section-head'>Instructions</p>
                <p class="content">
                    <ol>
                        <li>
                            <p>Using the options below, you can set the activation function, the number of layers, the number of neurons per layer, the batch size, the learning rate and the number of epochs according to your choice.</p>
                        </li>
                        <li>
                            <p>You can visualize the neural network of your choice and inspect the gradient with respect to each weight by looking at the intensity of the links connecting the neurons. Positive gradients are represented by blue links and negative gradients
                                are represented by red links.<br> Note that the gradient values from the final iteration are used when drawing the network architecture.</p>
                        </li>
                        <li>
                            <p>In each iteration, a batch of the chosen size is randomly selected from 120 of the 150 examples provided by the Iris dataset, and the model parameters are then optimized using gradient descent. The remaining 30 examples are used for validation. You
                                can also see the value of the loss at each iteration in the console.</p>
                        </li>
                        <li>
                            <p>Also a plot (Loss vs iteration) will show up on clicking the given button.</p>
                        </li>
                        <li>
                            <p>It is strongly advised to keep the batch size greater than 1; otherwise you may encounter the exploding gradients problem, which shows up as black links all over the architecture.</p>
                        </li>
                        <li>
                            <p>Wait for some time after clicking the button.</p>
                        </li>
                        <li>
                            <p>Change the custom parameters and again press the button to train the changed neural network.</p>
                        </li>
                    </ol>
                </p>
            </section>

            <section>
                <p class='section-head'>Controls</p>

                <div class="region">
                    <h3>Train Model</h3>
                    <!-- Training hyper-parameters. Each label is tied to its control through for/id. -->
                    <div class="create-model">
                        <div class="input-div">
                            <label class="input-label" for="activations_f">Activation Function:</label>
                            <select id="activations_f" name="activations">
                                <option value="sigmoid">Sigmoid</option>
                                <option value="relu">RELU</option>
                                <option value="leakyRelu">LeakyRelu</option>
                                <option value="tanh">Tanh</option>
                            </select>
                        </div>

                        <div class="input-div">
                            <label class="input-label" for="num-layers">Num_Layers:</label>
                            <input id="num-layers" type="number" value="2">
                        </div>

                        <div class="input-div">
                            <label class="input-label" for="num-neurons">Num_neurons:</label>
                            <input id="num-neurons" type="number" value="6">
                        </div>
                        <div class="input-div">
                            <label class="input-label" for="batch">batch_Size:</label>
                            <input id="batch" type="number" value="4">
                        </div>
                        <div class="input-div">
                            <label class="input-label" for="lr">Learning_Rate:</label>
                            <!-- An explicit step lets the spinner move in learning-rate sized increments instead of whole numbers. -->
                            <input id="lr" type="number" step="0.01" value="0.01">
                        </div>
                        <div class="input-div">
                            <label class="input-label" for="iter">num_epochs:</label>
                            <input id="iter" type="number" value="20">
                        </div>
                        <canvas id="myCanvas" height="0" width="0"></canvas>
                    </div>
                    <div id="graph"></div><br>
                    <div>
                        <canvas id="testloss" height="25" width="850"></canvas>
                    </div><br><br>
                    <div>
                        <canvas id="tra" height="25" width="500"></canvas>
                        <div id="graph2"></div><br>
                    </div>
                    <div>
                        <canvas id="tea" height="25" width="500"></canvas>
                        <div id="graph3"></div><br>
                    </div>
                    <div id="loader"></div><br><br>
                    <button id="show-nn-architecture">Train and Show NN-architecture</button>
                </div><br>

                <!-- Still inside the "Controls" section, which is closed explicitly below. -->
                <div>
                    <div class="horizontal-section">
                        <div id="horizontal-section">
                        </div>
                    </div>
                </div>
            </section>
        </div>

        <script src="index.js"></script>
        </body>

</html>