smartloop-ai
diff --git a/‎.gitignore
Lines changed: 132 additions & 0 deletions b/‎.gitignore
Lines changed: 132 additions & 0 deletions
diff --git a/‎Dockerfile
Lines changed: 26 additions & 0 deletions b/‎Dockerfile
Lines changed: 26 additions & 0 deletions
diff --git a/‎LICENSE.txt
Lines changed: 17 additions & 0 deletions b/‎LICENSE.txt
Lines changed: 17 additions & 0 deletions
diff --git a/‎MANIFEST.in
Lines changed: 1 addition & 0 deletions b/‎MANIFEST.in
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md
Lines changed: 151 additions & 0 deletions b/‎README.md
Lines changed: 151 additions & 0 deletions
diff --git a/‎config.yaml
Lines changed: 22 additions & 0 deletions b/‎config.yaml
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,132 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.idea
+
+nlp_data/
@@ -0,0 +1,26 @@
+# Image that runs the NLU command-line entry point (main.py) on top of the
+# TensorFlow 2.9.1 base image.
+FROM tensorflow/tensorflow:2.9.1
+
+# HOME is also the application directory; NLTK_DATA points inside it so the
+# corpora downloaded at build time are found at runtime.
+ENV HOME=/app
+ENV DATA_DIR='/nlp_data'
+ENV CUDA_VISIBLE_DEVICES=1
+ENV NLTK_DATA=/app/nltk_data
+
+# WORKDIR creates ${HOME}, so the directory exists before anything is
+# downloaded or copied into it.
+WORKDIR ${HOME}
+
+# Print the interpreter version in the build log and upgrade pip itself.
+RUN set -eux; \
+    python --version; \
+    python -m pip install --no-cache-dir -U pip
+
+# Install the Python dependencies BEFORE copying the source tree so this layer
+# stays cached when only application code changes.
+# NOTE: the "sklearn" PyPI package is a deprecated alias whose current releases
+# refuse to install; scikit-learn is the real package and is all that is needed.
+RUN set -eux; \
+    pip install --no-cache-dir \
+        joblib~=1.1.0 \
+        nltk~=3.7 \
+        PyYAML~=6.0 \
+        scikit-learn~=1.1.1
+
+# Download the stop words corpus explicitly into ${NLTK_DATA}.
+RUN set -eux; \
+    python -m nltk.downloader -d "${NLTK_DATA}" stopwords
+
+# Copy the application last: it is the most frequently changing input.
+COPY . ${HOME}
+
+ENTRYPOINT [ "python", "main.py" ]
@@ -0,0 +1,17 @@
+MIT License
+Copyright (c) 2023 Smartloop Inc
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1 @@
+include data/sample.json
@@ -0,0 +1,151 @@
+# Smartloop NLU Framework
+Natural language processing framework
+
+# Train a bot
+
+Use the `sample.json` file in the `data` folder; you will pass the name of the bot as an argument in the next step.
+
+Below is a sample training JSON containing the patterns and the name of the intent that will be resolved for a user input.
+
+```json
+{
+    "examples": {
+        "intents": [
+            {
+                "text": "about",
+                "intent": "about"
+            },
+            {
+                "text": "company",
+                "intent": "about"
+            },
+            {
+                "text": "what is smartloop",
+                "intent": "about"
+            },
+            {
+                "text": "start",
+                "intent": "start"
+            },
+            {
+                "text": "menu",
+                "intent": "start"
+            },
+            {
+                "text": "hi",
+                "intent": "start"
+            }
+        ]
+    },
+    "lang": "en"
+}
+```
+
+From the command line type the following to train the bot:
+
+```
+python main.py train -i sample
+
+```
+
+# Testing the bot
+
+To test the bot, type the following command:
+
+```
+python main.py parse -i sample -t "I need a chabot"
+```
+
+This should return the intent name followed by the confidence level
+
+```
+{
+    "topIntent": {
+        "intent": "i-need-chatbot",
+        "confidence": 0.9999436140060425
+    },
+    "intents": [
+        {
+            "intent": "i-need-chatbot",
+            "confidence": 0.9999436140060425
+        },
+        {
+            "intent": "chatter-good-afternoon",
+            "confidence": 4.835660001845099e-05
+        },
+        {
+            "intent": "bizbot-no-way",
+            "confidence": 3.6056665067008e-06
+        },
+        {
+            "intent": "about-chatbot",
+            "confidence": 1.9573460576793877e-06
+        },
+        {
+            "intent": "contact",
+            "confidence": 1.095663265004987e-06
+        }
+    ]
+}
+```
+
+## Tuning your model (Advanced)
+
+It is possible to override the default training parameters to create a model that fits your needs. Edit `config.yaml` to tune your model:
+
+```yaml
+# number of epochs
+epochs: 100
+
+# Use tensorboard callback
+logs: True
+
+# classifier parameters
+embedded_intent_classifier:
+    # base neurons, this will be increased based on the intent size
+    neurons: 16
+    # length of input len("hello how are you") = 4
+    input_length: 100
+    learning_rate: 1e-2
+    flatten: False
+    hidden_layers: 2
+    # drop rate to avoid overfitting
+    drop_rate: 0.2
+    # early stop training in case of not improving
+    early_stopping: True
+```
+
+The optimal values can vary based on model size, and can be tuned using the grid search capabilities to find the optimal settings.
+
+Here is a list of basic parameters and their meaning:
+
+* epochs - The number of training iterations, where 1 epoch = 1 complete pass over the training data
+* learning_rate - How fast or slow the model learns across iterations
+* drop_rate - Adjust this to prevent the model from overfitting the training data
+
+
+## Configuration
+
+Install the stop words dictionary using the following command:
+
+```
+python -m nltk.downloader stopwords   
+```
+
+## Debugging
+
+Set `logs: True` in `config.yaml` to enable debugging with `tensorboard`. Once you have trained the bot, type the following command to start tensorboard:
+
+```commandline
+tensorboard serve --logdir logs/nlp_data/<bot_id>/<model_id>
+```
+
+
+## Requirements
+
+* Tensorflow (>=2.9.1)
+
+## License
+Licensed under the MIT License. See `LICENSE.txt` for the full text.
+
+Copyright 2021-2022 Smartloop Inc.
@@ -0,0 +1,22 @@
+# number of epochs
+epochs: 100
+
+# Use tensorboard callback
+logs: True
+
+# classifier parameters
+embedded_intent_classifier:
+    # base neurons to be used by LSTM model
+    neurons: 32
+    # length of input len("hello how are you") = 4
+    input_length: 100
+    # learning rate
+    learning_rate: 1e-2
+    # flatten
+    flatten: False
+    # number of hidden layers
+    hidden_layers: 1
+    # drop rate to avoid overfitting
+    drop_rate: 0.5
+    # early stop training in case of not improving
+    early_stopping: True