diff --git a/.bazelrc b/.bazelrc index 625f1e435a4..fb9585db115 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,3 +1,3 @@ -# load bazelrc from the legacy location as recommended +# load bazelrc from the legacy location as recommended # in https://github.com/bazelbuild/bazel/issues/6319 import %workspace%/tools/bazel.rc diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7ae205d5de2..2ad7df82394 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -42,10 +42,10 @@ RUN --mount=type=cache,target=/var/lib/apt \ COPY requirements/*.txt /tmp/pip-tmp/ RUN --mount=type=cache,target=/root/.cache/pip \ pip install --no-warn-script-location -r /tmp/pip-tmp/dev-requirements.txt -r /tmp/pip-tmp/docs-requirements.txt \ - && rm -rf /tmp/pip-tmp + && rm -rf /tmp/pip-tmp && pre-commit install -# Setting the ENTRYPOINT to docker-init.sh will configure non-root access to -# the Docker socket if "overrideCommand": false is set in devcontainer.json. +# Setting the ENTRYPOINT to docker-init.sh will configure non-root access to +# the Docker socket if "overrideCommand": false is set in devcontainer.json. # The script will also execute CMD if you need to alter startup behaviors. ENTRYPOINT [ "/usr/local/share/docker-init.sh" ] CMD [ "sleep", "infinity" ] diff --git a/.devcontainer/library-scripts/common-debian.sh b/.devcontainer/library-scripts/common-debian.sh index 2f36814f4bc..5a969e93686 100644 --- a/.devcontainer/library-scripts/common-debian.sh +++ b/.devcontainer/library-scripts/common-debian.sh @@ -113,7 +113,7 @@ if [ "${PACKAGES_ALREADY_INSTALLED}" != "true" ]; then manpages \ manpages-dev \ init-system-helpers" - + # Needed for adding manpages-posix and manpages-posix-dev which are non-free packages in Debian if [ "${ADD_NON_FREE_PACKAGES}" = "true" ]; then # Bring in variables from /etc/os-release like VERSION_CODENAME @@ -124,7 +124,7 @@ if [ "${PACKAGES_ALREADY_INSTALLED}" != "true" ]; then sed -i -E "s/deb-src http:\/\/(deb|httpredir)\.debian\.org\/debian ${VERSION_CODENAME}-updates main/deb http:\/\/\1\.debian\.org\/debian ${VERSION_CODENAME}-updates main contrib non-free/" /etc/apt/sources.list sed -i "s/deb http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}\/updates main/deb http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}\/updates main contrib non-free/" /etc/apt/sources.list sed -i "s/deb-src http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}\/updates main/deb http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}\/updates main contrib non-free/" /etc/apt/sources.list - sed -i "s/deb http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main/deb http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main contrib non-free/" /etc/apt/sources.list + sed -i "s/deb http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main/deb http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main contrib non-free/" /etc/apt/sources.list sed -i "s/deb-src http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main/deb http:\/\/deb\.debian\.org\/debian ${VERSION_CODENAME}-backports main contrib non-free/" /etc/apt/sources.list # Handle bullseye location for security https://www.debian.org/releases/bullseye/amd64/release-notes/ch-information.en.html sed -i "s/deb http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}-security main/deb http:\/\/security\.debian\.org\/debian-security ${VERSION_CODENAME}-security main contrib non-free/" /etc/apt/sources.list 
@@ -140,7 +140,7 @@ if [ "${PACKAGES_ALREADY_INSTALLED}" != "true" ]; then if [[ ! -z $(apt-cache --names-only search ^libssl1.1$) ]]; then package_list="${package_list} libssl1.1" fi - + # Install appropriate version of libssl1.0.x if available libssl_package=$(dpkg-query -f '${db:Status-Abbrev}\t${binary:Package}\n' -W 'libssl1\.0\.?' 2>&1 || echo '') if [ "$(echo "$LIlibssl_packageBSSL" | grep -o 'libssl1\.0\.[0-9]:' | uniq | sort | wc -l)" -eq 0 ]; then @@ -155,7 +155,7 @@ if [ "${PACKAGES_ALREADY_INSTALLED}" != "true" ]; then echo "Packages to verify are installed: ${package_list}" apt-get -y install --no-install-recommends ${package_list} 2> >( grep -v 'debconf: delaying package configuration, since apt-utils is not installed' >&2 ) - + # Install git if not already installed (may be more recent than distro version) if ! type git > /dev/null 2>&1; then apt-get -y install --no-install-recommends git @@ -174,7 +174,7 @@ fi # Ensure at least the en_US.UTF-8 UTF-8 locale is available. # Common need for both applications and things like the agnoster ZSH theme. if [ "${LOCALE_ALREADY_SET}" != "true" ] && ! grep -o -E '^\s*en_US.UTF-8\s+UTF-8' /etc/locale.gen > /dev/null; then - echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen + echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen locale-gen LOCALE_ALREADY_SET="true" fi @@ -183,12 +183,12 @@ fi group_name="${USERNAME}" if id -u ${USERNAME} > /dev/null 2>&1; then # User exists, update if needed - if [ "${USER_GID}" != "automatic" ] && [ "$USER_GID" != "$(id -g $USERNAME)" ]; then + if [ "${USER_GID}" != "automatic" ] && [ "$USER_GID" != "$(id -g $USERNAME)" ]; then group_name="$(id -gn $USERNAME)" groupmod --gid $USER_GID ${group_name} usermod --gid $USER_GID $USERNAME fi - if [ "${USER_UID}" != "automatic" ] && [ "$USER_UID" != "$(id -u $USERNAME)" ]; then + if [ "${USER_UID}" != "automatic" ] && [ "$USER_UID" != "$(id -u $USERNAME)" ]; then usermod --uid $USER_UID $USERNAME fi else @@ -198,7 +198,7 @@ else else groupadd --gid $USER_GID $USERNAME fi - if [ "${USER_UID}" = "automatic" ]; then + if [ "${USER_UID}" = "automatic" ]; then useradd -s /bin/bash --gid $USERNAME -m $USERNAME else useradd -s /bin/bash --uid $USER_UID --gid $USERNAME -m $USERNAME @@ -213,7 +213,7 @@ if [ "${USERNAME}" != "root" ] && [ "${EXISTING_NON_ROOT_USER}" != "${USERNAME}" fi # ** Shell customization section ** -if [ "${USERNAME}" = "root" ]; then +if [ "${USERNAME}" = "root" ]; then user_rc_path="/root" else user_rc_path="/home/${USERNAME}" @@ -250,9 +250,9 @@ fi # Set the default git editor if not already set if [ -z "$(git config --get core.editor)" ] && [ -z "${GIT_EDITOR}" ]; then if [ "${TERM_PROGRAM}" = "vscode" ]; then - if [[ -n $(command -v code-insiders) && -z $(command -v code) ]]; then + if [[ -n $(command -v code-insiders) && -z $(command -v code) ]]; then export GIT_EDITOR="code-insiders --wait" - else + else export GIT_EDITOR="code --wait" fi fi @@ -329,7 +329,7 @@ codespaces_zsh="$(cat \ # Codespaces zsh prompt theme __zsh_prompt() { local prompt_username - if [ ! -z "${GITHUB_USER}" ]; then + if [ ! 
-z "${GITHUB_USER}" ]; then prompt_username="@${GITHUB_USER}" else prompt_username="%n" diff --git a/.devcontainer/library-scripts/docker-debian.sh b/.devcontainer/library-scripts/docker-debian.sh index 1b925bcd1ff..7a960e5a6cf 100644 --- a/.devcontainer/library-scripts/docker-debian.sh +++ b/.devcontainer/library-scripts/docker-debian.sh @@ -86,7 +86,7 @@ find_version_from_git_tags() { local repository=$2 local prefix=${3:-"tags/v"} local separator=${4:-"."} - local last_part_optional=${5:-"false"} + local last_part_optional=${5:-"false"} if [ "$(echo "${requested_version}" | grep -o "." | wc -l)" != "2" ]; then local escaped_separator=${separator//./\\.} local last_part @@ -172,7 +172,7 @@ apt-get update if [ "${DOCKER_VERSION}" = "latest" ] || [ "${DOCKER_VERSION}" = "lts" ] || [ "${DOCKER_VERSION}" = "stable" ]; then # Empty, meaning grab whatever "latest" is in apt repo cli_version_suffix="" -else +else # Fetch a valid version from the apt-cache (eg: the Microsoft repo appends +azure, breakfix, etc...) docker_version_dot_escaped="${DOCKER_VERSION//./\\.}" docker_version_dot_plus_escaped="${docker_version_dot_escaped//+/\\+}" @@ -228,7 +228,7 @@ else fi ${pipx_bin} install --pip-args '--no-cache-dir --force-reinstall' docker-compose rm -rf /tmp/pip-tmp - else + else compose_v1_version="1" find_version_from_git_tags compose_v1_version "https://github.com/docker/compose" "tags/" echo "(*) Installing docker-compose ${compose_v1_version}..." @@ -286,13 +286,13 @@ usermod -aG docker "${USERNAME}" DOCKER_GID="$(grep -oP '^docker:x:\K[^:]+' /etc/group)" # If enabling non-root access and specified user is found, setup socat and add script -chown -h "${USERNAME}":root "${TARGET_SOCKET}" +chown -h "${USERNAME}":root "${TARGET_SOCKET}" if ! dpkg -s socat > /dev/null 2>&1; then apt_get_update_if_needed apt-get -y install socat fi tee /usr/local/share/docker-init.sh > /dev/null \ -<< EOF +<< EOF #!/usr/bin/env bash #------------------------------------------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. @@ -324,8 +324,8 @@ log() echo -e "\n** \$(date) **" | sudoIf tee -a \${SOCAT_LOG} > /dev/null log "Ensuring ${USERNAME} has access to ${SOURCE_SOCKET} via ${TARGET_SOCKET}" -# If enabled, try to update the docker group with the right GID. If the group is root, -# fall back on using socat to forward the docker socket to another unix socket so +# If enabled, try to update the docker group with the right GID. If the group is root, +# fall back on using socat to forward the docker socket to another unix socket so # that we can set permissions on it without affecting the host. if [ "${ENABLE_NONROOT_DOCKER}" = "true" ] && [ "${SOURCE_SOCKET}" != "${TARGET_SOCKET}" ] && [ "${USERNAME}" != "root" ] && [ "${USERNAME}" != "0" ]; then SOCKET_GID=\$(stat -c '%g' ${SOURCE_SOCKET}) @@ -345,7 +345,7 @@ if [ "${ENABLE_NONROOT_DOCKER}" = "true" ] && [ "${SOURCE_SOCKET}" != "${TARGET_ log "Success" fi -# Execute whatever commands were passed in (if any). This allows us +# Execute whatever commands were passed in (if any). This allows us # to set this script to ENTRYPOINT while still executing the default CMD. set +e exec "\$@" diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 3cf5b43467d..a33bd1c0694 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -15,4 +15,4 @@ contact_links: about: Contact the BentoML team for support. 
- name: BentoML Blog url: modelserving.com - about: Read the latest blogs/updates from community and the BentoML team. + about: Read the latest blogs/updates from community and the BentoML team. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b2b1c6511b8..672a6c9959c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -37,7 +37,7 @@ Fixes #(issue) - [ ] Does the Pull Request follow [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/#summary) naming? Here are [GitHub's guide](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) on how to create a pull request. -- [ ] Does the code follow BentoML's code style, both `make format` and `make lint` script have passed ([instructions](https://github.com/bentoml/BentoML/blob/main/DEVELOPMENT.md#style-check-auto-formatting-type-checking))? +- [ ] Does the code follow BentoML's code style, `pre-commit run -a` script has passed ([instructions](https://github.com/bentoml/BentoML/blob/main/DEVELOPMENT.md#style-check-auto-formatting-type-checking))? - [ ] Did you read through [contribution guidelines](https://github.com/bentoml/BentoML/blob/main/CONTRIBUTING.md#ways-to-contribute) and follow [development guidelines](https://github.com/bentoml/BentoML/blob/main/DEVELOPMENT.md#start-developing)? - [ ] Did your changes require updates to the documentation? Have you updated those accordingly? Here are [documentation guidelines](https://github.com/bentoml/BentoML/tree/main/docs) and [tips on writting docs](https://github.com/bentoml/BentoML/tree/main/docs#writing-documentation). diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0b9ccb967b..4696fd58a29 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,14 +85,9 @@ jobs: - name: Install dependencies run: | pip install . - pip install -r requirements/dev-requirements.txt - - name: Format check - run: | - black --check src examples tests - black --check --pyi typings - isort --check . - - name: Lint check - run: ruff check src tests examples + pip install pre-commit + - name: Format and lint check + run: pre-commit run --all-files - name: Type check if: ${{ github.event_name == 'pull_request' }} run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000000..9ef49b7ab83 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +ci: + autoupdate_schedule: monthly + +exclude: '.*\.(css|js|svg)$' +repos: + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 'v0.0.263' + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix, --show-fixes] + + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + additional_dependencies: # black[jupyter] dependencies + - ipython>=7.8.0 + - tokenize-rt>=3.2.0 + files: '/(src|tests|docs|examples|typings)/' + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8035ddd41b8..a703fc7b299 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,12 +12,12 @@ To follow development updates and discussion, join the #bentoml-contributors cha There are many ways to contribute to BentoML. 
* Supporting new users by answering questions on the - [github issues tracker](https://github.com/bentoml/BentoML/issues) and the + [github issues tracker](https://github.com/bentoml/BentoML/issues) and the [#bentoml-users slack channel](https://join.slack.bentoml.org). - -* Report issues you're facing and "Thumbs up" on issues and feature requests that are + +* Report issues you're facing and "Thumbs up" on issues and feature requests that are relevant to you in BentoML's [issues tracker](https://github.com/bentoml/BentoML/issues). - + * Investigate bugs and reviewing other developer's pull requests. * Contributing code or documentation to the project by submitting a Github pull request. @@ -31,8 +31,8 @@ There are many ways to contribute to BentoML. We use Github issues to track all bugs and feature requests. Feel free to open an issue if you have found a bug or wish to see a new feature implemented. -Before submitting a github issue, ensure the bug was not already reported under -[issues](https://github.com/bentoml/bentoml/issues) or currently being addressed by +Before submitting a github issue, ensure the bug was not already reported under +[issues](https://github.com/bentoml/bentoml/issues) or currently being addressed by other [pull requests](https://github.com/bentoml/BentoML/pulls). If you're unable to find an open issue addressing the problem, @@ -44,10 +44,10 @@ the expected behavior that is not occurring. ## Contributing Code -To avoid duplicating work, it is highly recommended to search through the -[issue tracker](https://github.com/bentoml/bentoml/issues) and +To avoid duplicating work, it is highly recommended to search through the +[issue tracker](https://github.com/bentoml/bentoml/issues) and [pull requests list](https://github.com/bentoml/BentoML/pulls). If in doubt about -duplicated work, or if you want to work on a non-trivial feature, it's recommended to +duplicated work, or if you want to work on a non-trivial feature, it's recommended to first open an issue in the [issue tracker](https://github.com/bentoml/bentoml/issues) to get some feedbacks from core developers. @@ -66,11 +66,11 @@ prior knoledge and help you get familiar with its codebase. ## Documentation Improving the documentation is no less important than improving the library. If you find -a typo in the documentation, or have made improvements, do not hesitate to submit a -GitHub pull request. +a typo in the documentation, or have made improvements, do not hesitate to submit a +GitHub pull request. Full documentation can be found under the `docs/source` directory. You can edit the -documentation `.rst` or `.md` files using any text editor. Follow the instructions +documentation `.rst` or `.md` files using any text editor. Follow the instructions [here](https://github.com/bentoml/BentoML/blob/main/DEVELOPMENT.md#how-to-edit-run-build-documentation-site) to build documentation site locally, generate HTML output and preview your changes. @@ -108,9 +108,9 @@ Tags for managing issues: High quality testing is extremely important for BentoML project. Currently BentoML has three kind of tests: Unit tests(`tests/`) and integrations (`tests/integration/`) are running on Travis CI for every pull request. End-to-end tests(`e2e_tests/`) is manually -executed by the maintainer before every release and for pull requests that are +executed by the maintainer before every release and for pull requests that are introducing major changes. 
-We expect pull requests that are introducing new features to have at least 90% test +We expect pull requests that are introducing new features to have at least 90% test coverages. Pull requests that are fixing a bug should add a test covering the issue being fixed if possible. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index c560a0581ac..cdd96d93c5d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -209,13 +209,13 @@ bentoml get IrisClassifier --verbose ## Style check, auto-formatting, type-checking -formatter: [black](https://github.com/psf/black), [isort](https://github.com/PyCQA/isort), [buf](https://github.com/bufbuild/buf) +formatter: [black](https://github.com/psf/black), [buf](https://github.com/bufbuild/buf) linter: [ruff](https://github.com/charliermarsh/ruff), [buf](https://github.com/bufbuild/buf) type checker: [pyright](https://github.com/microsoft/pyright) -We are using [buf](https://github.com/bufbuild/buf) for formatting and linting +We are using [pre-commit](https://pre-commit.com/) to manage our hooks, and [buf](https://github.com/bufbuild/buf) for formatting and linting of our proto files. Configuration can be found [here](./bentoml/grpc/buf.yaml). Currently, we are running `buf` with docker, hence we kindly ask our developers to have docker available. Docker installation can be found [here](https://docs.docker.com/get-docker/). @@ -223,9 +223,7 @@ to have docker available. Docker installation can be found [here](https://docs.d Run linter/format script: ```bash -make format - -make lint +pre-commit run --all-files ``` Run type checker: diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 164b179bc44..f58acce66a6 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -1,11 +1,11 @@ # BentoML Project Governance Document -Welcome to the BentoML community. BentoML is an open, community-driven project for -machine learning model serving. +Welcome to the BentoML community. BentoML is an open, community-driven project for +machine learning model serving. -BentoML takes a meritocratic, consensus-based approach to its governance structure. +BentoML takes a meritocratic, consensus-based approach to its governance structure. Anyone with an interest in the project can join the community, contribute to the project -and participate in the decision making process. And this is a living document aiming to +and participate in the decision making process. And this is a living document aiming to outline how that participation takes place. * Contribution guidelines: https://github.com/bentoml/BentoML/blob/main/CONTRIBUTING.md @@ -19,8 +19,8 @@ outline how that participation takes place. #### Contributors -Contributors are community members who contribute in concrete ways to the project. -Anyone can become a contributor, and contributions can take many forms, as detailed in +Contributors are community members who contribute in concrete ways to the project. +Anyone can become a contributor, and contributions can take many forms, as detailed in the [BentoML Contribution Guidelines](https://github.com/bentoml/BentoML/blob/main/CONTRIBUTING.md). There is no expectation of commitment to the project, no specific skill requirements and no selection process. @@ -28,24 +28,24 @@ no selection process. 
As a contributor, you may find yourself doing one or more of the following: * Support new users by answering questions on the - [github issue tracker](https://github.com/bentoml/BentoML/issues) and the + [github issue tracker](https://github.com/bentoml/BentoML/issues) and the [#bentoml-users slack channel](https://join.slack.bentoml.org). -* Report issues you're facing and "Thumbs up" on issues and feature requests that are +* Report issues you're facing and "Thumbs up" on issues and feature requests that are relevant to you in BentoML's [issue tracker](https://github.com/bentoml/BentoML/issues). * Investigate bugs and reviewing other developer's pull requests. * Contribute code or documentation to the project by submitting a Github pull request. * Create new example projects and contribute it to [bentoml/examples](https://github.com/bentoml/BentoML/tree/main/examples). -Contributors engage with the project through the issue tracker and mailing list. They -submit code or documentation changes to the project via github pull request, which will -be reviewed by the Core Team members (see next section). +Contributors engage with the project through the issue tracker and mailing list. They +submit code or documentation changes to the project via github pull request, which will +be reviewed by the Core Team members (see next section). -The [BentoML contributors mailing list](https://groups.google.com/forum/#!forum/bentoml) +The [BentoML contributors mailing list](https://groups.google.com/forum/#!forum/bentoml) is the most appropriate place to ask for help or get feedback when making contributions. -As contributors gain experience and familiarity with the project, their profile within, -and commitment to, the community will increase. At some stage, they may find themselves +As contributors gain experience and familiarity with the project, their profile within, +and commitment to, the community will increase. At some stage, they may find themselves being nominated for the BentoML Core Team. @@ -53,7 +53,7 @@ being nominated for the BentoML Core Team. The BentoML Core Team consist of contributors who have produced contributions that are substantial in quality and quantity. The core team members has additional -responsibilities over those of a contributor. These responsibilities ensure the smooth +responsibilities over those of a contributor. These responsibilities ensure the smooth running of the project: * Managing the BentoML code and documentation @@ -62,17 +62,17 @@ running of the project: * Project governance decisions and changes * Coordination of core team, contributors and users * Managing the membership of the core team -* Make final decisions when regular community discussion doesn’t produce consensus on +* Make final decisions when regular community discussion doesn’t produce consensus on an issue in a reasonable time frame. Members of the Core Team do not have significant authority over other members of the -community, although it is the Core Team that votes on new Core Team members. It also -makes decisions when community consensus cannot be reached. +community, although it is the Core Team that votes on new Core Team members. It also +makes decisions when community consensus cannot be reached. -A contributor who shows an above-average level of contribution to the project, -particularly with respect to its strategic direction and long-term health, may be -nominated to become a member of the Core Team, by existing Core Team members. 
A +A contributor who shows an above-average level of contribution to the project, +particularly with respect to its strategic direction and long-term health, may be +nominated to become a member of the Core Team, by existing Core Team members. A nomination will result in discussion and then a vote by the existing Core Team members. Core Team membership votes are subject to consensus approval of the current Core Team members. @@ -80,36 +80,36 @@ members. #### Benevolent dictator (project lead) -The Project will have a BDFL (Benevolent Dictator for Life), who is currently Chaoyu -Yang. As Dictator, the BDFL has the authority to make all final decisions for The -Project. As Benevolent, the BDFL, in practice chooses to defer that authority to the -consensus of the community discussion channels and the Core Team. It is expected that +The Project will have a BDFL (Benevolent Dictator for Life), who is currently Chaoyu +Yang. As Dictator, the BDFL has the authority to make all final decisions for The +Project. As Benevolent, the BDFL, in practice chooses to defer that authority to the +consensus of the community discussion channels and the Core Team. It is expected that the BDFL will only rarely assert his/her final authority. Because it is rarely used, we refer to BDFL’s final authority as a “special” or “overriding” vote. When it does occur, -the BDFL override typically happens in situations where there is a deadlock in the Core -Team or if the Core Team asks the BDFL to make a decision on a specific matter. To +the BDFL override typically happens in situations where there is a deadlock in the Core +Team or if the Core Team asks the BDFL to make a decision on a specific matter. To ensure the benevolence of the BDFL, The Project encourages others to fork The Project if -they disagree with the overall direction the BDFL is taking. The BDFL is chair of the -Core Team and may delegate his/her authority on a particular decision or set of +they disagree with the overall direction the BDFL is taking. The BDFL is chair of the +Core Team and may delegate his/her authority on a particular decision or set of decisions to any other Core Team Member at his/her discretion. -The BDFL can appoint his/her successor, but it is expected that the Core Team would be -consulted on this decision. If the BDFL is unable to appoint a successor (e.g. due to +The BDFL can appoint his/her successor, but it is expected that the Core Team would be +consulted on this decision. If the BDFL is unable to appoint a successor (e.g. due to death or illness), the Core Team will choose a successor by voting with at least 2/3 of -the Core Team members voting in favor of the chosen successor. At least 80% of the Core +the Core Team members voting in favor of the chosen successor. At least 80% of the Core Team must participate in the vote. #### Conflict of interest -It is expected that the BDFL and Core Team Members will be employed at a wide range of -companies, universities and non-profit organizations. Because of this, it is possible +It is expected that the BDFL and Core Team Members will be employed at a wide range of +companies, universities and non-profit organizations. Because of this, it is possible that Members will have conflicts of interest. Such conflicts of interest include, but are not limited to: Financial interests, such as investments, employment or contracting work, outside of The Project that may influence their work on The Project. 
Access to proprietary information -of their employer that could potentially leak into their work with the Project. All +of their employer that could potentially leak into their work with the Project. All members of the Core Team, BDFL included, shall disclose to the rest of the Core Team any conflict of interest they may have. Members with a conflict of interest in a particular issue may participate in Core Team discussions on that issue, but must recuse themselves @@ -123,7 +123,7 @@ they will appoint a substitute BDFL for that decision. * Bozhao Yu - [@yubozhao](https://github.com/yubozhao), [🔗linkedin](https://www.linkedin.com/in/bozhaoyu/) * Chaoyu Yang (BDFL) - [@parano](https://github.com/parano), [🔗linkedin](https://www.linkedin.com/in/parano/) * Liang Sun - [@leonsim](https://github.com/leonsim), [🔗linkedin](https://www.linkedin.com/in/1e0ns/) -* Sungjun Kim - [@withsmilo](https://github.com/withsmilo), [🔗linkedin](https://www.linkedin.com/in/smilo/) +* Sungjun Kim - [@withsmilo](https://github.com/withsmilo), [🔗linkedin](https://www.linkedin.com/in/smilo/) * TBD * TBD @@ -131,23 +131,23 @@ they will appoint a substitute BDFL for that decision. ## Contribution Process Anyone can contribute to the project, regardless of their skills, as there are many ways -to contribute. For instance, a contributor might be active on the project mailing list -and issue tracker, or might supply patches. The various ways of contributing are +to contribute. For instance, a contributor might be active on the project mailing list +and issue tracker, or might supply patches. The various ways of contributing are described in more detail in the [Contribution Guidelines](https://github.com/bentoml/BentoML/blob/main/CONTRIBUTING.md). -The [BentoML contributor mailing list](https://groups.google.com/forum/#!forum/bentoml) +The [BentoML contributor mailing list](https://groups.google.com/forum/#!forum/bentoml) is the most appropriate place to ask for help or get feedback when making contributions. ## Decision Making Process -Decisions about the future of the project are made through discussion with all members -of the community, from the newest user to the most experienced PMC member. All -non-sensitive project management discussion takes place on the -[BentoML contributors mailing list](https://groups.google.com/forum/#!forum/bentoml). +Decisions about the future of the project are made through discussion with all members +of the community, from the newest user to the most experienced PMC member. All +non-sensitive project management discussion takes place on the +[BentoML contributors mailing list](https://groups.google.com/forum/#!forum/bentoml). -In order to ensure that the project is not bogged down by endless discussion and -continual voting, the project operates a policy of lazy consensus. This allows the +In order to ensure that the project is not bogged down by endless discussion and +continual voting, the project operates a policy of lazy consensus. This allows the majority of decisions to be made without resorting to a formal vote. @@ -161,36 +161,36 @@ Decision making typically involves the following steps: * Decision Any community member can make a proposal for consideration by the community. In order to -initiate a discussion about a new idea, they should send an email to the BentoML -contributors’ list or create a github issue describing the idea. This will prompt a -review and, if necessary, a discussion of the idea. 
The goal of this review and -discussion is to gain approval for the contribution. Since most people in the project -community have a shared vision, there is often little need for discussion in order to +initiate a discussion about a new idea, they should send an email to the BentoML +contributors’ list or create a github issue describing the idea. This will prompt a +review and, if necessary, a discussion of the idea. The goal of this review and +discussion is to gain approval for the contribution. Since most people in the project +community have a shared vision, there is often little need for discussion in order to reach consensus. -In general, as long as nobody explicitly opposes a proposal or patch, it is recognised +In general, as long as nobody explicitly opposes a proposal or patch, it is recognised as having the support of the community. This is called lazy consensus - that is, those who have not stated their opinion explicitly have implicitly agreed to the implementation of the proposal. Lazy consensus is a very important concept within the project. It is this process that -allows a large group of people to efficiently reach consensus, as someone with no +allows a large group of people to efficiently reach consensus, as someone with no objections to a proposal need not spend time stating their position, and others need not spend time reading such mails. For lazy consensus to be effective, it is necessary to allow at least 72 hours before -assuming that there are no objections to the proposal. This requirement ensures that -everyone is given enough time to read, digest and respond to the proposal. This time +assuming that there are no objections to the proposal. This requirement ensures that +everyone is given enough time to read, digest and respond to the proposal. This time period is chosen so as to be as inclusive as possible of all participants, regardless of their location and time commitments. #### Voting -Not all decisions can be made using lazy consensus. Issues such as those affecting the +Not all decisions can be made using lazy consensus. Issues such as those affecting the strategic direction or legal standing of the project must gain explicit approval from -the Core Team, which will adopt the -[Apache Foundation voting process](https://www.apache.org/foundation/voting.html). +the Core Team, which will adopt the +[Apache Foundation voting process](https://www.apache.org/foundation/voting.html). Every member of the community is encouraged to express their opinions in all discussion and all votes. However, only Core Team members have binding votes for the purposes of decision making. diff --git a/Makefile b/Makefile index 2b834e61dc4..5770cc0b4af 100644 --- a/Makefile +++ b/Makefile @@ -12,19 +12,13 @@ help: ## Show all Makefile targets .PHONY: format format-proto lint lint-proto type style clean format: ## Running code formatter: black and isort @echo "(black) Formatting codebase..." - @black --config pyproject.toml src tests docs examples - @echo "(black) Formatting stubs..." - @find src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; - @echo "(isort) Reordering imports..." - @isort . - @echo "(ruff) Running fix only..." - @ruff check src examples tests --fix-only + @pre-commit run --all-files black format-proto: ## Running proto formatter: buf @echo "Formatting proto files..." 
docker run --init --rm --volume $(GIT_ROOT)/src:/workspace --workdir /workspace bufbuild/buf format --config "/workspace/bentoml/grpc/buf.yaml" -w bentoml/grpc lint: ## Running lint checker: ruff @echo "(ruff) Linting development project..." - @ruff check src examples tests + @pre-commit run --all-files ruff lint-proto: ## Running proto lint checker: buf @echo "Linting proto files..." docker run --init --rm --volume $(GIT_ROOT)/src:/workspace --workdir /workspace bufbuild/buf lint --config "/workspace/bentoml/grpc/buf.yaml" --error-format msvs bentoml/grpc diff --git a/SECURITY.md b/SECURITY.md index 34ca4cb316d..800ea16955b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,7 +4,7 @@ BentoML is currently under active development and releases a new version every 2-3 weeks. We always recommend users to move to a newer version -when it became available, and we only provide security updates in the +when it became available, and we only provide security updates in the latest version. If you are using an older version of BentoML and would like to receive diff --git a/docs/README.md b/docs/README.md index 249e424a27f..779c279b38f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,13 +7,13 @@ http://docs.bentoml.org/ to read the full documentation. This guide is made for anyone who's interested in running BentoML documentation locally, making changes to it and make contributions. BentoML is made by the thriving community -behind it, and you're always welcome to make contributions to the project and the -documentation. +behind it, and you're always welcome to make contributions to the project and the +documentation. -Before starting to make a contribution to the docs, make sure to check the -[issues page](https://github.com/bentoml/BentoML/issues) and the `#bentoml-contributors` -channel in the [community slack](https://l.bentoml.com/join-slack), to make sure no one -else is working on the same thing and to get feedback from the community for larger +Before starting to make a contribution to the docs, make sure to check the +[issues page](https://github.com/bentoml/BentoML/issues) and the `#bentoml-contributors` +channel in the [community slack](https://l.bentoml.com/join-slack), to make sure no one +else is working on the same thing and to get feedback from the community for larger proposals. @@ -49,7 +49,7 @@ python -m http.server 8000 -d docs/build/html And open your browser at http://0.0.0.0:8000/ to view the generated docs. -#### Spellcheck +#### Spellcheck Install spellchecker dependencies: ```bash @@ -63,12 +63,12 @@ make spellcheck-doc ##### Watch Docs -We recommend using sphinx-autobuild during development, which provides a live-reloading -server, that rebuilds the documentation and refreshes any open pages automatically when -changes are saved. This enables a much shorter feedback loop which can help boost +We recommend using sphinx-autobuild during development, which provides a live-reloading +server, that rebuilds the documentation and refreshes any open pages automatically when +changes are saved. This enables a much shorter feedback loop which can help boost productivity when writing documentation. 
-Simply run the following command from BentoML project's root directory: +Simply run the following command from BentoML project's root directory: ```bash sphinx-autobuild docs/source docs/build/html @@ -90,7 +90,7 @@ BentoML docs is built with Sphinx, which natively supports [ReStructuredText](ht #### Document titles and section headers -In reStructuredText, there are no heading levels assigned to certain characters as the +In reStructuredText, there are no heading levels assigned to certain characters as the structure is determined from the succession of headings. However in BentoML docs, we follow the following convention: @@ -116,7 +116,7 @@ Top Level Headings When writing documentation, it is common to mention or link to other parts of the docs. -If you need to refer to a specific documentation page, use `:doc:` plus path to the +If you need to refer to a specific documentation page, use `:doc:` plus path to the target documentation file under the `docs/source/`. e.g.: ```rst @@ -153,7 +153,7 @@ A `note` section can be created with the following syntax: If you really want, you can even have lists, or code, or tables. ``` -There are other admonition types such as `caution`, `danger`, `hint`, `important`, +There are other admonition types such as `caution`, `danger`, `hint`, `important`, `seealso`, and `tip`. Learn more about it [here](https://pradyunsg.me/furo/reference/admonitions/). #### Code Blocks @@ -204,10 +204,10 @@ https://sphinx-design.readthedocs.io/en/furo-theme/tabs.html BentoML docs relies heavily on the Python docstrings defined together with the source code. We ask our contributors to document every public facing APIs and CLIs, including their signatures, options, and example usage. Sphinx can then use these inline docs to -generate API References pages. +generate API References pages. BentoML uses the [sphinx.ext.autodoc](https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html) -extension to include documentation from docstring. For example, a `.rst` document can +extension to include documentation from docstring. For example, a `.rst` document can create a section made from a Python Class's docstring, using the following syntax: ```rst diff --git a/docs/source/concepts/bento.rst b/docs/source/concepts/bento.rst index ea9845c2823..fe9eba47d49 100644 --- a/docs/source/concepts/bento.rst +++ b/docs/source/concepts/bento.rst @@ -49,7 +49,7 @@ with a ``bentofile.yaml`` build file. Here's an example from the Building BentoML service "iris_classifier:dpijemevl6nlhlg6" from build context "/home/user/gallery/quickstart" Packing model "iris_clf:zy3dfgxzqkjrlgxi" Locking PyPI package versions.. - + ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ @@ -364,7 +364,7 @@ Service ^^^^^^^ ``service`` is a **required** field which specifies where the -``bentoml.Service`` object is defined. +``bentoml.Service`` object is defined. 
In the :doc:`tutorial `, we defined ``service: "service:svc"``, which can be interpreted as: diff --git a/docs/source/concepts/deploy.rst b/docs/source/concepts/deploy.rst index 91195eff0f1..ef80c9c9953 100644 --- a/docs/source/concepts/deploy.rst +++ b/docs/source/concepts/deploy.rst @@ -7,7 +7,7 @@ Deployment Overview ------------------- BentoML is designed to provide a unified packaging format, for deploying AI applications -via a wide range of serving patterns, including real-time inference API, offline batch inference, +via a wide range of serving patterns, including real-time inference API, offline batch inference, streaming inference, and custom integrations. For online API use cases, here are the three most common cloud deployment solutions: @@ -15,7 +15,7 @@ For online API use cases, here are the three most common cloud deployment soluti * `☁️ Deploy to BentoCloud `_ - Serverless cloud for AI, the best place to deploy and operate BentoML for AI teams. `Sign up here `_ for early access. * `🦄️ Deploy on Kubernetes with Yatai `_ - - Cloud-native AI deployment on Kubernetes, comes with advanced auto-scaling + - Cloud-native AI deployment on Kubernetes, comes with advanced auto-scaling and CI/CD workflows. Requires professional DevOps team to maintain and operate. * `🚀 Fast Cloud Deployment with BentoCTL `_ - Great for proof-of-concept deployments directly running on public cloud services (EC2, ECS, SageMaker, Lambda, GCP, etc). @@ -30,7 +30,7 @@ Feature comparison across deployment options: * - Feature - `🍱 BentoCloud `_ - `Yatai on Kubernetes `_ - - Cloud Deployment with `BentoCTL `_ + - Cloud Deployment with `BentoCTL `_ * - Auto-scaling - ✅ Fast auto-scaling optimized for AI - ✅ Kubernetes-native with custom metrics @@ -61,7 +61,7 @@ Feature comparison across deployment options: - ✅ Native Terraform integration, easily customizable * - Access control - ✅ Flexible API token management and Role-based access control - - Inherits Kubernetes' account and RBAC mechanism, no model/bento/endpoint level access control + - Inherits Kubernetes' account and RBAC mechanism, no model/bento/endpoint level access control - No access control besides basic cloud platform permissions such as creating/deleting resources @@ -304,4 +304,4 @@ Deploy to BentoCloud -------------------- `BentoCloud `_ is currently under private beta. Please contact -us by scheduling a demo request `here `_. \ No newline at end of file +us by scheduling a demo request `here `_. diff --git a/docs/source/concepts/index.rst b/docs/source/concepts/index.rst index d6dfab23a9a..282cc09a99f 100644 --- a/docs/source/concepts/index.rst +++ b/docs/source/concepts/index.rst @@ -51,4 +51,3 @@ This is a step-by-step tour that help you dive into the main concepts in BentoML bento runner deploy - diff --git a/docs/source/concepts/service.rst b/docs/source/concepts/service.rst index 9f07180c710..7cacd12a588 100644 --- a/docs/source/concepts/service.rst +++ b/docs/source/concepts/service.rst @@ -100,7 +100,7 @@ example, to debug a service called ``svc`` in ``service.py``: Service APIs ------------ -Inference APIs define how the service functionality can be called remotely. A service can +Inference APIs define how the service functionality can be called remotely. A service can have one or more APIs. An API consists of its input/output specs and a callback function: .. code-block:: python @@ -116,11 +116,11 @@ have one or more APIs. 
An API consists of its input/output specs and a callback By decorating a function with ``@svc.api``, we declare that the function shall be invoked when this API is called. The API function is a great place for defining your -serving logic, such as feature fetching, pre and post processing, and model inferences +serving logic, such as feature fetching, pre and post processing, and model inferences via Runners. When running ``bentoml serve`` with the example above, this API function is -transformed into an HTTP endpoint, ``/predict``, that takes in a ``np.ndarray`` as +transformed into an HTTP endpoint, ``/predict``, that takes in a ``np.ndarray`` as input, and returns a ``np.ndarray`` as output. The endpoint can be called with the following ``curl`` command: @@ -161,7 +161,7 @@ Inference Context ^^^^^^^^^^^^^^^^^ The context of an inference call can be accessed through the additional ``bentoml.Context`` -argument added to the service API function. Both the request and response contexts can be +argument added to the service API function. Both the request and response contexts can be accessed through the inference context for getting and setting the headers, cookies, and status codes. @@ -177,7 +177,7 @@ status codes. result = runner.run(input_array) - # set response headers, cookies, and status code + # set response headers, cookies, and status code ctx.response.status_code = 202 ctx.response.cookies = [ bentoml.Cookie( @@ -230,18 +230,18 @@ declare and invoke these descriptors please see the Schema and Validation ^^^^^^^^^^^^^^^^^^^^^ -IO descriptors allow users to define the expected data types, shape, and schema, based -on the type of the input and output descriptor specified. IO descriptors can also be defined -through examples with the ``from_sample`` API to simplify the development of service +IO descriptors allow users to define the expected data types, shape, and schema, based +on the type of the input and output descriptor specified. IO descriptors can also be defined +through examples with the ``from_sample`` API to simplify the development of service definitions. Numpy ~~~~~ -The data type and shape of the ``NumpyNdarray`` can be specified with the ``dtype`` -and ``shape`` arguments. By setting the ``enforce_shape`` and ``enforce_dtype`` -arguments to `True`, the IO descriptor will strictly validate the input and output data -based the specified data type and shape. To learn more, see IO descrptor reference for +The data type and shape of the ``NumpyNdarray`` can be specified with the ``dtype`` +and ``shape`` arguments. By setting the ``enforce_shape`` and ``enforce_dtype`` +arguments to `True`, the IO descriptor will strictly validate the input and output data +based the specified data type and shape. To learn more, see IO descrptor reference for :ref:`reference/api_io_descriptors:NumPy ``ndarray```. .. code-block:: python @@ -270,10 +270,10 @@ based the specified data type and shape. To learn more, see IO descrptor referen Pandas DataFrame ~~~~~~~~~~~~~~~~ -The data type and shape of the ``PandasDataFrame`` can be specified with the ``dtype`` -and ``shape`` arguments. By setting the ``enforce_shape`` and ``enforce_dtype`` -arguments to `True`, the IO descriptor will strictly validate the input and output data -based the specified data type and shape. To learn more, see IO descrptor reference for +The data type and shape of the ``PandasDataFrame`` can be specified with the ``dtype`` +and ``shape`` arguments. 
By setting the ``enforce_shape`` and ``enforce_dtype`` +arguments to `True`, the IO descriptor will strictly validate the input and output data +based the specified data type and shape. To learn more, see IO descrptor reference for :ref:`reference/api_io_descriptors:Tabular Data with Pandas`. .. code-block:: python @@ -303,7 +303,7 @@ based the specified data type and shape. To learn more, see IO descrptor referen JSON ~~~~ -The data type of a JSON IO descriptor can be specified through a Pydantic model. By setting +The data type of a JSON IO descriptor can be specified through a Pydantic model. By setting a pydantic model, the IO descriptor will validate the input based on the specified pydantic model and return. To learn more, see IO descrptor reference for :ref:`reference/api_io_descriptors:Structured Data with JSON`. We also provide @@ -476,6 +476,3 @@ Further tuning of event loop configuration is not needed under common use cases. Exception handling custom error code custom error msg - - - diff --git a/docs/source/frameworks/catboost.rst b/docs/source/frameworks/catboost.rst index b849b8fca9b..7eb2b17ed7e 100644 --- a/docs/source/frameworks/catboost.rst +++ b/docs/source/frameworks/catboost.rst @@ -153,7 +153,7 @@ access: resources: nvidia.com/gpu: 0 -Adaptive batching +Adaptive batching ~~~~~~~~~~~~~~~~~ .. seealso:: diff --git a/docs/source/frameworks/detectron.rst b/docs/source/frameworks/detectron.rst index 02b66497ac7..3389de2914b 100644 --- a/docs/source/frameworks/detectron.rst +++ b/docs/source/frameworks/detectron.rst @@ -2,8 +2,8 @@ Detectron2 ========== -`Detectron2 `_ is Facebook AI Research's (FAIR) next generation library -that provides state-of-the-art detection and segmentation algorithms. It is the successor of Detectron and maskrcnn-benchmark. +`Detectron2 `_ is Facebook AI Research's (FAIR) next generation library +that provides state-of-the-art detection and segmentation algorithms. It is the successor of Detectron and maskrcnn-benchmark. It supports a number of computer vision research projects and production applications in Facebook. This guide will provide an overview of how to save and load Detectron2 models with BentoML. @@ -75,8 +75,8 @@ BentoML also supports saving this predictor with ``bentoml.detectron.save_model` .. note:: - :bdg-info:`Remarks:` External python classes or utility functions required by the Detectron models/custom models - must be referenced in ``.`` format, and such modules should be passed to ``bentoml.detectron.save_model`` via ``external_modules``. + :bdg-info:`Remarks:` External python classes or utility functions required by the Detectron models/custom models + must be referenced in ``.`` format, and such modules should be passed to ``bentoml.detectron.save_model`` via ``external_modules``. For example: @@ -90,7 +90,7 @@ BentoML also supports saving this predictor with ``bentoml.detectron.save_model` This is due to a limitation from PyTorch model serialisation, where PyTorch requires the model's source code to restore it. -The signatures used for creating a Runner is ``{"__call__": {"batchable": False}}``. This means by default, BentoML’s `Adaptive Batching `_ is disabled when using :obj:`~bentoml.pytorch.save_model()`. If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: +The signatures used for creating a Runner is ``{"__call__": {"batchable": False}}``. 
This means by default, BentoML’s `Adaptive Batching `_ is disabled when using :obj:`~bentoml.pytorch.save_model()`. If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: .. code-block:: python @@ -138,8 +138,8 @@ Create a BentoML service with the previously saved ``coco-masked-rcnn-predictor` Adaptive Batching ----------------- -Most Detectron models can accept batched data as input. If batched interence is supported, it is recommended to enable batching to take advantage of -the adaptive batching capability to improve the throughput and efficiency of the model. Enable adaptive batching by overriding the :code:`signatures` +Most Detectron models can accept batched data as input. If batched interence is supported, it is recommended to enable batching to take advantage of +the adaptive batching capability to improve the throughput and efficiency of the model. Enable adaptive batching by overriding the :code:`signatures` argument with the method name and providing :code:`batchable` and :code:`batch_dim` configurations when saving the model to the model store. .. seealso:: diff --git a/docs/source/frameworks/fastai.rst b/docs/source/frameworks/fastai.rst index f857eaa5ced..8aac138d91b 100644 --- a/docs/source/frameworks/fastai.rst +++ b/docs/source/frameworks/fastai.rst @@ -7,10 +7,10 @@ for researchers to build new approaches. To learn more about fastai, visit their BentoML provides native support for `fastai `_, and this guide provides an overview of how to use BentoML with fastai. -Compatibility +Compatibility ------------- -BentoML requires fastai **version 2** or higher to be installed. +BentoML requires fastai **version 2** or higher to be installed. BentoML does not support fastai version 1. If you are using fastai version 1, consider using :ref:`concepts/runner:Custom Runner`. @@ -141,8 +141,8 @@ Using Runners See :ref:`concepts/runner:Using Runners` doc for a general introduction to the Runner concept and its usage. -``runner.predict.run`` is generally a drop-in replacement for ``learner.predict`` regardless of the learner type -for executing the prediction in the model runner. A fastai runner will receive the same inputs type as +``runner.predict.run`` is generally a drop-in replacement for ``learner.predict`` regardless of the learner type +for executing the prediction in the model runner. A fastai runner will receive the same inputs type as the given learner. @@ -181,7 +181,7 @@ See `mixed precision `_ to learn more a If you need to use GPU for inference, you can :ref:`use the PyTorch layer `. -Adaptive batching +Adaptive batching ~~~~~~~~~~~~~~~~~ fastai's ``Learner#predict`` does not support taking batch input for inference, hence diff --git a/docs/source/frameworks/keras.rst b/docs/source/frameworks/keras.rst index c3ee5bd54ad..c1ef8604124 100644 --- a/docs/source/frameworks/keras.rst +++ b/docs/source/frameworks/keras.rst @@ -68,7 +68,7 @@ to save the model instance to BentoML model store. bentoml.keras.save_model("keras_resnet50", model) -Keras model can be loaded with :obj:`~bentoml.keras.load_model` to +Keras model can be loaded with :obj:`~bentoml.keras.load_model` to verify that the saved model can be loaded properly. .. code-block:: python @@ -83,11 +83,11 @@ Building a Service using Keras .. seealso:: - See :ref:`Building a Service ` for more + See :ref:`Building a Service ` for more information on creating a prediction service with BentoML. 
-The following service example creates a ``predict`` API endpoint that accepts an image as input -and return JSON data as output. Within the API function, Keras model runner created from the +The following service example creates a ``predict`` API endpoint that accepts an image as input +and return JSON data as output. Within the API function, Keras model runner created from the previously saved ResNet50 model is used for inference. .. code-block:: python diff --git a/docs/source/frameworks/lightgbm.rst b/docs/source/frameworks/lightgbm.rst index 28aa3030096..35aae1b463b 100644 --- a/docs/source/frameworks/lightgbm.rst +++ b/docs/source/frameworks/lightgbm.rst @@ -56,5 +56,3 @@ Users can now use LightGBM with BentoML with the following API: :code:`load_mode .. note:: You can find more examples for **LightGBM** in our `bentoml/examples `_ repo. - - diff --git a/docs/source/frameworks/pytorch.rst b/docs/source/frameworks/pytorch.rst index 03a910fc328..d6eb1be004b 100644 --- a/docs/source/frameworks/pytorch.rst +++ b/docs/source/frameworks/pytorch.rst @@ -156,7 +156,7 @@ For common PyTorch models with single input: :code:`bentoml.pytorch.save_model` has parameter ``signatures``. The ``signatures`` argument of type :ref:`Model Signatures ` in :obj:`bentoml.pytorch.save_model` is used to determine which methods will be used for inference and exposed in the Runner. The signatures dictionary will then be used during the creation process of a Runner instance. -The signatures used for creating a Runner is ``{"__call__": {"batchable": False}}``. This means by default, BentoML’s `Adaptive Batching `_ is disabled when using :obj:`~bentoml.pytorch.save_model()`. If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: +The signatures used for creating a Runner is ``{"__call__": {"batchable": False}}``. This means by default, BentoML’s `Adaptive Batching `_ is disabled when using :obj:`~bentoml.pytorch.save_model()`. If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: @@ -193,8 +193,8 @@ Create a BentoML service with the previously saved `my_torch_model` pipeline usi Adaptive Batching ----------------- -Most PyTorch models can accept batched data as input. If batched interence is supported, it is recommended to enable batching to take advantage of -the adaptive batching capability to improve the throughput and efficiency of the model. Enable adaptive batching by overriding the :code:`signatures` +Most PyTorch models can accept batched data as input. If batched interence is supported, it is recommended to enable batching to take advantage of +the adaptive batching capability to improve the throughput and efficiency of the model. Enable adaptive batching by overriding the :code:`signatures` argument with the method name and providing :code:`batchable` and :code:`batch_dim` configurations when saving the model to the model store. .. seealso:: diff --git a/docs/source/frameworks/pytorch_lightning.rst b/docs/source/frameworks/pytorch_lightning.rst index 61f3c1d4000..7ba3c1f6480 100644 --- a/docs/source/frameworks/pytorch_lightning.rst +++ b/docs/source/frameworks/pytorch_lightning.rst @@ -40,4 +40,3 @@ Here's a simple example of using PyTorch Lightning with BentoML: .. autofunction:: bentoml.pytorch_lightning.load_model .. 
autofunction:: bentoml.pytorch_lightning.get - diff --git a/docs/source/frameworks/tensorflow.rst b/docs/source/frameworks/tensorflow.rst index a55cbebf4c9..0414d71356d 100644 --- a/docs/source/frameworks/tensorflow.rst +++ b/docs/source/frameworks/tensorflow.rst @@ -2,13 +2,13 @@ TensorFlow ========== -TensorFlow is an open source machine learning library focusing on deep neural networks. BentoML provides native support for +TensorFlow is an open source machine learning library focusing on deep neural networks. BentoML provides native support for serving and deploying models trained from TensorFlow. Preface ------- -Even though ``bentoml.tensorflow`` supports Keras model, we recommend our users to use :doc:`bentoml.keras ` for better development experience. +Even though ``bentoml.tensorflow`` supports Keras model, we recommend our users to use :doc:`bentoml.keras ` for better development experience. If you must use TensorFlow for your Keras model, make sure that your Keras model inference callback (such as ``predict``) is decorated with :obj:`~tf.function`. @@ -148,7 +148,7 @@ Saving a Trained Model model.fit(train_x, train_y, epochs=10) bentoml.tensorflow.save_model( - "my_keras_model", + "my_keras_model", model, signatures={"__call__": {"batchable": True, "batch_dim": 0}} ) @@ -195,7 +195,7 @@ Saving a Trained Model This means BentoML’s :ref:`Adaptive Batching ` is disabled when using :obj:`~bentoml.tensorflow.save_model()`. -If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: +If you want to utilize adaptive batching behavior and know your model's dynamic batching dimension, make sure to pass in ``signatures`` as follow: .. code-block:: python diff --git a/docs/source/frameworks/transformers.rst b/docs/source/frameworks/transformers.rst index 61690dfe368..99872e68420 100644 --- a/docs/source/frameworks/transformers.rst +++ b/docs/source/frameworks/transformers.rst @@ -5,16 +5,16 @@ Transformers `🤗 Transformers `_ is a popular open-source library for natural language processing, providing pre-trained models and tools for building, training, and deploying custom language models. It offers support for a wide range of transformer-based architectures, access to pre-trained models for various NLP tasks, and the ability to fine-tune pre-trained models on -specific tasks. BentoML provides native support for serving and deploying models trained from +specific tasks. BentoML provides native support for serving and deploying models trained from Transformers. -Compatibility +Compatibility ------------- -BentoML requires Transformers version 4 or above. For other versions of Transformers, consider using a +BentoML requires Transformers version 4 or above. For other versions of Transformers, consider using a :ref:`concepts/runner:Custom Runner`. -When constructing a :ref:`bentofile.yaml `, include ``transformers`` and the machine learning +When constructing a :ref:`bentofile.yaml `, include ``transformers`` and the machine learning framework of the model, e.g. ``pytorch``, ``tensorflow``, or ``jax``. .. tab-set:: @@ -58,7 +58,7 @@ Transformers provides pre-trained models for a wide range of tasks, including te and text generation. The pre-trained models have been trained on large amounts of data and are designed to be fine-tuned on specific downstream tasks. 
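For example, a pre-trained pipeline for one of these tasks can be pulled down and stored in the
BentoML model store in a couple of lines. The sketch below is illustrative only; the chosen task and
model name are not taken from this guide.

.. code-block:: python

    import bentoml
    from transformers import pipeline

    # Create a pre-trained pipeline for a downstream task (model choice is illustrative)
    classifier = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )

    # Save the pipeline to the BentoML model store so it can be served later
    bentoml.transformers.save_model("sentiment-classifier", classifier)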
Fine-tuning pretrained models is a highly effective practice that enables users to reduce computation costs while adapting state-of-the-art models to their specific domain dataset. To facilitate this process, Transformers provides a diverse range of libraries specifically designed for -fine-tuning pretrained models. To learn more, refer to the Transformers guide on +fine-tuning pretrained models. To learn more, refer to the Transformers guide on `fine-tuning pretrained models `_. .. tip:: @@ -214,7 +214,7 @@ the ``to_runner`` method. Custom Pipelines ---------------- -Transformers custom pipelines allow users to define their own pre and post-process logic and customize how input data is forwarded to +Transformers custom pipelines allow users to define their own pre and post-process logic and customize how input data is forwarded to the model for inference. .. seealso:: @@ -223,7 +223,7 @@ the model for inference. .. code-block:: python :caption: `train.py` - + from transformers import Pipeline class MyClassificationPipeline(Pipeline): @@ -247,12 +247,12 @@ the model for inference. Saving a Custom Pipeline ~~~~~~~~~~~~~~~~~~~~~~~~ -A custom pipeline first needs to be added to the Transformers supported tasks, :code:`SUPPORTED_TASKS` before it can be created with +A custom pipeline first needs to be added to the Transformers supported tasks, :code:`SUPPORTED_TASKS` before it can be created with the Transformers :code:`pipeline` API. .. code-block:: python :caption: `train.py` - + from transformers import pipeline from transformers import AutoTokenizer from transformers import AutoModelForSequenceClassification @@ -278,13 +278,13 @@ the Transformers :code:`pipeline` API. ), ) -Once a new pipeline is added to the Transformers supported tasks, it can be saved to the BentoML model store with the additional -arguments of :code:`task_name` and :code:`task_definition`, the same arguments that were added to the Transformers :code:`SUPPORTED_TASKS` +Once a new pipeline is added to the Transformers supported tasks, it can be saved to the BentoML model store with the additional +arguments of :code:`task_name` and :code:`task_definition`, the same arguments that were added to the Transformers :code:`SUPPORTED_TASKS` when creating the pipeline. :code:`task_name` and :code:`task_definition` will be saved as model options alongside the model. .. code-block:: python :caption: `train.py` - + import bentoml bentoml.transformers.save_model( @@ -297,12 +297,12 @@ when creating the pipeline. :code:`task_name` and :code:`task_definition` will b Serving a Custom Pipeline ~~~~~~~~~~~~~~~~~~~~~~~~~ -To serve a custom pipeline, simply create a runner and service with the previously saved pipeline. :code:`task_name` and +To serve a custom pipeline, simply create a runner and service with the previously saved pipeline. :code:`task_name` and :code:`task_definition` will be automatically applied when initializing the runner. .. code-block:: python :caption: `service.py` - + import bentoml from bentoml.io import Text, JSON @@ -318,9 +318,9 @@ To serve a custom pipeline, simply create a runner and service with the previous Adaptive Batching ----------------- -If the model supports batched interence, it is recommended to enable batching to take advantage of the adaptive batching capability -in BentoML by overriding the :code:`signatures` argument with the method name (:code:`__call__`), :code:`batchable`, and :code:`batch_dim` -configurations when saving the model to the model store . 
+If the model supports batched interence, it is recommended to enable batching to take advantage of the adaptive batching capability +in BentoML by overriding the :code:`signatures` argument with the method name (:code:`__call__`), :code:`batchable`, and :code:`batch_dim` +configurations when saving the model to the model store . .. seealso:: diff --git a/docs/source/guides/ci.rst b/docs/source/guides/ci.rst index 0526d662704..8d16cd1e43b 100644 --- a/docs/source/guides/ci.rst +++ b/docs/source/guides/ci.rst @@ -18,4 +18,3 @@ with BentoML. `BentoML development guide `_ and `documentation guide `_ to get started. - diff --git a/docs/source/guides/containerization.rst b/docs/source/guides/containerization.rst index 884ff4ca4aa..507417e2366 100644 --- a/docs/source/guides/containerization.rst +++ b/docs/source/guides/containerization.rst @@ -4,7 +4,7 @@ Advanced Containerization *time expected: 12 minutes* -This guide describes advanced containerization options +This guide describes advanced containerization options provided by BentoML: - :ref:`Using base image ` @@ -156,7 +156,7 @@ Proceed to build your Bento with :code:`bentoml build` and containerize with :co bentoml containerize : -.. tip:: +.. tip:: You can also provide :code:`--progress plain` to see the progress from `buildkit `_ in plain text @@ -175,7 +175,7 @@ We will now demonstrate how to provide AWS credentials to a Bento via two approa .. note:: - :bdg-info:`Remarks:` We recommend for most cases + :bdg-info:`Remarks:` We recommend for most cases to use the second option (:ref:`guides/containerization:Mount credentials from host`) as it prevents any securities leak. @@ -208,7 +208,7 @@ Define the following :code:`Dockerfile.template`: ARG AWS_SECRET_ACCESS_KEY ARG AWS_ACCESS_KEY_ID - + ENV AWS_SECRET_ACCESS_KEY=$ARG AWS_SECRET_ACCESS_KEY ENV AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID {% endblock %} @@ -238,7 +238,7 @@ Define the following :code:`Dockerfile.template`: {% extends bento_base_template %} {% block SETUP_BENTO_COMPONENTS %} {{ super() }} - + RUN --mount=type=secret,id=aws,target=/root/.aws/credentials \ aws s3 cp s3://path/to/file {{ bento__path }} @@ -266,7 +266,7 @@ structure a :code:`Dockerfile.template`. The Dockerfile template is a mix between :code:`Jinja2` syntax and :code:`Dockerfile` syntax. BentoML set both `trim_blocks` and `lstrip_blocks` in Jinja -templates environment to :code:`True`. +templates environment to :code:`True`. .. note:: @@ -337,14 +337,14 @@ To construct a custom :code:`Dockerfile` template, users have to provide an `ext This is an expected behaviour from Jinja2, where Jinja2 accepts **any file** as a template. - We decided not to put any restrictions to validate the template file, simply because we want to enable - users to customize to their own needs. + We decided not to put any restrictions to validate the template file, simply because we want to enable + users to customize to their own needs. :code:`{{ super() }}` ^^^^^^^^^^^^^^^^^^^^^ As you can notice throughout this guides, we use a special function :code:`{{ super() }}`. This is a Jinja -features that allow users to call content of `parent block `_. This +features that allow users to call content of `parent block `_. This enables users to fully extend base templates provided by BentoML to ensure that the result Bentos can be containerized. 
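Putting these pieces together, a minimal custom template that extends the base template could look
like the sketch below; the extra ``RUN`` instruction is purely illustrative.

.. code-block:: jinja

    {% extends bento_base_template %}
    {% block SETUP_BENTO_COMPONENTS %}
    {{ super() }}
    # Instructions placed after super() run on top of the components BentoML already sets up
    RUN echo "building on top of the BentoML base template"
    {% endblock %}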
@@ -451,7 +451,7 @@ By default, a Bento sets: CMD ["bentoml", "serve", "{{ bento__path }}"] -This aboved instructions ensure that whenever :code:`docker run` is invoked on the 🍱 container, :code:`bentoml` is called correctly. +This aboved instructions ensure that whenever :code:`docker run` is invoked on the 🍱 container, :code:`bentoml` is called correctly. In scenarios where one needs to setup a custom :code:`ENTRYPOINT`, make sure to use the :code:`ENTRYPOINT` instruction under the :code:`SETUP_BENTO_ENTRYPOINT` block as follows: @@ -536,7 +536,7 @@ To use any of the aforementioned backends, they must be installed on your system .. note:: - By default, BentoML will use Docker as the container backend. + By default, BentoML will use Docker as the container backend. To use other container engines, please set the environment variable ``BENTOML_CONTAINERIZE_BACKEND`` or pass in ``--backend`` to :ref:`bentoml containerize `: @@ -615,7 +615,7 @@ To register a new backend, use :meth:`bentoml.container.register_backend`: BuildKit interop ^^^^^^^^^^^^^^^^ -BentoML leverages `BuildKit `_ for a more extensive feature set. However, we recognise that +BentoML leverages `BuildKit `_ for a more extensive feature set. However, we recognise that BuildKit has come with a lot of friction for migration purposes as well as restrictions to use with other build tools (such as podman, buildah, kaniko). Therefore, since BentoML version 1.0.11, BuildKit will be an opt-out. To disable BuildKit, pass ``DOCKER_BUILDKIT=0`` to diff --git a/docs/source/guides/grpc.rst b/docs/source/guides/grpc.rst index a292508fb1e..8adf3c9b21b 100644 --- a/docs/source/guides/grpc.rst +++ b/docs/source/guides/grpc.rst @@ -48,7 +48,7 @@ Using your gRPC BentoService There are two ways to interact with your gRPC BentoService: -1. Use tools such as :github:`fullstorydev/grpcurl`, :github:`fullstorydev/grpcui`: +1. Use tools such as :github:`fullstorydev/grpcurl`, :github:`fullstorydev/grpcui`: The server requires :github:`reflection ` to be enabled for those tools to work. Pass in ``--enable-reflection`` to enable reflection: @@ -789,7 +789,7 @@ As you can see, BentoService defines a `simple rpc` ``Call`` that sends a ``Requ A ``Request`` message takes in: -* `api_name`: the name of the API function defined inside your BentoService. +* `api_name`: the name of the API function defined inside your BentoService. * `oneof `_ `content`: the field can be one of the following types: +------------------------------------------------------------------+-------------------------------------------------------------------------------------------+ @@ -1493,7 +1493,7 @@ on a connection at one time. application will higher load and long running streams could see a performance degradation caused by queuing because of the limit. Setting a limit cap on the number of concurrent streams will prevent this from happening, but it also means that - you need to tune the limit cap to the right number. + you need to tune the limit cap to the right number. * If the limit cap is too low, you will sooner or later running into the issue mentioned above. @@ -1542,4 +1542,3 @@ message. We recommend you to also check out `gRPC performance best practice `_ to learn about best practice for gRPC. 
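To make the ``Request`` structure described above concrete, a bare-bones Python client for the
`simple rpc` ``Call`` might be sketched as follows. This sketch assumes the server listens on
``localhost:3000``, that the service exposes an API named ``classify`` accepting a 1x4 float array,
and that the generated stubs are importable from ``bentoml.grpc.v1``.

.. code-block:: python

    import grpc

    from bentoml.grpc.v1 import service_pb2 as pb
    from bentoml.grpc.v1 import service_pb2_grpc as services

    with grpc.insecure_channel("localhost:3000") as channel:
        stub = services.BentoServiceStub(channel)
        # api_name selects the BentoService API; ndarray is one of the oneof `content` fields
        request = pb.Request(
            api_name="classify",
            ndarray=pb.NDArray(
                dtype=pb.NDArray.DTYPE_FLOAT,
                shape=(1, 4),
                float_values=[5.9, 3.0, 5.1, 1.8],
            ),
        )
        response = stub.Call(request)
        print(response)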
- diff --git a/docs/source/guides/logging.rst b/docs/source/guides/logging.rst index ed2cbd94316..4aa38cb0125 100644 --- a/docs/source/guides/logging.rst +++ b/docs/source/guides/logging.rst @@ -13,7 +13,7 @@ webservices are logged along with requests to each of the model runner services. The request log format is as follows: .. parsed-literal:: - + time [LEVEL] [component] ClientIP:ClientPort (scheme,method,path,type,length) (status,type,length) Latency (trace,span,sampled) For example, a log message might look like: @@ -144,4 +144,3 @@ When using BentoML as a library, BentoML does not configure any logs. By default .. [#span_documentation] `OpenTelemetry Span Documentation `_ .. [#sampling_documentation] `OpenTelemetry SDK Documentation `_ - diff --git a/docs/source/guides/metrics.rst b/docs/source/guides/metrics.rst index f90913de6d2..bfb255436d3 100644 --- a/docs/source/guides/metrics.rst +++ b/docs/source/guides/metrics.rst @@ -7,7 +7,7 @@ Metrics Metrics are measurements of statistics about your service, which can provide information about the usage and performance of your bentos in production. BentoML allows users to define custom metrics with |prometheus|_ to easily enable monitoring for their Bentos. - + This article will dive into the default metrics and how to add custom metrics for either a :ref:`concepts/runner:Custom Runner` or :ref:`Service `. diff --git a/docs/source/guides/performance.rst b/docs/source/guides/performance.rst index 351a8d73f38..3e76eec81a6 100644 --- a/docs/source/guides/performance.rst +++ b/docs/source/guides/performance.rst @@ -35,4 +35,3 @@ This guide is intended to aid advanced BentoML users with a better understanding `BentoML development guide `_ and `documentation guide `_ to get started. - diff --git a/docs/source/guides/security.rst b/docs/source/guides/security.rst index b17aeaec529..89f6ee94b36 100644 --- a/docs/source/guides/security.rst +++ b/docs/source/guides/security.rst @@ -45,7 +45,7 @@ BentoML supports HTTPS with self-signed certificates. To enable HTTPS, you can t to the :code:`bentoml serve` command. Use :code:`bentoml serve --help` to see the full list of options. .. code:: - + bentoml serve iris_classifier:latest --ssl-certfile /path/to/cert.pem --ssl-keyfile /path/to/key.pem diff --git a/docs/source/guides/snippets/grpc/grpc_tools.rst b/docs/source/guides/snippets/grpc/grpc_tools.rst index 00872d1e06d..823d2a67235 100644 --- a/docs/source/guides/snippets/grpc/grpc_tools.rst +++ b/docs/source/guides/snippets/grpc/grpc_tools.rst @@ -69,4 +69,3 @@ --network=host fullstorydev/grpcui -plaintext 0.0.0.0:3000 Proceed to http://127.0.0.1:8080 in your browser and send test request from the web UI. - diff --git a/docs/source/guides/snippets/grpc/swift/Request.swift b/docs/source/guides/snippets/grpc/swift/Request.swift index fed78fb860b..dd6e3105b56 100644 --- a/docs/source/guides/snippets/grpc/swift/Request.swift +++ b/docs/source/guides/snippets/grpc/swift/Request.swift @@ -13,4 +13,3 @@ let request: Bentoml_Grpc_v1_Request = .with { $0.apiName = apiName $0.ndarray = ndarray } - diff --git a/docs/source/guides/tracing.rst b/docs/source/guides/tracing.rst index cf0e52a1f46..f11f304dc81 100644 --- a/docs/source/guides/tracing.rst +++ b/docs/source/guides/tracing.rst @@ -168,7 +168,7 @@ By default, no traces will be collected. Set ``sample_rate`` to your desired fra sample_rate: 1.0 If you would like to exclude some routes from tracing, you can specify them using -the :code:`excluded_urls` parameter. 
This parameter can be either a comma-separated +the :code:`excluded_urls` parameter. This parameter can be either a comma-separated string of routes, or a list of strings. .. code-block:: yaml @@ -228,7 +228,7 @@ Configuration fields are passed through the OpenTelemetry Zipkin exporter Jaeger ^^^^^^ -The Jaeger exporter supports sending trace over both the Thrift and gRPC protocol. By default, BentoML +The Jaeger exporter supports sending trace over both the Thrift and gRPC protocol. By default, BentoML will use the Thrift protocol. .. note:: @@ -237,7 +237,7 @@ will use the Thrift protocol. application code is running as Lambda function, a collector can be configured to send spans using Thrift over HTTP. If both agent and collector are configured, the exporter sends traces only to the collector to eliminate the duplicate entries. [#otlp_jaeger_exporter_docs]_. - + To setup the collector endpoint that will be used to receive either Thrift or Protobuf over HTTP/gRPC, use the ``collector_endpoint`` parameter: @@ -383,7 +383,7 @@ Configuration fields are passed through the OpenTelemetry Zipkin exporter .. [#otlp_jaeger_exporter_docs] `OpenTelemetry Jaeger Exporter API docs `_ -.. [#jaeger_source] Jaeger exporter source code for :github:`Thrift ` and +.. [#jaeger_source] Jaeger exporter source code for :github:`Thrift ` and :github:`gRPC `. .. [#default_timeout] The default timeout is 10 seconds. For most use cases, you don't need to change this value. diff --git a/docs/source/index.rst b/docs/source/index.rst index bd245c0de1b..9a8f7a78cc6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -139,8 +139,8 @@ contributing to the project, helping other users and discuss all things MLOps. ☁️ BentoML Cloud 🦄️ Yatai 🚀 bentoctl - - + + .. |pypi_status| image:: https://img.shields.io/pypi/v/bentoml.svg?style=flat-square :target: https://pypi.org/project/BentoML .. |downloads| image:: https://pepy.tech/badge/bentoml?style=flat-square diff --git a/docs/source/integrations/airflow.rst b/docs/source/integrations/airflow.rst index c76ad29a397..440b3b385f6 100644 --- a/docs/source/integrations/airflow.rst +++ b/docs/source/integrations/airflow.rst @@ -167,4 +167,3 @@ that demonstrates how to use BentoML with Airflow: * 📖 `Deploying BentoML using Airflow `_ * 💻 `Source Code `_ - diff --git a/docs/source/integrations/arize.rst b/docs/source/integrations/arize.rst index 4dceb87a046..36e30cdbfe5 100644 --- a/docs/source/integrations/arize.rst +++ b/docs/source/integrations/arize.rst @@ -8,7 +8,7 @@ Our collaboration with Arize AI makes it easy to integrate end-to-end solutions .. seealso:: - + :ref:`The Arize section under the monitoring guide ` demonstrates how to use the integration. 
diff --git a/docs/source/integrations/databricks.rst b/docs/source/integrations/databricks.rst index fbecb206e15..160a22a3e17 100644 --- a/docs/source/integrations/databricks.rst +++ b/docs/source/integrations/databricks.rst @@ -1,3 +1,3 @@ ========== Databricks -========== \ No newline at end of file +========== diff --git a/docs/source/integrations/feast.rst b/docs/source/integrations/feast.rst index 80f80fc0d20..8f6a6a9ce0a 100644 --- a/docs/source/integrations/feast.rst +++ b/docs/source/integrations/feast.rst @@ -1,3 +1,3 @@ ===== Feast -===== \ No newline at end of file +===== diff --git a/docs/source/integrations/flink.rst b/docs/source/integrations/flink.rst index a5940779c63..50c74a2bd3b 100644 --- a/docs/source/integrations/flink.rst +++ b/docs/source/integrations/flink.rst @@ -5,19 +5,19 @@ Flink Apache Flink DataStream ----------------------- -BentoML support stream model inferencing in -`Apache Flink DataStream API `_ -through either embedded runners or remote calls to a separated deployed Bento Service. This guide assumes prior knowledge +BentoML support stream model inferencing in +`Apache Flink DataStream API `_ +through either embedded runners or remote calls to a separated deployed Bento Service. This guide assumes prior knowledge on using runners and service APIs. Embedded Model Runners ^^^^^^^^^^^^^^^^^^^^^^ -In BentoML, a :ref:`Runner ` -represents a unit of computation, such as model inferencing, that can executed on either a remote runner process or an -embedded runner instance. If available system resources allow loading the ML model in memory, invoking runners as embedded +In BentoML, a :ref:`Runner ` +represents a unit of computation, such as model inferencing, that can executed on either a remote runner process or an +embedded runner instance. If available system resources allow loading the ML model in memory, invoking runners as embedded instances can typically achieve a better performance by avoiding the overhead incurred in the interprocess communication. -Runners can be initialized as embedded instances by calling `init_local()`. Once a runner is initialized, inference functions +Runners can be initialized as embedded instances by calling `init_local()`. Once a runner is initialized, inference functions can be invoked on the runners. .. code:: python @@ -29,7 +29,7 @@ can be invoked on the runners. iris_runner.predict.run(INPUT_TEXT) -To integrate with Flink DataRunners API, runners can be used in `ProcessWindowFunction`` for iterative inferencing or a +To integrate with Flink DataRunners API, runners can be used in `ProcessWindowFunction`` for iterative inferencing or a `WindowFunction` for batched inferencing. .. code:: python @@ -50,7 +50,7 @@ To integrate with Flink DataRunners API, runners can be used in `ProcessWindowFu # transform(data) return data[0], self.runner.run(data[1]) -The following is an end-to-end word classification example of using embedded runners in a Flink DataStream program. +The following is an end-to-end word classification example of using embedded runners in a Flink DataStream program. For simplicity, the input stream and output sink are abstracted out using in-memory collections and stdout sink. .. code:: python @@ -91,7 +91,7 @@ For simplicity, the input stream and output sink are abstracted out using in-mem # Define the execution logic ds = ds.map(ClassifyFunction()) - + # Create sink and emit result to sink, e.g. Kafka, File, Table, etc. Example prints to stdout for simplicity. 
ds.print() @@ -107,11 +107,11 @@ For simplicity, the input stream and output sink are abstracted out using in-mem Remote Bento Service ^^^^^^^^^^^^^^^^^^^^ -Model runners can also be invoked remotely as a separately deployed Bento Service. Calling a remote Bento Service may be -preferred if the model cannot be loaded into memory of the Flink DataStream program. This options is also advantageous because +Model runners can also be invoked remotely as a separately deployed Bento Service. Calling a remote Bento Service may be +preferred if the model cannot be loaded into memory of the Flink DataStream program. This options is also advantageous because model runners can be scaled more easily with deployment frameworks like :ref:`Yatai `. -To send a prediction request to a remotely deployed Bento Service in the DataStream program, you can use any HTTP client +To send a prediction request to a remotely deployed Bento Service in the DataStream program, you can use any HTTP client implementation of your choice inside the `MapFunction` or `ProcessWindowFunction`. @@ -126,5 +126,5 @@ implementation of your choice inside the `MapFunction` or `ProcessWindowFunction ).text -Using a client with asynchronous IO support combined with Flink AsyncFunction is recommended to handle requests and responses +Using a client with asynchronous IO support combined with Flink AsyncFunction is recommended to handle requests and responses concurrent and minimize IO waiting time of calling a remote Bento Service. diff --git a/docs/source/integrations/flyte.rst b/docs/source/integrations/flyte.rst index 2155d4906b4..6874744087d 100644 --- a/docs/source/integrations/flyte.rst +++ b/docs/source/integrations/flyte.rst @@ -1,3 +1,3 @@ ===== Flyte -===== \ No newline at end of file +===== diff --git a/docs/source/integrations/index.rst b/docs/source/integrations/index.rst index 019aec96850..5eac9ae570e 100644 --- a/docs/source/integrations/index.rst +++ b/docs/source/integrations/index.rst @@ -32,7 +32,7 @@ Below is a list of integration guides with various tools within the MLOps ecosys .. grid-item-card:: :doc:`/integrations/triton` :link: /integrations/triton :link-type: doc - + .. grid-item-card:: :doc:`/integrations/ray` :link: /integrations/ray :link-type: doc diff --git a/docs/source/integrations/knative.rst b/docs/source/integrations/knative.rst index bc19a766c48..5d32c1f6ad0 100644 --- a/docs/source/integrations/knative.rst +++ b/docs/source/integrations/knative.rst @@ -1,3 +1,3 @@ ======= Knative -======= \ No newline at end of file +======= diff --git a/docs/source/integrations/kubeflow.rst b/docs/source/integrations/kubeflow.rst index 9bb6a0cbc2a..5f655137db2 100644 --- a/docs/source/integrations/kubeflow.rst +++ b/docs/source/integrations/kubeflow.rst @@ -48,7 +48,7 @@ BentoML offers three custom resource definitions (CRDs) in the Kubernetes cluste Workflow on Notebook #################### -In this example, we will train three fraud detection models using the Kubeflow notebook and the +In this example, we will train three fraud detection models using the Kubeflow notebook and the `Kaggle IEEE-CIS Fraud Detection dataset `_. We will then create a BentoML service that can simultaneously invoke all three models and return a decision on whether a transaction is fraudulent and build it into a Bento. We will showcase two deployment workflows using BentoML's Kubernetes operators: deploying directly from the Bento, and deploying from an OCI image built from the Bento. 
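For the remote Bento Service approach described in the Flink integration above, a minimal
synchronous ``MapFunction`` could be sketched as follows. The host and endpoint path below are
illustrative assumptions, not values from this guide.

.. code-block:: python

    import requests
    from pyflink.datastream.functions import MapFunction

    class RemoteClassifyFunction(MapFunction):
        """Send each element to a remotely deployed Bento Service over HTTP."""

        def map(self, value):
            key, text = value
            # POST the raw text to the service API endpoint and return its response body
            response = requests.post(
                "http://bento-service-host:3000/classify",
                headers={"content-type": "text/plain"},
                data=text,
            )
            return key, response.text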
@@ -56,4 +56,4 @@ two deployment workflows using BentoML's Kubernetes operators: deploying directl .. image:: ../_static/img/kubeflow-fraud-detection.png See the BentoML `Fraud Detection Example `_ for a detailed workflow from model training -to end-to-end deployment on Kubernetes. +to end-to-end deployment on Kubernetes. diff --git a/docs/source/integrations/kubernetes.rst b/docs/source/integrations/kubernetes.rst index 93e02fad192..36295f1052d 100644 --- a/docs/source/integrations/kubernetes.rst +++ b/docs/source/integrations/kubernetes.rst @@ -1,3 +1,3 @@ ========== Kubernetes -========== \ No newline at end of file +========== diff --git a/docs/source/integrations/mlflow.rst b/docs/source/integrations/mlflow.rst index 6efca635ba4..807502639a8 100644 --- a/docs/source/integrations/mlflow.rst +++ b/docs/source/integrations/mlflow.rst @@ -167,7 +167,7 @@ There are two major limitations of using MLflow Runner in BentoML: * Lack of support for multiple inference method A common optimization we recommend, is to save trained model instance directly with BentoML, -instead of importing MLflow pyfunc model. This makes it possible to run GPU inference and expose +instead of importing MLflow pyfunc model. This makes it possible to run GPU inference and expose multiple inference signatures. 1. Save model directly with bentoml @@ -301,7 +301,7 @@ Additional Tips Use MLflow model dependencies config ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Most MLflow models bundles dependency information that is required for running framework model. +Most MLflow models bundles dependency information that is required for running framework model. If no additional dependencies are required in the :obj:`~bentoml.Service` definition code, users may pass through dependency requirements from within MLflow model to BentoML. diff --git a/docs/source/integrations/prefect.rst b/docs/source/integrations/prefect.rst index 3f7d69501c0..95df8060070 100644 --- a/docs/source/integrations/prefect.rst +++ b/docs/source/integrations/prefect.rst @@ -1,3 +1,3 @@ ======= Prefect -======= \ No newline at end of file +======= diff --git a/docs/source/integrations/ray.rst b/docs/source/integrations/ray.rst index 2442a06ceb7..0228ee8120a 100644 --- a/docs/source/integrations/ray.rst +++ b/docs/source/integrations/ray.rst @@ -5,4 +5,4 @@ Ray .. todo:: Add more detailed user guide for deploying BentoML on Ray cluster -.. autofunction:: bentoml.ray.deployment \ No newline at end of file +.. autofunction:: bentoml.ray.deployment diff --git a/docs/source/integrations/sagemaker.rst b/docs/source/integrations/sagemaker.rst index 17d08a86f38..d64a4423682 100644 --- a/docs/source/integrations/sagemaker.rst +++ b/docs/source/integrations/sagemaker.rst @@ -1,3 +1,3 @@ ============= AWS SageMaker -============= \ No newline at end of file +============= diff --git a/docs/source/integrations/triton.rst b/docs/source/integrations/triton.rst index a5edf4e6b3c..7c7f13821c4 100644 --- a/docs/source/integrations/triton.rst +++ b/docs/source/integrations/triton.rst @@ -56,7 +56,7 @@ Get started with Triton Inference Server ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triton Inference Server architecture evolves around the model repository and a inference server. The `model repository `_ -is a filesystem based persistent volume that contains the models file and its respective `configuration `_ +is a filesystem based persistent volume that contains the models file and its respective `configuration `_ that defines how the model should be loaded and served. 
The inference server is implemented in either HTTP/REST or gRPC protocol to serve said models with various batching strategies. BentoML provides a simple integration with Triton via :ref:`Runner `: @@ -89,7 +89,7 @@ supports S3 path: The ``cli_args`` argument is a list of arguments that will be passed to the ``tritonserver`` command. For example, the ``--load-model`` argument is used to load a specific model from the model repository. See ``tritonserver --help`` for all available arguments. -From a developer perspective, remote invocation of Triton runners is similar to invoking any other BentoML runners. +From a developer perspective, remote invocation of Triton runners is similar to invoking any other BentoML runners. .. note:: @@ -217,10 +217,10 @@ There are a few things to note here: - ``as_numpy(name: str) -> NDArray[T]``: returns the result as a numpy array. The argument is the name of the output defined in ``config.pbtxt``. - - ``get_output(name: str) -> InferOutputTensor | dict[str, T]``: Returns the results as a |infer_output_tensor|_ (gRPC) or + - ``get_output(name: str) -> InferOutputTensor | dict[str, T]``: Returns the results as a |infer_output_tensor|_ (gRPC) or a dictionary (HTTP). The argument is the name of the output defined in ``config.pbtxt``. - - ``get_response(self) -> ModelInferResponse | dict[str, T]``: Returns the entire response as a |model_infer_response|_ (gRPC) or + - ``get_response(self) -> ModelInferResponse | dict[str, T]``: Returns the entire response as a |model_infer_response|_ (gRPC) or a dictionary (HTTP). Using the above ``config.pbtxt`` as example, the model consists of two outputs, ``OUTPUT__0`` and ``OUTPUT__1``. @@ -336,7 +336,7 @@ or use :ref:`reference/core:bentoml.bentos.build`: :caption: `build_bento.py` Notice that we are using ``nvcr.io/nvidia/tritonserver:22.12-py3`` as our base image. This can be substituted with any other -custom base image that has ``tritonserver`` binary available. See Triton's documentation `here `_ +custom base image that has ``tritonserver`` binary available. See Triton's documentation `here `_ to learn more about building/composing custom Triton image. .. epigraph:: diff --git a/docs/source/integrations/tvm.rst b/docs/source/integrations/tvm.rst index ce3913a30c8..7bc50faa61d 100644 --- a/docs/source/integrations/tvm.rst +++ b/docs/source/integrations/tvm.rst @@ -1,3 +1,3 @@ ========== Apache TVM -========== \ No newline at end of file +========== diff --git a/docs/source/reference/api_io_descriptors.rst b/docs/source/reference/api_io_descriptors.rst index 5efba47cd85..796713067bc 100644 --- a/docs/source/reference/api_io_descriptors.rst +++ b/docs/source/reference/api_io_descriptors.rst @@ -61,7 +61,7 @@ To use the IO descriptor, install bentoml with extra ``io-pandas`` dependency: .. note:: The :code:`pandas` package is required to use the :obj:`bentoml.io.PandasDataFrame` - or :obj:`bentoml.io.PandasSeries`. + or :obj:`bentoml.io.PandasSeries`. Install it with ``pip install pandas`` and add it to your :code:`bentofile.yaml`'s under either Python or Conda packages list. diff --git a/docs/source/reference/frameworks/detectron.rst b/docs/source/reference/frameworks/detectron.rst index d85090952ba..ba6c516d417 100644 --- a/docs/source/reference/frameworks/detectron.rst +++ b/docs/source/reference/frameworks/detectron.rst @@ -15,4 +15,3 @@ Detectron .. autofunction:: bentoml.detectron.load_model .. 
autofunction:: bentoml.detectron.get - diff --git a/docs/source/reference/frameworks/ray.rst b/docs/source/reference/frameworks/ray.rst index d5911f6fa3e..7ec8318731c 100644 --- a/docs/source/reference/frameworks/ray.rst +++ b/docs/source/reference/frameworks/ray.rst @@ -10,4 +10,4 @@ Ray .. currentmodule:: bentoml.ray -.. autofunction:: bentoml.ray.deployment \ No newline at end of file +.. autofunction:: bentoml.ray.deployment diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 425462995a7..4452e40271a 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -449,7 +449,7 @@ Next, run the ``bentoml build`` CLI command from the same directory: Building BentoML service "iris_classifier:6otbsmxzq6lwbgxi" from build context "/home/user/gallery/quickstart" Packing model "iris_clf:zy3dfgxzqkjrlgxi" Locking PyPI package versions.. - + ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ diff --git a/examples/custom_runner/torch_hub_yolov5/README.md b/examples/custom_runner/torch_hub_yolov5/README.md index 75051ba61ee..cf62e877baf 100644 --- a/examples/custom_runner/torch_hub_yolov5/README.md +++ b/examples/custom_runner/torch_hub_yolov5/README.md @@ -1,4 +1,4 @@ -# Serving YOLOv5 model with BentoML +# Serving YOLOv5 model with BentoML This project demonstrate how to use pretrained YOLOv5 model from Torch hub, and use it to build a prediction service in BentoML. @@ -74,7 +74,7 @@ Sample result: ## Build Bento -The `bentofile.yaml` have configured all required system packages and python dependencies. +The `bentofile.yaml` have configured all required system packages and python dependencies. ```bash bentoml build diff --git a/examples/custom_runner/torch_hub_yolov5/bentofile.yaml b/examples/custom_runner/torch_hub_yolov5/bentofile.yaml index 47c9d253001..2707307c1b9 100644 --- a/examples/custom_runner/torch_hub_yolov5/bentofile.yaml +++ b/examples/custom_runner/torch_hub_yolov5/bentofile.yaml @@ -9,4 +9,3 @@ docker: - ffmpeg - libsm6 - libxext6 - diff --git a/examples/custom_web_serving/fastapi_example/train.py b/examples/custom_web_serving/fastapi_example/train.py index 1b004effe94..6afeda9ebb2 100644 --- a/examples/custom_web_serving/fastapi_example/train.py +++ b/examples/custom_web_serving/fastapi_example/train.py @@ -9,7 +9,6 @@ logging.basicConfig(level=logging.WARN) if __name__ == "__main__": - # Load training data iris = datasets.load_iris() X = pd.DataFrame( diff --git a/examples/custom_web_serving/flask_example/train.py b/examples/custom_web_serving/flask_example/train.py index 3ab83b6f574..2f1bc082392 100644 --- a/examples/custom_web_serving/flask_example/train.py +++ b/examples/custom_web_serving/flask_example/train.py @@ -8,7 +8,6 @@ logging.basicConfig(level=logging.WARN) if __name__ == "__main__": - # Load training data iris = datasets.load_iris() X, y = iris.data, iris.target diff --git a/examples/fraud_detection/README.md b/examples/fraud_detection/README.md index fabee1027a8..4a719d00785 100644 --- a/examples/fraud_detection/README.md +++ b/examples/fraud_detection/README.md @@ -7,7 +7,7 @@ pip install -r ./dev-requirements.txt 2. 
Download dataset from Kaggle -Before downloading, set up Kaggle API Credentials https://github.com/Kaggle/kaggle-api#api-credentials +Before downloading, set up Kaggle API Credentials https://github.com/Kaggle/kaggle-api#api-credentials and accept dataset rules: https://www.kaggle.com/competitions/ieee-fraud-detection/data ```bash diff --git a/examples/fraud_detection/benchmark/README.md b/examples/fraud_detection/benchmark/README.md index 2ca781f5850..f6db83498d2 100644 --- a/examples/fraud_detection/benchmark/README.md +++ b/examples/fraud_detection/benchmark/README.md @@ -41,4 +41,4 @@ Visit http://0.0.0.0:8089/ and start the test. * BentoML deployed on Ray - serve run ray_deploy:deploy -p 3000 + serve run ray_deploy:deploy -p 3000 diff --git a/examples/fraud_detection/train.sh b/examples/fraud_detection/train.sh index c46ee95b3ac..de8aaef797d 100755 --- a/examples/fraud_detection/train.sh +++ b/examples/fraud_detection/train.sh @@ -1,3 +1,3 @@ #!/bin/bash -jupyter nbconvert --to notebook --inplace --execute ./IEEE-CIS-Fraud-Detection.ipynb --debug 2>&1 | grep -v '^\[NbConvertApp\]' +jupyter nbconvert --to notebook --inplace --execute ./IEEE-CIS-Fraud-Detection.ipynb --debug 2>&1 | grep -v '^\[NbConvertApp\]' diff --git a/examples/inference_graph/README.md b/examples/inference_graph/README.md index 0e301f7894a..7deb2789516 100644 --- a/examples/inference_graph/README.md +++ b/examples/inference_graph/README.md @@ -1,6 +1,6 @@ # BentoML Inference Graph Tutorial -This is a sample project demonstrating model inference graph of [BentoML](https://github.com/bentoml) +This is a sample project demonstrating model inference graph of [BentoML](https://github.com/bentoml) with Huggingface Transformers. In this project, we will download and save three pretrained text generation models and a pretrained text classification model @@ -76,9 +76,9 @@ pipeline = bentoml.transformers.load_model("gpt2-generation:latest") pipeline("I have an idea!") ``` -In BentoML, the recommended way of running ML model inference in serving is via Runners, which -gives BentoML more flexibility in scheduling the inference computation, batching inference requests, -and taking advantage of hardware resoureces available. Saved models can be loaded as Runner instance as +In BentoML, the recommended way of running ML model inference in serving is via Runners, which +gives BentoML more flexibility in scheduling the inference computation, batching inference requests, +and taking advantage of hardware resoureces available. Saved models can be loaded as Runner instance as shown below: ```python @@ -100,7 +100,7 @@ bert_runner.run("I have an idea!") The service definition below achieves the inference graph logic described above. First, the we create three text generation runners and one text classification runners with the `to_runner` function -from the models we previously saved. Second, we create a `bentoml.Service` named "inference_graph" with pass in +from the models we previously saved. Second, we create a `bentoml.Service` named "inference_graph" with pass in the four runners instances. Lastly, we create an async `@svc.api` that accepts a `Text` input and `JSON` output. The API passes the input simultaneously to all three text generation models through `asyncio.gather` and iteratively passes the generated paragraphs to the text classification model. 
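A condensed sketch of such a service definition is shown below. The model tags, generation
behavior, and post-processing here are illustrative assumptions rather than the exact code of this
example:

```python
import asyncio

import bentoml
from bentoml.io import JSON, Text

# Model tags below are illustrative; substitute the tags of the models saved earlier.
gpt2_runner = bentoml.transformers.get("gpt2-generation:latest").to_runner()
distilgpt2_runner = bentoml.transformers.get("distilgpt2-generation:latest").to_runner()
gpt2_medium_runner = bentoml.transformers.get("gpt2-medium-generation:latest").to_runner()
bert_runner = bentoml.transformers.get("bert-classification:latest").to_runner()

svc = bentoml.Service(
    "inference_graph",
    runners=[gpt2_runner, distilgpt2_runner, gpt2_medium_runner, bert_runner],
)


@svc.api(input=Text(), output=JSON())
async def classify_generated_texts(original_sentence: str) -> dict:
    # Fan out the prompt to all three generation models concurrently
    generations = await asyncio.gather(
        gpt2_runner.async_run(original_sentence),
        distilgpt2_runner.async_run(original_sentence),
        gpt2_medium_runner.async_run(original_sentence),
    )
    # Score each generated paragraph with the classification model
    results = []
    for generation in generations:
        paragraph = generation[0]["generated_text"]
        score = await bert_runner.async_run(paragraph)
        results.append({"generated": paragraph, "classification": score[0]})
    return {"original": original_sentence, "results": results}
```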
The API returns all three generated paragraphs and their @@ -197,7 +197,7 @@ curl -X 'POST' \ ### Build Bento for deployment Bento is the distribution format in BentoML which captures all the source code, model files, config -files and dependency specifications required for running the service for production deployment. Think +files and dependency specifications required for running the service for production deployment. Think of it as Docker/Container designed for machine learning models. To begin with building Bento, create a `bentofile.yaml` under your project directory: @@ -240,14 +240,14 @@ Locking PyPI package versions.. Successfully built Bento(tag="inference_graph:owljo4hna25nblg6") ``` -A new Bento is now built and saved to local Bento store. You can view and manage it via +A new Bento is now built and saved to local Bento store. You can view and manage it via `bentoml list`,`bentoml get` and `bentoml delete` CLI command. ### Containerize and Deployment Bento is designed to be deployed to run efficiently in a variety of different environments. -And there are lots of deployment options and tools as part of the BentoML eco-system, such as +And there are lots of deployment options and tools as part of the BentoML eco-system, such as [Yatai](https://github.com/bentoml/Yatai) and [bentoctl](https://github.com/bentoml/bentoctl) for direct deployment to cloud platforms. @@ -264,7 +264,7 @@ This will build a new docker image with all source code, model files and depende and ready for production deployment. To start a container with this docker image locally, run: ```bash -docker run -p 3000:3000 inference_graph:invwzzsw7li6zckb2ie5eubhd +docker run -p 3000:3000 inference_graph:invwzzsw7li6zckb2ie5eubhd ``` ## What's Next? @@ -275,4 +275,3 @@ docker run -p 3000:3000 inference_graph:invwzzsw7li6zckb2ie5eubhd - Learn more about model deployment options for Bento: - [🦄️ Yatai](https://github.com/bentoml/Yatai): Model Deployment at scale on Kubernetes - [🚀 bentoctl](https://github.com/bentoml/bentoctl): Fast model deployment on any cloud platform - diff --git a/examples/kfserving/train.py b/examples/kfserving/train.py index 3ab83b6f574..2f1bc082392 100644 --- a/examples/kfserving/train.py +++ b/examples/kfserving/train.py @@ -8,7 +8,6 @@ logging.basicConfig(level=logging.WARN) if __name__ == "__main__": - # Load training data iris = datasets.load_iris() X, y = iris.data, iris.target diff --git a/examples/kubeflow/deployment_from_bento.yaml b/examples/kubeflow/deployment_from_bento.yaml index 5b3d4ebc894..8ddb69772a4 100644 --- a/examples/kubeflow/deployment_from_bento.yaml +++ b/examples/kubeflow/deployment_from_bento.yaml @@ -94,4 +94,4 @@ spec: memory: 1024Mi requests: cpu: 100m - memory: 200Mi \ No newline at end of file + memory: 200Mi diff --git a/examples/kubeflow/deployment_from_bentorequest.yaml b/examples/kubeflow/deployment_from_bentorequest.yaml index 5233f94bd5c..a3a92efe878 100644 --- a/examples/kubeflow/deployment_from_bentorequest.yaml +++ b/examples/kubeflow/deployment_from_bentorequest.yaml @@ -91,4 +91,4 @@ spec: memory: 1024Mi requests: cpu: 100m - memory: 200Mi \ No newline at end of file + memory: 200Mi diff --git a/examples/mlflow/lightgbm/train.py b/examples/mlflow/lightgbm/train.py index 1b5a5ecc8bf..f30eef23b96 100644 --- a/examples/mlflow/lightgbm/train.py +++ b/examples/mlflow/lightgbm/train.py @@ -55,7 +55,6 @@ def main(): train_set = lgb.Dataset(X_train, label=y_train) with mlflow.start_run(): - # train model params = { "objective": "multiclass", diff --git 
a/examples/monitoring/task_classification/README.md b/examples/monitoring/task_classification/README.md index fe732b3e89e..cc009da5cf0 100644 --- a/examples/monitoring/task_classification/README.md +++ b/examples/monitoring/task_classification/README.md @@ -122,7 +122,7 @@ BENTOML_CONFIG=deployment.yaml bentoml serve service.py:svc ### Containerized Serving with monitoring Bento is the distribution format in BentoML which captures all the source code, model files, config -files and dependency specifications required for running the service for production deployment. Think +files and dependency specifications required for running the service for production deployment. Think of it as Docker/Container designed for machine learning models. To begin with building Bento, create a `bentofile.yaml` under your project directory: @@ -161,7 +161,7 @@ Next, run `bentoml build` from current directory to start the Bento build: 05/05/2022 19:19:17 INFO [cli] Successfully built Bento(tag="iris_classifier:5wtigdwm4kwzduqj") at "/Users/bentoml/bentoml/bentos/iris_classifier/5wtigdwm4kwzduqj/" ``` -A new Bento is now built and saved to local Bento store. You can view and manage it via +A new Bento is now built and saved to local Bento store. You can view and manage it via `bentoml list`,`bentoml get` and `bentoml delete` CLI command. Then we will convert a Bento into a Docker image containing the HTTP model server. @@ -187,4 +187,3 @@ docker run -p 3000:3000 iris_classifier:invwzzsw7li6zckb2ie5eubhd --mount type=b - Learn more about model deployment options for Bento: - [🦄️ Yatai](https://github.com/bentoml/Yatai): Model Deployment at scale on Kubernetes - [🚀 bentoctl](https://github.com/bentoml/bentoctl): Fast model deployment on any cloud platform - diff --git a/examples/pydantic_validation/train.py b/examples/pydantic_validation/train.py index 1b004effe94..6afeda9ebb2 100644 --- a/examples/pydantic_validation/train.py +++ b/examples/pydantic_validation/train.py @@ -9,7 +9,6 @@ logging.basicConfig(level=logging.WARN) if __name__ == "__main__": - # Load training data iris = datasets.load_iris() X = pd.DataFrame( diff --git a/examples/pytorch_mnist/locustfile.py b/examples/pytorch_mnist/locustfile.py index 1fbaddf3799..b62fdd54733 100644 --- a/examples/pytorch_mnist/locustfile.py +++ b/examples/pytorch_mnist/locustfile.py @@ -7,7 +7,6 @@ class PyTorchMNISTLoadTestUser(HttpUser): - wait_time = between(0.01, 2) @task diff --git a/examples/pytorch_mnist/pytorch_mnist_demo.ipynb b/examples/pytorch_mnist/pytorch_mnist_demo.ipynb index f10ac7a10f0..c2fede3d637 100644 --- a/examples/pytorch_mnist/pytorch_mnist_demo.ipynb +++ b/examples/pytorch_mnist/pytorch_mnist_demo.ipynb @@ -221,7 +221,6 @@ "\n", " # K-fold Cross Validation model evaluation\n", " for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):\n", - "\n", " print(f\"FOLD {fold}\")\n", " print(\"--------------------------------\")\n", "\n", @@ -295,7 +294,6 @@ "outputs": [], "source": [ "def train(dataset, epochs=NUM_EPOCHS, device=\"cpu\"):\n", - "\n", " train_sampler = torch.utils.data.RandomSampler(dataset)\n", " train_loader = torch.utils.data.DataLoader(\n", " dataset,\n", diff --git a/examples/pytorch_mnist/train.py b/examples/pytorch_mnist/train.py index 1293ac73d7b..fadbbea6436 100644 --- a/examples/pytorch_mnist/train.py +++ b/examples/pytorch_mnist/train.py @@ -90,7 +90,6 @@ def cross_validate(dataset, epochs=NUM_EPOCHS, k_folds=K_FOLDS, device="cpu"): # K-fold Cross Validation model evaluation for fold, (train_ids, test_ids) in 
enumerate(kfold.split(dataset)): - print(f"FOLD {fold}") print("--------------------------------") @@ -142,7 +141,6 @@ def cross_validate(dataset, epochs=NUM_EPOCHS, k_folds=K_FOLDS, device="cpu"): def train(dataset, epochs=NUM_EPOCHS, device="cpu"): - print("Training using %s." % device) train_sampler = torch.utils.data.RandomSampler(dataset) train_loader = torch.utils.data.DataLoader( @@ -161,7 +159,6 @@ def train(dataset, epochs=NUM_EPOCHS, device="cpu"): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="BentoML PyTorch MNIST Example") parser.add_argument( "--epochs", diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md index 785150f620e..a732a3beb0d 100644 --- a/examples/quickstart/README.md +++ b/examples/quickstart/README.md @@ -4,7 +4,7 @@ This is a sample project demonstrating basic usage of [BentoML](https://github.c Scikit-learn. In this project, we will train a classifier model using Scikit-learn and the Iris dataset, build -an prediction service for serving the trained model via an HTTP server, and containerize the +an prediction service for serving the trained model via an HTTP server, and containerize the model server as a docker image for production deployment. This project is also available to run from a notebook: https://github.com/bentoml/BentoML/blob/main/examples/quickstart/iris_classifier.ipynb @@ -59,8 +59,8 @@ loaded_model = bentoml.sklearn.load_model("iris_clf:latest") loaded_model.predict([[5.9, 3. , 5.1, 1.8]]) # => array(2) ``` -In BentoML, the recommended way of running ML model inference in serving is via Runner, which -gives BentoML more flexibility in terms of how to schedule the inference computation, how to +In BentoML, the recommended way of running ML model inference in serving is via Runner, which +gives BentoML more flexibility in terms of how to schedule the inference computation, how to batch inference requests and take advantage of hardware resoureces available. Saved models can be loaded as Runner instance as shown below: @@ -114,7 +114,7 @@ curl -X POST -H "content-type: application/json" --data "[[5.9, 3, 5.1, 1.8]]" h ### Build Bento for deployment Bento is the distribution format in BentoML which captures all the source code, model files, config -files and dependency specifications required for running the service for production deployment. Think +files and dependency specifications required for running the service for production deployment. Think of it as Docker/Container designed for machine learning models. To begin with building Bento, create a `bentofile.yaml` under your project directory: @@ -153,14 +153,14 @@ Next, run `bentoml build` from current directory to start the Bento build: 05/05/2022 19:19:17 INFO [cli] Successfully built Bento(tag="iris_classifier:5wtigdwm4kwzduqj") at "/Users/bentoml/bentoml/bentos/iris_classifier/5wtigdwm4kwzduqj/" ``` -A new Bento is now built and saved to local Bento store. You can view and manage it via +A new Bento is now built and saved to local Bento store. You can view and manage it via `bentoml list`,`bentoml get` and `bentoml delete` CLI command. ### Containerize and Deployment Bento is designed to be deployed to run efficiently in a variety of different environments. 
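If you prefer to drive containerization from Python rather than the CLI, recent BentoML releases
also expose a containerization API. The call below is a sketch under that assumption; the backend
and tag are illustrative:

```python
import bentoml

# Build an OCI image for the Bento built above, using the default Docker backend.
# (Assumes the `bentoml.container.build` API available in recent BentoML versions.)
bentoml.container.build("iris_classifier:latest", backend="docker")
```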
-And there are lots of deployment options and tools as part of the BentoML eco-system, such as +And there are lots of deployment options and tools as part of the BentoML eco-system, such as [Yatai](https://github.com/bentoml/Yatai) and [bentoctl](https://github.com/bentoml/bentoctl) for direct deployment to cloud platforms. @@ -177,7 +177,7 @@ This will build a new docker image with all source code, model files and depende and ready for production deployment. To start a container with this docker image locally, run: ```bash -docker run -p 3000:3000 iris_classifier:invwzzsw7li6zckb2ie5eubhd +docker run -p 3000:3000 iris_classifier:invwzzsw7li6zckb2ie5eubhd ``` ## What's Next? @@ -188,4 +188,3 @@ docker run -p 3000:3000 iris_classifier:invwzzsw7li6zckb2ie5eubhd - Learn more about model deployment options for Bento: - [🦄️ Yatai](https://github.com/bentoml/Yatai): Model Deployment at scale on Kubernetes - [🚀 bentoctl](https://github.com/bentoml/bentoctl): Fast model deployment on any cloud platform - diff --git a/examples/quickstart/train.py b/examples/quickstart/train.py index 6ebf99083ff..9cb15bb28ce 100644 --- a/examples/quickstart/train.py +++ b/examples/quickstart/train.py @@ -8,7 +8,6 @@ logging.basicConfig(level=logging.WARN) if __name__ == "__main__": - # Load training data iris = datasets.load_iris() X, y = iris.data, iris.target diff --git a/examples/sklearn/linear_regression/README.md b/examples/sklearn/linear_regression/README.md index fddbe5035eb..093d2951257 100644 --- a/examples/sklearn/linear_regression/README.md +++ b/examples/sklearn/linear_regression/README.md @@ -35,5 +35,3 @@ bentoml build ``` bentoml containerize linear_regression:latest ``` - - diff --git a/examples/sklearn/pipeline/README.md b/examples/sklearn/pipeline/README.md index 71b8396012c..a41e30f8709 100644 --- a/examples/sklearn/pipeline/README.md +++ b/examples/sklearn/pipeline/README.md @@ -42,5 +42,3 @@ bentoml build ``` bentoml containerize doc_classifier:latest ``` - - diff --git a/examples/tensorflow2_keras/README.md b/examples/tensorflow2_keras/README.md index 1ae7b771499..2655d95e5b6 100644 --- a/examples/tensorflow2_keras/README.md +++ b/examples/tensorflow2_keras/README.md @@ -31,7 +31,7 @@ export CPATH="/opt/homebrew/include/" export HDF5_DIR=/opt/homebrew/ ``` -Then try running the "pip install tensorflow-macos" again +Then try running the "pip install tensorflow-macos" again ### Model Training @@ -183,21 +183,21 @@ This may take a while when running for the first time for BentoML to resolve all ``` > bentoml build -[01:14:04 AM] INFO Building BentoML service "tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq" from build context - "/home/chef/workspace/gallery/tensorflow2" - INFO Packing model "tensorflow_mnist_demo:xm6jsddtu3y6zluuvcqvsoywq" from - "/home/chef/bentoml/models/tensorflow_mnist_demo/xm6jsddtu3y6zluuvcqvsoywq" - INFO Locking PyPI package versions.. 
-[01:14:05 AM] INFO - ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ - ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ - ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ - ██╔══██╗██╔══╝░░██║╚████║░░░██║░░░██║░░██║██║╚██╔╝██║██║░░░░░ - ██████╦╝███████╗██║░╚███║░░░██║░░░╚█████╔╝██║░╚═╝░██║███████╗ - ╚═════╝░╚══════╝╚═╝░░╚══╝░░░╚═╝░░░░╚════╝░╚═╝░░░░░╚═╝╚══════╝ - - INFO Successfully built Bento(tag="tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq") at - "/home/chef/bentoml/bentos/tensorflow_mnist_demo/bmygukdtzpy6zlc5vcqvsoywq/" +[01:14:04 AM] INFO Building BentoML service "tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq" from build context + "/home/chef/workspace/gallery/tensorflow2" + INFO Packing model "tensorflow_mnist_demo:xm6jsddtu3y6zluuvcqvsoywq" from + "/home/chef/bentoml/models/tensorflow_mnist_demo/xm6jsddtu3y6zluuvcqvsoywq" + INFO Locking PyPI package versions.. +[01:14:05 AM] INFO + ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ + ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ + ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ + ██╔══██╗██╔══╝░░██║╚████║░░░██║░░░██║░░██║██║╚██╔╝██║██║░░░░░ + ██████╦╝███████╗██║░╚███║░░░██║░░░╚█████╔╝██║░╚═╝░██║███████╗ + ╚═════╝░╚══════╝╚═╝░░╚══╝░░░╚═╝░░░░╚════╝░╚═╝░░░░░╚═╝╚══════╝ + + INFO Successfully built Bento(tag="tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq") at + "/home/chef/bentoml/bentos/tensorflow_mnist_demo/bmygukdtzpy6zlc5vcqvsoywq/" ``` This Bento can now be loaded for serving: diff --git a/examples/tensorflow2_keras/locustfile.py b/examples/tensorflow2_keras/locustfile.py index ea041067ab4..490fe59bea0 100644 --- a/examples/tensorflow2_keras/locustfile.py +++ b/examples/tensorflow2_keras/locustfile.py @@ -7,7 +7,6 @@ class TensorFlow2MNISTLoadTestUser(HttpUser): - wait_time = between(0.9, 1.1) @task diff --git a/examples/tensorflow2_native/README.md b/examples/tensorflow2_native/README.md index 5680a4e67a8..654ae65e211 100644 --- a/examples/tensorflow2_native/README.md +++ b/examples/tensorflow2_native/README.md @@ -31,7 +31,7 @@ export CPATH="/opt/homebrew/include/" export HDF5_DIR=/opt/homebrew/ ``` -Then try running the "pip install tensorflow-macos" again +Then try running the "pip install tensorflow-macos" again ### Model Training @@ -168,21 +168,21 @@ This may take a while when running for the first time for BentoML to resolve all ``` > bentoml build -[01:14:04 AM] INFO Building BentoML service "tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq" from build context - "/home/chef/workspace/gallery/tensorflow2" - INFO Packing model "tensorflow_mnist_demo:xm6jsddtu3y6zluuvcqvsoywq" from - "/home/chef/bentoml/models/tensorflow_mnist_demo/xm6jsddtu3y6zluuvcqvsoywq" - INFO Locking PyPI package versions.. 
-[01:14:05 AM] INFO - ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ - ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ - ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ - ██╔══██╗██╔══╝░░██║╚████║░░░██║░░░██║░░██║██║╚██╔╝██║██║░░░░░ - ██████╦╝███████╗██║░╚███║░░░██║░░░╚█████╔╝██║░╚═╝░██║███████╗ - ╚═════╝░╚══════╝╚═╝░░╚══╝░░░╚═╝░░░░╚════╝░╚═╝░░░░░╚═╝╚══════╝ - - INFO Successfully built Bento(tag="tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq") at - "/home/chef/bentoml/bentos/tensorflow_mnist_demo/bmygukdtzpy6zlc5vcqvsoywq/" +[01:14:04 AM] INFO Building BentoML service "tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq" from build context + "/home/chef/workspace/gallery/tensorflow2" + INFO Packing model "tensorflow_mnist_demo:xm6jsddtu3y6zluuvcqvsoywq" from + "/home/chef/bentoml/models/tensorflow_mnist_demo/xm6jsddtu3y6zluuvcqvsoywq" + INFO Locking PyPI package versions.. +[01:14:05 AM] INFO + ██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░ + ██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░ + ██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░ + ██╔══██╗██╔══╝░░██║╚████║░░░██║░░░██║░░██║██║╚██╔╝██║██║░░░░░ + ██████╦╝███████╗██║░╚███║░░░██║░░░╚█████╔╝██║░╚═╝░██║███████╗ + ╚═════╝░╚══════╝╚═╝░░╚══╝░░░╚═╝░░░░╚════╝░╚═╝░░░░░╚═╝╚══════╝ + + INFO Successfully built Bento(tag="tensorflow_mnist_demo:bmygukdtzpy6zlc5vcqvsoywq") at + "/home/chef/bentoml/bentos/tensorflow_mnist_demo/bmygukdtzpy6zlc5vcqvsoywq/" ``` This Bento can now be loaded for serving: diff --git a/examples/tensorflow2_native/locustfile.py b/examples/tensorflow2_native/locustfile.py index ea041067ab4..490fe59bea0 100644 --- a/examples/tensorflow2_native/locustfile.py +++ b/examples/tensorflow2_native/locustfile.py @@ -7,7 +7,6 @@ class TensorFlow2MNISTLoadTestUser(HttpUser): - wait_time = between(0.9, 1.1) @task diff --git a/examples/triton/onnx/README.md b/examples/triton/onnx/README.md index 028f900a06a..26d8c02e2e3 100644 --- a/examples/triton/onnx/README.md +++ b/examples/triton/onnx/README.md @@ -121,7 +121,7 @@ docker run --rm -it -p 3000-4000:3000-4000 \ nvcr.io/nvidia/tritonserver:22.12-py3 bash ``` -If you have NVIDIA GPU available, make sure to install +If you have NVIDIA GPU available, make sure to install [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) on your system. Afterward, passing in `--gpus all` to `docker`: @@ -166,7 +166,7 @@ python3 serve_bento.py > [here](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md) > for more details on building customisation. -
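For reference, the Triton runner used in this example follows the pattern described in the Triton
integration guide; a minimal sketch (the repository path, model name, and flags below are
illustrative assumptions) looks like this:

```python
import bentoml

# Point the runner at a local Triton model repository; cli_args are passed to `tritonserver`.
triton_runner = bentoml.triton.Runner(
    "triton_runner",
    model_repository="./model_repository",
    cli_args=["--model-control-mode=explicit", "--load-model=onnx_mnist"],
)

svc = bentoml.Service("triton-onnx-mnist", runners=[triton_runner])
```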