diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
index 3a459050e..5456a3d66 100644
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -54,7 +54,7 @@ jobs:
       - name: Install pytorch
         run: |
           python3 -m pip cache dir
-          python3 -m pip install torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
+          python3 -m pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu118
       - name: Build lmdeploy
         run: |
           python3 -m pip install cmake
@@ -78,7 +78,7 @@ jobs:
         run: |
           python3 -m pip install pynvml packaging protobuf transformers_stream_generator
           # manually install flash attn
-          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
+          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
           python3 -m pip install -r requirements_cuda.txt -r requirements/test.txt
           python3 -m pip install .
       - name: Check env
diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md
index 8877d510c..c205b68a0 100644
--- a/docs/en/get_started/installation.md
+++ b/docs/en/get_started/installation.md
@@ -23,7 +23,7 @@ pip install lmdeploy
 The default prebuilt package is compiled on **CUDA 12**. If CUDA 11+ (>=11.3) is required, you can install lmdeploy by:
 
 ```shell
-export LMDEPLOY_VERSION=0.6.5
+export LMDEPLOY_VERSION=0.7.0
 export PYTHON_VERSION=38
 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
 ```
@@ -65,7 +65,7 @@ cd lmdeploy
 **Step 3** - launch docker container in interactive mode
 
 ```shell
-docker run --gpus all --net host --shm-size 16g -v $(pwd):/opt/lmdeploy --name lmdeploy -it openmmlab/lmdeploy:latest bin/bash
+docker run --gpus all --net host --shm-size 16g -v $(pwd):/opt/lmdeploy --name lmdeploy -it openmmlab/lmdeploy:latest /bin/bash
 ```
 
 **Step 4** - build and installation
diff --git a/docs/en/index.rst b/docs/en/index.rst
index 54a36c22c..5d49e01c8 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -103,7 +103,6 @@ Documentation
    advance/chat_template.md
    advance/debug_turbomind.md
    advance/structed_output.md
-   advance/pytorch_multithread.md
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md
index 501f8a13e..b8a92f70d 100644
--- a/docs/zh_cn/get_started/installation.md
+++ b/docs/zh_cn/get_started/installation.md
@@ -23,7 +23,7 @@ pip install lmdeploy
 默认的预构建包是在 **CUDA 12** 上编译的。如果需要 CUDA 11+ (>=11.3),你可以使用以下命令安装 lmdeploy:
 
 ```shell
-export LMDEPLOY_VERSION=0.6.5
+export LMDEPLOY_VERSION=0.7.0
 export PYTHON_VERSION=38
 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
 ```
@@ -65,7 +65,7 @@ cd lmdeploy
 **步骤 3** - 以交互模式启动 docker 容器
 
 ```shell
-docker run --gpus all --net host --shm-size 16g -v $(pwd):/opt/lmdeploy --name lmdeploy -it openmmlab/lmdeploy:latest bin/bash
+docker run --gpus all --net host --shm-size 16g -v $(pwd):/opt/lmdeploy --name lmdeploy -it openmmlab/lmdeploy:latest /bin/bash
 ```
 
 **步骤 4** - 编译与安装
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst
index 197e800d5..018a00487 100644
--- a/docs/zh_cn/index.rst
+++ b/docs/zh_cn/index.rst
@@ -104,7 +104,6 @@ LMDeploy 工具箱提供以下核心功能:
    advance/chat_template.md
    advance/debug_turbomind.md
    advance/structed_output.md
-   advance/pytorch_multithread.md
 
 .. toctree::
    :maxdepth: 1
diff --git a/lmdeploy/version.py b/lmdeploy/version.py
index 0b4b8a537..b4f2ca71b 100644
--- a/lmdeploy/version.py
+++ b/lmdeploy/version.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Tuple
 
-__version__ = '0.6.5'
+__version__ = '0.7.0'
 short_version = __version__
 
 
diff --git a/requirements/runtime_cuda.txt b/requirements/runtime_cuda.txt
index 41af6039b..965a4e051 100644
--- a/requirements/runtime_cuda.txt
+++ b/requirements/runtime_cuda.txt
@@ -18,5 +18,5 @@ tiktoken
 torch<=2.5.1,>=2.0.0
 torchvision<=0.20.1,>=0.15.0
 transformers
-triton==3.0.0; sys_platform == "linux"
+triton<=3.1.0,>=3.0.0; sys_platform == "linux"
 uvicorn