Skip to content

Commit 22e8b2c

Browse files
authored
Package 'bin/llama_gemm' to wheel (InternLM#320)
* pack llama_gemm
* update CMakeLists.txt
* remove candidate
* update MANIFEST.in
1 parent eaccbc0 commit 22e8b2c

File tree

6 files changed

+40
-22
lines changed

6 files changed

+40
-22
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ htmlcov/
4444
*build*/
4545
!builder/
4646
lmdeploy/lib/
47+
lmdeploy/bin/
4748
dist/
4849
examples/cpp/llama/*.csv
4950
*.npy

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ include lmdeploy/lib/*.so
33
include lmdeploy/lib/*.so*
44
include lmdeploy/lib/*.dll
55
include lmdeploy/lib/*.pyd
6+
include lmdeploy/bin/*
67
include lmdeploy/serve/turbomind/service_docker_up.sh
78
recursive-include lmdeploy/serve/turbomind/triton_models *

examples/cpp/llama/generate_gemm_config.py

-22 lines (this file was deleted); its replacement was added elsewhere with +33 lines:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright (c) OpenMMLab. All rights reserved.
2+
3+
import subprocess
4+
5+
import fire
6+
7+
8+
def get_llama_gemm():
9+
import os.path as osp
10+
11+
import lmdeploy
12+
lmdeploy_dir = osp.split(lmdeploy.__file__)[0]
13+
bin_path = osp.join(lmdeploy_dir, 'bin', 'llama_gemm')
14+
assert osp.exists(bin_path), f'{bin_path} not exists'
15+
return bin_path
16+
17+
18+
def main(head_num: int = 32,
19+
size_per_head: int = 128,
20+
vocab_size: int = 32000,
21+
inter_size: int = 11008,
22+
tensor_para_size: int = 1,
23+
max_batch_size: int = 64):
24+
for bsz in range(1, max_batch_size + 1):
25+
subprocess.call(
26+
f'{get_llama_gemm()} {bsz} 1 1 {head_num} {size_per_head}'
27+
f' {inter_size} {vocab_size} 1 {tensor_para_size}'
28+
f' {0 if bsz == 1 else 1}',
29+
shell=True)
30+
31+
32+
if __name__ == '__main__':
33+
fire.Fire(main)

setup.py

+4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def gen_packages_items():
120120

121121

122122
if __name__ == '__main__':
123+
lmdeploy_package_data = ['lmdeploy/bin/llama_gemm']
123124
setup(name='lmdeploy',
124125
version=get_version(),
125126
description='A toolset for compressing, deploying and serving LLM',
@@ -128,6 +129,9 @@ def gen_packages_items():
128129
author='OpenMMLab',
129130
author_email='[email protected]',
130131
packages=find_packages(exclude=()),
132+
package_data={
133+
'lmdeploy': lmdeploy_package_data,
134+
},
131135
include_package_data=True,
132136
install_requires=parse_requirements('requirements.txt'),
133137
has_ext_modules=check_ext_modules,

src/turbomind/models/llama/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ endif()
4848

4949
add_executable(llama_gemm llama_gemm.cc)
5050
target_link_libraries(llama_gemm PUBLIC CUDA::cudart gpt_gemm_func memory_utils cuda_utils logger)
51+
install(TARGETS llama_gemm DESTINATION ${CMAKE_SOURCE_DIR}/lmdeploy/bin)

0 commit comments

Comments
 (0)