1010import  re 
1111import  subprocess 
1212import  sys 
13+ import  shutil 
1314from  pathlib  import  Path 
1415
1516from  setuptools  import  Extension , find_packages , setup 
1617from  setuptools .command .build_ext  import  build_ext 
18+ from  setuptools .command .build_py  import  build_py  as  build_py_orig 
1719
1820# Read the README file 
1921with  open ("README.md" , "r" ) as  f :
@@ -126,19 +128,52 @@ def build_extension(self, ext):  # noqa C901
126128        )
127129
128130
class BuildPy(build_py_orig):
    """``build_py`` variant that also places the ``include`` tree in the package.

    Once the stock ``build_py`` step has finished, every regular file found
    under the ``include`` directory (a path relative to the current working
    directory) is mirrored into ``<build_lib>/pytorch_tokenizers/include``
    with its relative directory layout preserved.
    """

    def run(self):
        """Run the standard build, then copy the ``include`` tree into build_lib.

        Nothing extra happens when the ``include`` directory does not exist.
        """
        super().run()

        source_root = Path("include")
        if source_root.exists():
            target_root = Path(self.build_lib, "pytorch_tokenizers", "include")
            # rglob("*") also yields directories; only regular files are
            # copied (regardless of extension), and their parent directories
            # are created on demand just before each copy.
            for entry in (p for p in source_root.rglob("*") if p.is_file()):
                copied_to = target_root / entry.relative_to(source_root)
                copied_to.parent.mkdir(parents=True, exist_ok=True)
                # copy2 copies the contents and tries to keep file metadata.
                shutil.copy2(entry, copied_to)
146+ 
147+ 
129148setup (
130149    name = "pytorch-tokenizers" ,
131-     version = "0. 1.0" ,
150+     version = "1.0.1 " ,
132151    long_description = long_description ,
133152    long_description_content_type = "text/markdown" ,
134153    url = "https://github.com/meta-pytorch/tokenizers" ,
135154    packages = find_packages (),
155+     include_package_data = True ,
156+     package_data = {
157+         "pytorch_tokenizers" : [
158+             "include/*.h" ,
159+             "include/**/*.h" ,
160+             "include/*.hpp" ,
161+             "include/**/*.hpp" ,
162+         ]
163+     },
136164    ext_modules = [CMakeExtension ("pytorch_tokenizers.pytorch_tokenizers_cpp" )],
137-     cmdclass = {"build_ext" : CMakeBuild },
165+     cmdclass = {
166+         "build_ext" : CMakeBuild ,
167+         "build_py" : BuildPy ,
168+     },
138169    zip_safe = False ,
139170    python_requires = ">=3.10" ,
140171    install_requires = [
141172        "pybind11>=2.6.0" ,
173+         "sentencepiece" ,
174+         "mistral-common" ,
175+         "tokenizers" ,
176+         "tiktoken" ,
142177    ],
143178    setup_requires = [
144179        "pybind11>=2.6.0" ,
@@ -150,8 +185,6 @@ def build_extension(self, ext):  # noqa C901
150185        "License :: OSI Approved :: BSD License" ,
151186        "Operating System :: OS Independent" ,
152187        "Programming Language :: Python :: 3" ,
153-         "Programming Language :: Python :: 3.8" ,
154-         "Programming Language :: Python :: 3.9" ,
155188        "Programming Language :: Python :: 3.10" ,
156189        "Programming Language :: Python :: 3.11" ,
157190        "Programming Language :: Python :: 3.12" ,
0 commit comments