-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
ilker.sigirci
committed
Mar 19, 2024
1 parent
5f9b271
commit 4846e66
Showing
17 changed files
with
406 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
""" | ||
pip install python-dotenv huggingface_hub | ||
HF_HOME=/workspace/huggingface python download_model.py | ||
""" | ||
|
||
import os | ||
|
||
from dotenv import load_dotenv | ||
from huggingface_hub import snapshot_download | ||
|
||
|
||
def main(): | ||
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" | ||
# repo_id = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" | ||
# repo_id = "turboderp/Mixtral-8x7B-instruct-exl2" | ||
# repo_id = "turboderp/TinyLlama-1B-exl2" | ||
# repo_id = "CohereForAI/aya-101" | ||
# repo_id = "wolfram/miquliz-120b-v2.0-5.0bpw-h6-exl2" | ||
# repo_id = "Trendyol/Trendyol-LLM-7b-base-v0.1" | ||
# repo_id = "teknium/OpenHermes-2.5-Mistral-7B" | ||
# repo_id = "sambanovasystems/SambaLingo-Turkish-Chat" | ||
|
||
local_dir_name = repo_id.split("/")[1] | ||
|
||
token = os.getenv("HF_TOKEN", None) | ||
|
||
# hf_hub_download(repo_id=repo_id, filename="config.json", revision="8.0bpw") | ||
|
||
# NOTE: First downloads to cache and then copies to local_dir | ||
snapshot_download( | ||
repo_id=repo_id, | ||
revision="main", | ||
local_dir=f"./models/{local_dir_name}", | ||
local_dir_use_symlinks=False, | ||
# ignore_patterns=["*.pt"], | ||
ignore_patterns=["*.pt", "*.bin"], | ||
token=token, | ||
) | ||
|
||
|
||
if __name__ == "__main__": | ||
load_dotenv() | ||
|
||
main() |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash

# Install base CLI tooling (editor + resource monitors) on first run only;
# the presence of nano is used as the "already provisioned" marker.
command -v nano >/dev/null 2>&1 || {
    apt update -y && apt install nano htop nvtop ncdu -y
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash

# Provision base CLI tooling on first run (nano presence marks "done").
if ! command -v nano >/dev/null 2>&1; then
    apt update -y && apt install nano htop nvtop ncdu -y
fi

# The base image ships torch 2.1.1, which clashes with the version
# aphrodite-engine pins; remove it so pip can resolve a clean set.
if pip show torch | grep -q "Version: 2.1.1"; then
    pip uninstall torch torchaudio torchvision -y
fi

if ! pip show aphrodite-engine >/dev/null 2>&1; then
    pip install aphrodite-engine
fi

# python -m aphrodite.endpoints.openai.api_server --help
# Plain assignments instead of a `VAR=... && \` continuation chain:
# simpler, and safe to annotate between lines.
MODEL=/workspace/models/Mixtral-8x7B-Instruct-v0.1
MODEL_NAME=Mixtral-8x7B-Instruct-v0.1

HF_HOME=/workspace/huggingface \
python -m aphrodite.endpoints.openai.api_server \
    --host 0.0.0.0 \
    --port 8000 \
    --tensor-parallel-size 2 \
    --gpu-memory-utilization 0.9 \
    --model "$MODEL" \
    --served-model-name "$MODEL_NAME"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash

# Provision base CLI tooling on first run (nano presence marks "done").
if ! command -v nano >/dev/null 2>&1; then
    apt update -y && apt install nano htop nvtop ncdu -y
fi

# Drop the base image's torch 2.1.1 so the requirements file below can
# install the versions tabbyAPI expects.
if pip show torch | grep -q "Version: 2.1.1"; then
    pip uninstall torch torchaudio torchvision -y
fi

# Symbolic link models into tabbyAPI's model directory.
# -sfn (instead of plain -s) replaces existing links, so re-running the
# script does not fail with "File exists".
for file in /workspace/models/*; do
    ln -sfn "$file" "/workspace/tabbyAPI/official-repo/models/$(basename "$file")"
done

if ! pip show exllamav2 >/dev/null 2>&1; then
    pip install -r /workspace/tabbyAPI/official-repo/requirements.txt
fi

cd /workspace/tabbyAPI/official-repo && python3 main.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/bin/bash

# Provision base CLI tooling on first run (nano presence marks "done").
if ! command -v nano >/dev/null 2>&1; then
    apt update -y && apt install nano htop nvtop ncdu -y
fi

# vllm pins its own torch; only the extra packages are removed here
# (torch itself is intentionally kept, unlike the other run scripts).
if pip show torch | grep -q "Version: 2.1.1"; then
    pip uninstall torchaudio torchvision -y
fi

if ! pip show vllm >/dev/null 2>&1; then
    pip install vllm==0.3.3
fi

# Pick exactly one MODEL / MODEL_NAME pair.
# NOTE(fix): these used to be `VAR=... && \` lines with commented-out
# alternatives interleaved in the continuation chain — a trailing `\`
# inside a comment does NOT continue the line, so the chain was silently
# split and only worked because the assignments persisted in the shell.
MODEL=/workspace/models/Mixtral-8x7B-Instruct-v0.1
MODEL_NAME=Mixtral-8x7B-Instruct-v0.1
# MODEL=/workspace/models/Trendyol-LLM-7b-chat-v0.1
# MODEL_NAME=Trendyol
# MODEL=/workspace/models/aya-101
# MODEL_NAME=Aya

# --tensor-parallel-size 2
HF_HOME=/workspace/huggingface \
python -m vllm.entrypoints.openai.api_server \
    --host 0.0.0.0 \
    --port 8000 \
    --tensor-parallel-size 2 \
    --gpu-memory-utilization 0.9 \
    --model "$MODEL" \
    --served-model-name "$MODEL_NAME"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# Sample YAML file for configuration. | ||
# Comment and uncomment values as needed. Every value has a default within the application. | ||
# This file serves to be a drop in for config.yml | ||
|
||
# Unless specified in the comments, DO NOT put these options in quotes! | ||
# You can use https://www.yamllint.com/ if you want to check your YAML formatting. | ||
|
||
# Options for networking | ||
network: | ||
# The IP to host on (default: 127.0.0.1). | ||
# Use 0.0.0.0 to expose on all network adapters | ||
host: 0.0.0.0 | ||
|
||
# The port to host on (default: 5000) | ||
port: 8000 | ||
# port: 8080 | ||
|
||
# Disable HTTP token authentication with requests | ||
# WARNING: This will make your instance vulnerable! | ||
# Turn on this option if you are ONLY connecting from localhost | ||
disable_auth: True | ||
|
||
# Options for logging | ||
logging: | ||
# Enable prompt logging (default: False) | ||
prompt: False | ||
|
||
# Enable generation parameter logging (default: False) | ||
generation_params: False | ||
|
||
# Options for sampling | ||
sampling: | ||
# Override preset name. Find this in the sampler-overrides folder (default: None) | ||
# This overrides default fallbacks for sampler values that are passed to the API | ||
# Server-side overrides are NOT needed by default | ||
# WARNING: Using this can result in a generation speed penalty | ||
override_preset: sample_overrides | ||
|
||
# Options for model overrides and loading | ||
model: | ||
# Overrides the directory to look for models (default: models) | ||
# Windows users, DO NOT put this path in quotes! This directory will be invalid otherwise. | ||
model_dir: models | ||
|
||
# An initial model to load. Make sure the model is located in the model directory! | ||
# A model can be loaded later via the API. | ||
# REQUIRED: This must be filled out to load a model on startup! | ||
# model_name: | ||
# model_name: TinyLlama-1B-exl2 | ||
# model_name: Mixtral-8x7B-instruct-exl2 | ||
# model_name: Mixtral-8x7B-Instruct-v0.1 | ||
model_name: Trendyol-LLM-7b-chat-v0.1 | ||
# model_name: miquliz-120b-v2.0-5.0bpw-h6-exl2 | ||
# model_name: aya-101 | ||
|
||
# Sends dummy model names when the models endpoint is queried | ||
# Enable this if the program is looking for a specific OAI model | ||
#use_dummy_models: False | ||
|
||
# The below parameters apply only if model_name is set | ||
|
||
# Max sequence length (default: Empty) | ||
# Fetched from the model's base sequence length in config.json by default | ||
#max_seq_len: | ||
|
||
# Overrides base model context length (default: Empty) | ||
# WARNING: Don't set this unless you know what you're doing! | ||
# Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models) | ||
# override_base_seq_len: 30000 # TODO: What is this ?? | ||
|
||
# Automatically allocate resources to GPUs (default: True) | ||
# gpu_split_auto: False | ||
|
||
# An integer array of GBs of vram to split between GPUs (default: []) | ||
# gpu_split: [50, 50] | ||
|
||
# Rope scale (default: 1.0) | ||
# Same thing as compress_pos_emb | ||
# Only use if your model was trained on long context with rope (check config.json) | ||
# Leave blank to pull the value from the model | ||
#rope_scale: 1.0 | ||
|
||
# Rope alpha (default: 1.0) | ||
# Same thing as alpha_value | ||
# Leave blank to automatically calculate alpha | ||
#rope_alpha: 1.0 | ||
|
||
# Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False) | ||
#no_flash_attention: False | ||
|
||
# Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16) | ||
#cache_mode: FP16 | ||
|
||
# Set the prompt template for this model. If empty, chat completions will be disabled. (default: Empty) | ||
# NOTE: Only works with chat completion message lists! | ||
# prompt_template: chatml | ||
# prompt_template: zephyr # For tiny llama | ||
prompt_template: mixtral | ||
# prompt_template: mistral-instruct | ||
# prompt_template: mistral-official # NOTE: Not working | ||
|
||
# Number of experts to use PER TOKEN. Fetched from the model's config.json if not specified (default: Empty) | ||
# WARNING: Don't set this unless you know what you're doing! | ||
# NOTE: For MoE models (ex. Mixtral) only! | ||
#num_experts_per_token: | ||
|
||
# Enables CFG support (default: False) | ||
# WARNING: This flag disables Flash Attention! (a stopgap fix until it's fixed in upstream) | ||
#use_cfg: False | ||
|
||
# Enables fasttensors to possibly increase model loading speeds (default: False) | ||
fasttensors: true | ||
|
||
# Options for draft models (speculative decoding). This will use more VRAM! | ||
#draft: | ||
# Overrides the directory to look for draft (default: models) | ||
#draft_model_dir: models | ||
|
||
# An initial draft model to load. Make sure this model is located in the model directory! | ||
# A draft model can be loaded later via the API. | ||
#draft_model_name: A model name | ||
|
||
# Rope scale for draft models (default: 1.0) | ||
# Same thing as compress_pos_emb | ||
# Only use if your draft model was trained on long context with rope (check config.json) | ||
#draft_rope_scale: 1.0 | ||
|
||
# Rope alpha for draft model (default: 1.0) | ||
# Same thing as alpha_value | ||
# Leave blank to automatically calculate alpha value | ||
#draft_rope_alpha: 1.0 | ||
|
||
# Options for loras | ||
#lora: | ||
# Overrides the directory to look for loras (default: loras) | ||
#lora_dir: loras | ||
|
||
# List of loras to load and associated scaling factors (default: 1.0). Comment out unused entries or add more rows as needed. | ||
#loras: | ||
#- name: lora1 | ||
# scaling: 1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{# Alpaca-style chat template: last system message first, then each turn as a "### Instruction:" (user), "### Response:" (assistant), or "### Input:" (user_context) section, separated by two blank lines; appends a "### Response:" header when generation is requested. #}{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{% for message in messages %}
{% if message['role'] == 'user' %}
### Instruction:
{{ message['content']|trim -}}
{% if not loop.last %}


{% endif %}
{% elif message['role'] == 'assistant' %}
### Response:
{{ message['content']|trim -}}
{% if not loop.last %}


{% endif %}
{% elif message['role'] == 'user_context' %}
### Input:
{{ message['content']|trim -}}
{% if not loop.last %}


{% endif %}
{% endif %}
{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
### Response:
{% endif %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
{# ChatML template (compact form): each message rendered as <|im_start|>role\ncontent<|im_end|>; the final <|im_end|> is kept, and an <|im_start|>assistant\n header is appended when generation is requested. #}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{# ChatML template with strict role alternation: an optional leading system message shifts the expected user/assistant parity; raises if roles do not alternate. #}{% if messages[0]['role'] == 'system' %}
{% set offset = 1 %}
{% else %}
{% set offset = 0 %}
{% endif %}
{{ bos_token }}
{% for message in messages %}
{% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{% endif %}
{{ '<|im_start|>' + message['role'] + '\n' + message['content'].strip() + '<|im_end|>\n' }}
{% endfor %}
{% if add_generation_prompt %}
{{ '<|im_start|>assistant\n' }}
{% endif %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{# Mistral-instruct template: a leading system message is folded into the first user turn; user turns wrapped in [INST] ... [/INST], assistant turns followed by eos_token; raises if roles do not alternate. #}{% if messages[0]['role'] == 'system' %}
{% set loop_messages = messages[1:] %}
{% set system_message = messages[0]['content'].strip() + '\n\n' %}
{% else %}
{% set loop_messages = messages %}
{% set system_message = '' %}
{% endif %}
{{ bos_token }}
{% for message in loop_messages %}
{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{% endif %}
{% if loop.index0 == 0 %}
{% set content = system_message + message['content'] %}
{% else %}
{% set content = message['content'] %}
{% endif %}
{% if message['role'] == 'user' %}
{{ '[INST] ' + content.strip() + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ ' ' + content.strip() + ' ' + eos_token }}
{% endif %}
{% endfor %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{# Mistral [INST] template (single line, user/assistant only): raises on any other role or on non-alternating turns. #}{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{# Mistral template with system-role support and whitespace-trimmed ({{- -}}) output: system content emitted verbatim, user wrapped in [INST] ... [/INST], assistant followed by eos_token; raises on any other role. #}{{- bos_token -}}
{% if bos_token|length > 0 %}
{{- ' ' -}}
{% endif %}
{% for message in messages %}
{% if message['role'] == 'system' %}
{{- message['content'] -}}
{% elif message['role'] == 'user' %}
{{- '[INST] ' + message['content'] + ' [/INST]' -}}
{% elif message['role'] == 'assistant' %}
{{- message['content'] + eos_token -}}
{% else %}
{{ raise_exception('Only user, assistant, and system roles are supported!') }}
{% endif %}
{% endfor %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{# Vicuna-style template: optional system preamble, then "USER: ..." / "ASSISTANT: ...<eos>" lines; raises if roles do not alternate; appends "ASSISTANT:" when generation is requested. #}{% if messages[0]['role'] == 'system' %}
{% set loop_messages = messages[1:] %}
{% set system_message = messages[0]['content'].strip() + '\n\n' %}
{% else %}
{% set loop_messages = messages %}
{% set system_message = '' %}
{% endif %}
{{ bos_token + system_message }}
{% for message in loop_messages %}
{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{% endif %}
{% if message['role'] == 'user' %}
{{ 'USER: ' + message['content'].strip() + '\n' }}
{% elif message['role'] == 'assistant' %}
{{ 'ASSISTANT: ' + message['content'].strip() + eos_token + '\n' }}
{% endif %}
{% endfor %}
{% if add_generation_prompt %}
{{ 'ASSISTANT:' }}
{% endif %}
Oops, something went wrong.