Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ def get_loss(self):
line_words = line.split(self.separator) if self.separator else line.split()
for i in range(len(line_words) - 1):
if line_words[i] == self.convergence_key:
result_loss = line_words[i + 1]
# train_tokens_per_second = 11210.536
result_loss = line_words[i + 2]
result_loss = result_loss.replace(',', '')
raise ExceptionTest()
except ExceptionTest:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ mkdir -p /opt/${model_name_or_path} && cd /opt/${model_name_or_path}
export no_proxy=bcebos.com
case ${model_name_or_path} in
shakechen/Llama-2-7b-hf)
echo '
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/LICENSE.txt
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/README.md
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/Responsible-Use-Guide.pdf
Expand All @@ -42,8 +43,11 @@ shakechen/Llama-2-7b-hf)
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/tokenizer.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/tokenizer.model
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/shakechen/Llama-2-7b-hf/tokenizer_config.json
' > wget_7b.sh
bash wget_7b.sh
echo "download models for shakechen/Llama-2-7b-hf done" ;;
ydyajyA/Llama-2-13b-chat-hf)
echo '
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/config.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/configuration.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/generation_config.json
Expand All @@ -60,27 +64,29 @@ ydyajyA/Llama-2-13b-chat-hf)
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/tokenizer.model
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/tokenizer_config.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/ydyajyA/Llama-2-13b-chat-hf/up.ipynb
' > wget_13b.sh
bash wget_13b.sh
echo "download models for ydyajyA/Llama-2-13b-chat-hf done" ;;
meta-llama/Llama-2-70b-hf)
nums1=("00001" "00002" "00003" "00004" "00005")
for num in "${nums1[@]}"; do
url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
wget -c "${url}" &
done
wait
nums2=("00006" "00007" "00008" "00009" "00010")
for num in "${nums2[@]}"; do
url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
wget -c "${url}" &
done
wait
nums3=("00011" "00012" "00013" "00014" "00015")
for num in "${nums3[@]}"; do
url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
wget -c "${url}" &
done
wait

# nums1=("00001" "00002" "00003" "00004" "00005")
# for num in "${nums1[@]}"; do
# url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
# wget -c "${url}" &
# done
# wait
# nums2=("00006" "00007" "00008" "00009" "00010")
# for num in "${nums2[@]}"; do
# url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
# wget -c "${url}" &
# done
# wait
# nums3=("00011" "00012" "00013" "00014" "00015")
# for num in "${nums3[@]}"; do
# url="https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-${num}-of-00015.safetensors"
# wget -c "${url}" &
# done
# wait
echo '
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/LICENSE.txt
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/README.md
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/Responsible-Use-Guide.pdf
Expand All @@ -89,25 +95,27 @@ meta-llama/Llama-2-70b-hf)
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/configuration.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/generation_config.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/llama_updates.patch
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00001-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00002-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00003-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00004-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00005-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00006-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00007-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00008-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00009-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00010-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00011-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00012-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00013-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00014-of-00015.safetensors
# wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00015-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00001-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00002-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00003-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00004-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00005-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00006-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00007-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00008-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00009-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00010-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00011-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00012-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00013-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00014-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model-00015-of-00015.safetensors
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/model.safetensors.index.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/special_tokens_map.json
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/tokenizer.model
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/models/huggingface/meta-llama/Llama-2-70b-hf/tokenizer_config.json
' > wget_70b.sh
bash wget_70b.sh
echo "download models for meta-llama/Llama-2-70b-hf done" ;;
*)
echo "${model_name_or_path} not in bos, download from modelscope";
Expand All @@ -119,8 +127,7 @@ esac
cd -


mv -v data data_bak
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/llm_benchmark_data/llamafactory_data.tar.gz
tar zxf llamafactory_data.tar.gz && rm -rf llamafactory_data.tar.gz
tar zxf llamafactory_data.tar.gz
wget --no-proxy -c https://paddlenlp.bj.bcebos.com/llm_benchmark_data/deepspeed.tar.gz
tar zxf deepspeed.tar.gz && rm -rf deepspeed.tar.gz
tar zxf deepspeed.tar.gz
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function _set_params(){
speed_unit="effective_tokens/sec" # (必选)速度指标单位
skip_steps=0 # (必选)解析日志,跳过模型前几个性能不稳定的step
keyword="effective_tokens_per_sec" # (必选)解析日志,筛选出性能数据所在行的关键字
convergence_key="total_tokens:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"
convergence_key="train_tokens_per_second" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"
max_iter=${8:-"100"} # (可选)需保证模型执行时间在5分钟内,需要修改代码提前中断的直接提PR 合入套件 或是max_epoch
num_workers=${9:-"3"} # (可选)
is_large_model=True
Expand Down Expand Up @@ -91,11 +91,11 @@ function _train(){
export TOKENS_PER_STEP=131072
fi
timeout 40m ${train_cmd} > ${log_file} 2>&1
Tokens_per_second_per_gpu=`cat ${log_file} | grep 'train_samples_per_second =' \
|awk -F'= ' '{print $2}' |awk -F' ' '{print $1}'`
length=4096
Total_Tokens_per_second_per_gpu=$(awk -v a="$Tokens_per_second_per_gpu" -v b="$length" 'BEGIN {printf "%.2f\n", a * b}')
echo "total_tokens: ${Total_Tokens_per_second_per_gpu}" >> ${log_file}
# Tokens_per_second_per_gpu=`cat ${log_file} | grep 'train_samples_per_second =' \
# |awk -F'= ' '{print $2}' |awk -F' ' '{print $1}'`
# length=4096
# Total_Tokens_per_second_per_gpu=$(awk -v a="$Tokens_per_second_per_gpu" -v b="$length" 'BEGIN {printf "%.2f\n", a * b}')
# echo "total_tokens: ${Total_Tokens_per_second_per_gpu}" >> ${log_file}
# NOTE: this check always sees 0, regardless of whether the run succeeded
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ def get_loss(self):
line_words = line.split(self.separator) if self.separator else line.split()
for i in range(len(line_words) - 1):
if line_words[i] == self.convergence_key:
result_loss = line_words[i + 1]
# train_tokens_per_second = 11210.536
result_loss = line_words[i + 2]
result_loss = result_loss.replace(',', '')
raise ExceptionTest()
except ExceptionTest:
Expand Down
Loading