Commit 03872a4

improve benchmark tput by moving prompt preparation outside of loop (#54)
1 parent afd137a · commit 03872a4

2 files changed (+19, -15 lines)


src/llmperf/utils.py (+2, -4)
@@ -60,6 +60,8 @@ def randomly_sample_sonnet_lines_prompt(
     prompt_tokens_mean: int = 550,
     prompt_tokens_stddev: int = 250,
     expect_output_tokens: int = 150,
+    tokenizer = LlamaTokenizerFast.from_pretrained(
+        "hf-internal-testing/llama-tokenizer")
 ) -> Tuple[str, int]:
     """Generate a prompt that randomly samples lines from the Shakespeare sonnet at sonnet.txt.
@@ -80,10 +82,6 @@ def randomly_sample_sonnet_lines_prompt(
     A tuple of the prompt and the length of the prompt.
     """
 
-    tokenizer = LlamaTokenizerFast.from_pretrained(
-        "hf-internal-testing/llama-tokenizer"
-    )
-
     get_token_length = lambda text: len(tokenizer.encode(text))
 
     prompt = (
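The tokenizer is now a keyword argument with a default, so it is constructed once when the module is imported (Python evaluates default argument values at function-definition time) rather than on every call. A minimal usage sketch based on the new signature; the caller-side names and counts here are illustrative, not from the repo:

    from transformers import LlamaTokenizerFast

    from llmperf.utils import randomly_sample_sonnet_lines_prompt

    # Load the tokenizer once; from_pretrained() is the expensive step
    # that previously ran inside every call.
    tokenizer = LlamaTokenizerFast.from_pretrained(
        "hf-internal-testing/llama-tokenizer"
    )

    # Reuse the same tokenizer object across many prompt generations.
    prompts = [
        randomly_sample_sonnet_lines_prompt(
            prompt_tokens_mean=550,
            prompt_tokens_stddev=250,
            expect_output_tokens=150,
            tokenizer=tokenizer,
        )
        for _ in range(8)
    ]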

token_benchmark_ray.py (+17, -11)
@@ -71,6 +71,21 @@ def get_token_throughput_latencies(
     req_launcher = RequestsLauncher(clients)
     completed_requests = []
     num_completed_requests = 0
+    # make up prompts outside of send loop for faster benchmarking loop
+    num_output_tokens_list = []
+    prompts = []
+    for i in range(max_num_completed_requests):
+        num_output_tokens = (sample_random_positive_int(
+            mean_output_tokens, stddev_output_tokens
+        ))
+        num_output_tokens_list.append(num_output_tokens)
+
+        prompts.append(randomly_sample_sonnet_lines_prompt(
+            prompt_tokens_mean=mean_input_tokens,
+            prompt_tokens_stddev=stddev_input_tokens,
+            expect_output_tokens=num_output_tokens,
+            tokenizer=tokenizer
+        ))
     start_time = time.monotonic()
     iter = 0
     pbar = tqdm(total=max_num_completed_requests)
@@ -79,21 +94,12 @@ def get_token_throughput_latencies(
         and len(completed_requests) < max_num_completed_requests
     ):
         iter += 1
-        num_output_tokens = sample_random_positive_int(
-            mean_output_tokens, stddev_output_tokens
-        )
-
-        prompt = randomly_sample_sonnet_lines_prompt(
-            prompt_tokens_mean=mean_input_tokens,
-            prompt_tokens_stddev=stddev_input_tokens,
-            expect_output_tokens=num_output_tokens,
-        )
 
-        default_sampling_params = {"max_tokens": num_output_tokens}
+        default_sampling_params = {"max_tokens": num_output_tokens_list.pop()}
         default_sampling_params.update(additional_sampling_params)
         request_config = RequestConfig(
             model=model,
-            prompt=prompt,
+            prompt=prompts.pop(),
             sampling_params=default_sampling_params,
             llm_api=llm_api,
         )
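The shape of the optimization, as a self-contained sketch (the helpers and timings below are hypothetical stand-ins, not llmperf code): moving prompt construction out of the timed loop means the measured window covers only request dispatch, so reported throughput is no longer dragged down by client-side prompt preparation.

    import time

    def make_prompt() -> str:
        # Hypothetical stand-in for randomly_sample_sonnet_lines_prompt(),
        # which is expensive because it tokenizes sampled sonnet lines.
        time.sleep(0.01)
        return "prompt"

    def send_request(prompt: str) -> None:
        # Hypothetical stand-in for launching one benchmark request.
        pass

    max_num_completed_requests = 20

    # Before: prompt construction runs inside the timed loop.
    start = time.monotonic()
    for _ in range(max_num_completed_requests):
        send_request(make_prompt())
    in_loop = time.monotonic() - start

    # After: prompts are prepared up front; the timed loop only dispatches,
    # consuming the prepared list with pop() as the commit does.
    prompts = [make_prompt() for _ in range(max_num_completed_requests)]
    start = time.monotonic()
    while prompts:
        send_request(prompts.pop())
    hoisted = time.monotonic() - start

    print(f"in-loop prep: {in_loop:.2f}s, hoisted prep: {hoisted:.2f}s")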
