Merge remote-tracking branch 'origin/main' into jh/persistent_kernel_impl
jacobhinkle committed Feb 6, 2025
2 parents 4d0226c + 714f974 commit 52d2bca
Showing 61 changed files with 2,932 additions and 738 deletions.
.github/workflows/pull.yml (5 changes: 3 additions & 2 deletions)
@@ -48,9 +48,10 @@ jobs:
 OPENAI__KEY: ${{ secrets.LLM_OPENAI__KEY }}
 OPENAI__API_BASE: ${{ secrets.LLM_OPENAI__API_BASE }}
 CONFIG__MODEL: ${{ secrets.LLM_CONFIG__MODEL }}
-CONFIG__CUSTOM_MODEL_MAX_TOKENS: 131072
+CONFIG__CUSTOM_MODEL_MAX_TOKENS: 32768
 CONFIG__FALLBACK_MODELS: '[]'
-CONFIG__MAX_MODEL_TOKENS: 65536
+CONFIG__MAX_MODEL_TOKENS: 32768
+CONFIG__PUBLISH_OUTPUT_PROGRESS: false
 PR_REVIEWER__REQUIRE_SCORE_REVIEW: false
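For context (an assumption about the consumer of these variables, not something this diff states): double-underscore names like CONFIG__MAX_MODEL_TOKENS conventionally map to nested configuration entries such as config.max_model_tokens. A minimal Python sketch of that convention, using a hypothetical env_to_nested_config helper:

    import json
    import os


    def env_to_nested_config(environ=os.environ):
        # Fold SECTION__KEY variables into a {"section": {"key": value}} dict.
        # This sketches the usual double-underscore convention only; the actual
        # consumer of the variables above may parse values differently.
        config = {}
        for name, raw in environ.items():
            if "__" not in name:
                continue
            section, key = name.lower().split("__", 1)
            try:
                value = json.loads(raw)  # "32768" -> 32768, "false" -> False, "[]" -> []
            except json.JSONDecodeError:
                value = raw  # plain strings (model names, URLs) stay as-is
            config.setdefault(section, {})[key] = value
        return config

    # e.g. CONFIG__MAX_MODEL_TOKENS=32768 yields config["config"]["max_model_tokens"] == 32768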
CMakeLists.txt (2 changes: 2 additions & 0 deletions)
@@ -201,6 +201,7 @@ list(APPEND NVFUSER_SRCS
 ${NVFUSER_SRCS_DIR}/preseg_passes/remove_empty.cpp
 ${NVFUSER_SRCS_DIR}/preseg_passes/reorder_sharded_axis.cpp
 ${NVFUSER_SRCS_DIR}/preseg_passes/segment_inplace_update.cpp
+${NVFUSER_SRCS_DIR}/preseg_passes/translate_no_reduction_matmul_to_mul_squeeze.cpp
 ${NVFUSER_SRCS_DIR}/preseg_passes/translate_repeat_to_expand.cpp
 ${NVFUSER_SRCS_DIR}/rng.cpp
 ${NVFUSER_SRCS_DIR}/runtime/allocations.cpp
@@ -867,6 +868,7 @@ list(APPEND NVFUSER_RUNTIME_FILES
 ${NVFUSER_ROOT}/runtime/mbarrier.cu
 ${NVFUSER_ROOT}/runtime/memory.cu
 ${NVFUSER_ROOT}/runtime/random_numbers.cu
+${NVFUSER_ROOT}/runtime/tensor_memory.cu
 ${NVFUSER_ROOT}/runtime/tensor.cu
 ${NVFUSER_ROOT}/runtime/tuple.cu
 ${NVFUSER_ROOT}/runtime/type_traits.cu
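An aside on why a .cu file lands in NVFUSER_RUNTIME_FILES: nvFuser ships these runtime headers with the library and prepends them to JIT-generated kernels at compile time, which typically means stringifying each file into the binary during the build. A sketch of such a stringification step (hypothetical script and namespace; the actual CMake rule is not shown in this diff):

    from pathlib import Path


    def stringify_runtime_file(cu_path: Path, out_dir: Path) -> None:
        # Wrap a runtime .cu source in a C++ raw-string constant so it can be
        # compiled into the library and prepended to JIT-generated kernels.
        src = cu_path.read_text()
        var = cu_path.stem  # e.g. "tensor_memory"
        header = (
            "namespace nvfuser_resources {\n"
            f'constexpr const char* {var}_cu = R"esc(\n{src}\n)esc";\n'
            "}\n"
        )
        (out_dir / f"{var}.h").write_text(header)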
benchmarks/python/model_configs.py (105 changes: 105 additions & 0 deletions)
@@ -0,0 +1,105 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-present NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
from functools import partial

from transformers import AutoConfig


def llama_hf_cfg(config_str):
    class Config:
        def __init__(
            self, n_head, head_size, n_query_groups, rope_n_elem, batches, seq_length
        ):
            self.n_head = n_head
            self.head_size = head_size
            self.n_query_groups = n_query_groups
            self.rope_n_elem = rope_n_elem
            self.batches = batches
            self.seq_length = seq_length

    configs = {}
    configs["llama_2_7b_hf"] = Config(
        n_head=32,
        head_size=128,
        n_query_groups=32,
        rope_n_elem=128,
        batches=2,
        seq_length=4096,
    )
    configs["llama_3_8B"] = Config(
        n_head=32,
        head_size=128,
        n_query_groups=8,
        rope_n_elem=128,
        batches=2,
        seq_length=8192,
    )

    return configs[config_str]


def hf_qwen2_cfg():
    config = AutoConfig.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
    config.batch_size = 1
    config.seq_len = 4096
    config._attn_implementation = "sdpa"
    return config


def hf_phi3_cfg():
    config = AutoConfig.from_pretrained("microsoft/Phi-3.5-mini-instruct")
    config.batch_size = 1
    config.seq_len = 8192
    config._attn_implementation = "sdpa"
    return config


def hf_mistral_nemo_cfg():
    import json
    from transformers.models.mistral import MistralConfig

    mistral_cfg_str = r"""{
        "_name_or_path": "mistralai/Mistral-Nemo-Base-2407",
        "architectures": [
            "MistralForCausalLM"
        ],
        "attention_dropout": 0.0,
        "bos_token_id": 1,
        "eos_token_id": 2,
        "head_dim": 128,
        "hidden_act": "silu",
        "hidden_size": 5120,
        "initializer_range": 0.02,
        "intermediate_size": 14336,
        "max_position_embeddings": 128000,
        "model_type": "mistral",
        "num_attention_heads": 32,
        "num_hidden_layers": 40,
        "num_key_value_heads": 8,
        "rms_norm_eps": 1e-05,
        "rope_theta": 1000000.0,
        "sliding_window": null,
        "tie_word_embeddings": false,
        "torch_dtype": "bfloat16",
        "transformers_version": "4.43.3",
        "use_cache": true,
        "vocab_size": 131072
    }
    """

    cfg = MistralConfig.from_dict(json.loads(mistral_cfg_str))
    cfg.batch_size = 1
    cfg.seq_len = 4096
    cfg._attn_implementation = "sdpa"

    return cfg


configs = {
"llama_2_7b_hf": partial(llama_hf_cfg, config_str="llama_2_7b_hf"),
"llama_3_8B": partial(llama_hf_cfg, config_str="llama_3_8B"),
"hf_qwen2": hf_qwen2_cfg,
"hf_phi3": hf_phi3_cfg,
"hf_mistral_nemo": hf_mistral_nemo_cfg,
}
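Every entry in this registry is a zero-argument callable, so a benchmark can resolve any model configuration uniformly. A hypothetical call site (not part of this diff):

    from model_configs import configs

    llama_cfg = configs["llama_3_8B"]()  # lightweight local Config object
    print(llama_cfg.n_head, llama_cfg.seq_length)  # 32 8192

    # The hf_* entries return transformers configs and fetch model metadata
    # from the Hugging Face hub on first use.
    qwen_cfg = configs["hf_qwen2"]()
    print(qwen_cfg.batch_size, qwen_cfg.seq_len)  # 1 4096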
(Diffs for the remaining 58 changed files were not loaded.)
