Skip to content

Commit

Permalink
[MultiGPU] Fix pipeline parallelism prompt message (#2855)
Browse files Browse the repository at this point in the history
This PR fixes the prompt message for pipeline parallelism when the
socket address is 0.0.0.0: the suggested launch command now prints
`<YOUR_NODE_IP>` in place of the wildcard address.

This PR also updates the positional embedding TIR function to declare
`seq_len` and `position_map_elem_offset` with int32 dtype instead of int64.
  • Loading branch information
MasterJH5574 authored Aug 26, 2024
1 parent 83d0fe3 commit 7264faa
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions cpp/serve/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,8 @@ class EngineImpl : public Engine {
LOG(INFO) << "Please launch " << green_text_begin << max_num_stages - 1 << colored_text_end
<< " remote socket node(s) with the following command to proceed:\n\t"
<< green_text_begin << "python -m mlc_llm.cli.disco_remote_socket_session "
- << socket_host.value() << " " << socket_port << " " << num_shards
- << colored_text_end;
+ << (socket_host.value() == "0.0.0.0" ? "<YOUR_NODE_IP>" : socket_host.value())
+ << " " << socket_port << " " << num_shards << colored_text_end;
const PackedFunc* f_create_socket_sess = Registry::Get("runtime.disco.SocketSession");
CHECK(f_create_socket_sess != nullptr)
<< "SocketSession constructor \"runtime.disco.SocketSession\" not found in TVM "
Expand Down
4 changes: 2 additions & 2 deletions python/mlc_llm/op/position_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ def fused_rope( # pylint: disable=too-many-locals
"tir.noalias": T.bool(True),
}
)
- seq_len = T.int64()
- position_map_elem_offset = T.int64()
+ seq_len = T.int32()
+ position_map_elem_offset = T.int32()
qkv = T.match_buffer(var_qkv, (seq_len, fused_heads, head_dim), dtype)
q = T.match_buffer(var_q, (seq_len, num_q_heads, head_dim), dtype)
k = T.match_buffer(var_k, (seq_len, num_kv_heads, head_dim), dtype)
Expand Down

0 comments on commit 7264faa

Please sign in to comment.