Skip to content

Commit 4349c50

Browse files
function47meta-codesync[bot]
authored and committed
Add MCCL Backend Override cvars for mccl.
Summary: This diff adds a new CVAR, MCCL_CTRAN_BACKENDS, that allows mcclComm to directly override the backends individually. This CVAR affects the ctran hints when creating a Ctran. This diff creates a separate path for specifying the ctran backends without interrupting the existing backend selection for other comms like NCCLx. Reviewed By: dboyda, saifhhasan Differential Revision: D86116884 fbshipit-source-id: 704bce46ab3888e065aa8c39d2a757df2dc04c36
1 parent 0514666 commit 4349c50

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

comms/utils/cvars/nccl_cvars.cc

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Copyright (c) Meta Platforms, Inc. and affiliates.
2-
// @generated SignedSource<<12578d0c7386c85de2417d28efac7a19e1861246>>
2+
// @generated SignedSource<<235577956cedef2ff545bc7d628c065308a4075d>>
33
// Automatically generated by ./comms/utils/cvars/extractcvars.py --- START
44
// DO NOT EDIT!!!
55

@@ -292,6 +292,8 @@ void ncclCvarInit() {
292292

293293
bool CUDA_LAUNCH_BLOCKING;
294294
bool CUDA_LAUNCH_BLOCKING_DEFAULT;
295+
std::vector<enum MCCL_CTRAN_BACKENDS> MCCL_CTRAN_BACKENDS;
296+
std::vector<enum MCCL_CTRAN_BACKENDS> MCCL_CTRAN_BACKENDS_DEFAULT;
295297
std::string NCCL_ALGO;
296298
std::string NCCL_ALGO_DEFAULT;
297299
enum NCCL_ALLGATHER_ALGO NCCL_ALLGATHER_ALGO;
@@ -1516,6 +1518,7 @@ std::unordered_map<std::string, bool*> env_bool_values = {
15161518

15171519
static void initEnvSet(std::unordered_set<std::string>& env) {
15181520
env.insert("CUDA_LAUNCH_BLOCKING");
1521+
env.insert("MCCL_CTRAN_BACKENDS");
15191522
env.insert("NCCL_ALGO");
15201523
env.insert("NCCL_ALLGATHER_ALGO");
15211524
env.insert("NCCL_ALLGATHER_P_ALGO");
@@ -1935,6 +1938,31 @@ static void readCvarEnv() {
19351938
if (CUDA_LAUNCH_BLOCKING_DEFAULT != CUDA_LAUNCH_BLOCKING) {
19361939
CVAR_INFO("NCCL Config - CVAR {} has an override", "CUDA_LAUNCH_BLOCKING");
19371940
}
1941+
{
1942+
MCCL_CTRAN_BACKENDS.clear();
1943+
auto tokens = env2strlist("MCCL_CTRAN_BACKENDS", "not_set");
1944+
for (auto token : tokens) {
1945+
if (token == std::string("not_set")) {
1946+
MCCL_CTRAN_BACKENDS.emplace_back(MCCL_CTRAN_BACKENDS::not_set);
1947+
} else if (token == std::string("ib")) {
1948+
MCCL_CTRAN_BACKENDS.emplace_back(MCCL_CTRAN_BACKENDS::ib);
1949+
} else if (token == std::string("nvl")) {
1950+
MCCL_CTRAN_BACKENDS.emplace_back(MCCL_CTRAN_BACKENDS::nvl);
1951+
} else if (token == std::string("socket")) {
1952+
MCCL_CTRAN_BACKENDS.emplace_back(MCCL_CTRAN_BACKENDS::socket);
1953+
} else if (token == std::string("tcpdm")) {
1954+
MCCL_CTRAN_BACKENDS.emplace_back(MCCL_CTRAN_BACKENDS::tcpdm);
1955+
} else {
1956+
CVAR_WARN_UNKNOWN_VALUE("MCCL_CTRAN_BACKENDS", token.c_str());
1957+
}
1958+
}
1959+
}
1960+
MCCL_CTRAN_BACKENDS_DEFAULT.clear();
1961+
MCCL_CTRAN_BACKENDS_DEFAULT.emplace_back(MCCL_CTRAN_BACKENDS::not_set);
1962+
1963+
if (MCCL_CTRAN_BACKENDS_DEFAULT != MCCL_CTRAN_BACKENDS) {
1964+
CVAR_INFO("NCCL Config - CVAR {} has an override", "MCCL_CTRAN_BACKENDS");
1965+
}
19381966
NCCL_ALGO = env2str("NCCL_ALGO", "");
19391967
NCCL_ALGO_DEFAULT = env2str("NCCL_ENV_DO_NOT_SET", "");
19401968

comms/utils/cvars/nccl_cvars.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Copyright (c) Meta Platforms, Inc. and affiliates.
2-
// @generated SignedSource<<12578d0c7386c85de2417d28efac7a19e1861246>>
2+
// @generated SignedSource<<235577956cedef2ff545bc7d628c065308a4075d>>
33
// Automatically generated by ./comms/utils/cvars/extractcvars.py --- START
44
// DO NOT EDIT!!!
55

@@ -14,6 +14,16 @@
1414
extern bool CUDA_LAUNCH_BLOCKING;
1515
extern bool CUDA_LAUNCH_BLOCKING_DEFAULT;
1616

17+
enum class MCCL_CTRAN_BACKENDS {
18+
not_set,
19+
ib,
20+
nvl,
21+
socket,
22+
tcpdm,
23+
};
24+
extern std::vector<enum MCCL_CTRAN_BACKENDS> MCCL_CTRAN_BACKENDS;
25+
extern std::vector<enum MCCL_CTRAN_BACKENDS> MCCL_CTRAN_BACKENDS_DEFAULT;
26+
1727
extern std::string NCCL_ALGO;
1828
extern std::string NCCL_ALGO_DEFAULT;
1929

comms/utils/cvars/nccl_cvars.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,3 +2720,14 @@ cvars:
27202720
type : string
27212721
default : ""
27222722
description : Used to bind NCCL's network proxy threads to specific CPU cores.
2723+
2724+
- name : MCCL_CTRAN_BACKENDS
2725+
type : enumlist
2726+
default : not_set
2727+
choices : not_set, ib, nvl, socket, tcpdm
2728+
description : |-
2729+
MCCL Backend override to enable for ctran. It should use the same format
2730+
as NCCL_CTRAN_BACKENDS. By default, it is not_set, which means it will use whatever
2731+
backends are specified by NCCL_CTRAN_BACKENDS.
2732+
Usage: If NCCL_CTRAN_BACKENDS is set with certain values,
2733+
this config will allow MCCL to override backends.

0 commit comments

Comments
 (0)