Skip to content

Commit

Permalink
add support for SLURM block topology format
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Shmulevich <[email protected]>
  • Loading branch information
dmitsh committed Oct 21, 2024
1 parent b7d5a58 commit d469ac1
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 20 deletions.
6 changes: 6 additions & 0 deletions pkg/common/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (
ProviderBM = "baremetal"
ProviderTest = "test"

KeyEngine = "engine"
EngineSLURM = "slurm"
EngineK8S = "k8s"
EngineTest = "test"
Expand All @@ -32,5 +33,10 @@ const (
KeyTopoConfigPath = "topology_config_path"
KeyTopoConfigmapName = "topology_configmap_name"
KeyTopoConfigmapNamespace = "topology_configmap_namespace"
KeyBlockSizes = "block_sizes"
KeySkipReload = "skip_reload"

KeyPlugin = "plugin"
ValTopologyTree = "topology/tree"
ValTopologyBlock = "topology/block"
)
4 changes: 2 additions & 2 deletions pkg/common/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ func TestPayload(t *testing.T) {
Engine: engine{
Name: "slurm",
Params: map[string]string{
"plugin": "topology/block",
"block_sizes": "30,120",
KeyPlugin: ValTopologyBlock,
KeyBlockSizes: "30,120",
},
},
Nodes: []ComputeInstances{
Expand Down
2 changes: 1 addition & 1 deletion pkg/engines/k8s/labeler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (l *testLabeler) AddNodeLabels(_ context.Context, nodeName string, labels m
}

func TestApplyNodeLabels(t *testing.T) {
root, _ := translate.GetTestSet(true)
root, _ := translate.GetTreeTestSet(true)
labeler := &testLabeler{data: make(map[string]map[string]string)}
data := map[string]map[string]string{
"Node201": {"topology.kubernetes.io/network-level-1": "S2", "topology.kubernetes.io/network-level-2": "S1"},
Expand Down
17 changes: 12 additions & 5 deletions pkg/engines/slurm/slurm.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,12 @@ import (
"github.com/NVIDIA/topograph/pkg/utils"
)

const (
TopoTreeHeader = `
const TopologyHeader = `
###############################################################
# Slurm's network topology configuration file for use with the
# topology/tree plugin
# %s plugin
###############################################################
`
)

type SlurmEngine struct{}

Expand Down Expand Up @@ -76,7 +74,16 @@ func GenerateOutput(ctx context.Context, tree *common.Vertex, params map[string]
path := params[common.KeyTopoConfigPath]

if len(path) != 0 {
buf.WriteString(TopoTreeHeader)
var plugin string
if len(tree.Metadata) != 0 {
plugin = tree.Metadata[common.KeyPlugin]
}
if len(plugin) == 0 {
plugin = common.ValTopologyTree
}
if _, err := buf.WriteString(fmt.Sprintf(TopologyHeader, plugin)); err != nil {
return nil, err
}
}

err := translate.ToSLURM(buf, tree)
Expand Down
2 changes: 1 addition & 1 deletion pkg/factory/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type testProvider struct {

func GetTestProvider() *testProvider {
p := &testProvider{}
p.tree, p.instance2node = translate.GetTestSet(false)
p.tree, p.instance2node = translate.GetTreeTestSet(false)

return p
}
Expand Down
11 changes: 6 additions & 5 deletions pkg/providers/baremetal/mnnvl.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ import (
"bufio"
"context"
"fmt"
"github.com/NVIDIA/topograph/pkg/common"
"github.com/NVIDIA/topograph/pkg/utils"
"strconv"
"strings"

"github.com/NVIDIA/topograph/pkg/common"
"github.com/NVIDIA/topograph/pkg/utils"
)

// domain contains map of each domainID(clusterUUID) -> list of nodeNames in that domain
Expand Down Expand Up @@ -84,9 +85,9 @@ func toGraph(domainMap map[string]domain) *common.Vertex {
root.Vertices[domainName] = tree
}
// add root metadata
root.Metadata["engine"] = "slurm"
root.Metadata["plugin"] = "topology/block"
root.Metadata["blocksize"] = strconv.Itoa(blockSize)
root.Metadata[common.KeyEngine] = common.EngineSLURM
root.Metadata[common.KeyPlugin] = common.ValTopologyBlock
root.Metadata[common.KeyBlockSizes] = strconv.Itoa(blockSize)
return root
}

Expand Down
59 changes: 58 additions & 1 deletion pkg/translate/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@ import (
)

// ToSLURM writes the SLURM topology configuration for root to wr.
//
// The output format is selected by the plugin recorded in the root vertex
// metadata: when it equals common.ValTopologyBlock the block format is
// produced (using the block sizes stored under common.KeyBlockSizes);
// in every other case, including absent metadata, the tree format is used.
func ToSLURM(wr io.Writer, root *common.Vertex) error {
	// Reading from a nil or empty map yields "", which falls through to the
	// tree format, so no explicit emptiness check is needed.
	switch root.Metadata[common.KeyPlugin] {
	case common.ValTopologyBlock:
		return toBlockSLURM(wr, root, root.Metadata[common.KeyBlockSizes])
	default:
		return toTreeSLURM(wr, root)
	}
}

// toBlockSLURM writes the block-style SLURM topology to wr.
//
// Each child of root is treated as one block and produces a line of the form
// "BlockName=<block ID> Nodes=<node list>", where the node list is the
// comma-joined result of compress applied to the names of the block's
// children. A final "BlockSizes=<blocksizes>" line is written after all
// blocks. The first write error aborts the function and is returned.
//
// NOTE(review): blocks are visited by ranging over a map, so the order of the
// BlockName lines is not defined by Go and may differ between runs; sort the
// block keys first if stable output is required.
func toBlockSLURM(wr io.Writer, root *common.Vertex, blocksizes string) error {
	for _, blk := range root.Vertices {
		// Gather the names of every node that belongs to this block.
		names := make([]string, 0, len(blk.Vertices))
		for _, member := range blk.Vertices {
			names = append(names, member.Name)
		}

		nodeList := strings.Join(compress(names), ",")
		if _, err := fmt.Fprintf(wr, "BlockName=%s Nodes=%s\n", blk.ID, nodeList); err != nil {
			return err
		}
	}

	_, err := fmt.Fprintf(wr, "BlockSizes=%s\n", blocksizes)
	return err
}

func toTreeSLURM(wr io.Writer, root *common.Vertex) error {
visited := make(map[string]bool)
leaves := make(map[string][]string)
parents := []*common.Vertex{}
Expand Down Expand Up @@ -197,7 +219,7 @@ func split(input string) (string, string) {
return input[:i], input[i:]
}

func GetTestSet(testForLongLabelName bool) (*common.Vertex, map[string]string) {
func GetTreeTestSet(testForLongLabelName bool) (*common.Vertex, map[string]string) {
var s3name string
if testForLongLabelName {
s3name = "S3very-very-long-id-to-check-label-value-limits-of-63-characters"
Expand Down Expand Up @@ -236,3 +258,38 @@ func GetTestSet(testForLongLabelName bool) (*common.Vertex, map[string]string) {

return root, instance2node
}

// GetBlockTestSet returns a fixed two-block topology for tests together with
// the matching instance-ID -> node-name map.
//
// Block "B1" contains Node104, Node105 and Node106 (instances I14-I16);
// block "B2" contains Node201, Node202 and Node205 (instances I21, I22, I25).
// The root metadata marks the graph for the SLURM engine with the
// topology/block plugin and a block size of "8".
func GetBlockTestSet() (*common.Vertex, map[string]string) {
	// newBlock builds a block vertex whose children are keyed by their ID.
	newBlock := func(id string, members ...*common.Vertex) *common.Vertex {
		children := make(map[string]*common.Vertex, len(members))
		for _, m := range members {
			children[m.ID] = m
		}
		return &common.Vertex{ID: id, Vertices: children}
	}

	b1 := newBlock("B1",
		&common.Vertex{ID: "I14", Name: "Node104"},
		&common.Vertex{ID: "I15", Name: "Node105"},
		&common.Vertex{ID: "I16", Name: "Node106"},
	)
	b2 := newBlock("B2",
		&common.Vertex{ID: "I21", Name: "Node201"},
		&common.Vertex{ID: "I22", Name: "Node202"},
		&common.Vertex{ID: "I25", Name: "Node205"},
	)

	topology := &common.Vertex{
		Vertices: map[string]*common.Vertex{b1.ID: b1, b2.ID: b2},
		Metadata: map[string]string{
			common.KeyEngine:     common.EngineSLURM,
			common.KeyPlugin:     common.ValTopologyBlock,
			common.KeyBlockSizes: "8",
		},
	}

	nodeByInstance := map[string]string{
		"I14": "Node104", "I15": "Node105", "I16": "Node106",
		"I21": "Node201", "I22": "Node202", "I25": "Node205",
	}

	return topology, nodeByInstance
}
34 changes: 29 additions & 5 deletions pkg/translate/output_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,26 @@ import (
)

const (
testConfig1 = `SwitchName=S1 Switches=S[2-3]
testTreeConfig1 = `SwitchName=S1 Switches=S[2-3]
SwitchName=S2 Nodes=Node[201-202],Node205
SwitchName=S3 Nodes=Node[304-306]
`

testConfig2 = `SwitchName=S1 Switches=S[2-3]
testTreeConfig2 = `SwitchName=S1 Switches=S[2-3]
SwitchName=S3 Nodes=Node[304-306]
SwitchName=S2 Nodes=Node[201-202],Node205
`

testBlockConfig1 = `BlockName=B1 Nodes=Node[104-106]
BlockName=B2 Nodes=Node[201-202],Node205
BlockSizes=8
`

testBlockConfig2 = `BlockName=B2 Nodes=Node[201-202],Node205
BlockName=B1 Nodes=Node[104-106]
BlockSizes=8
`

shortNameExpectedResult = `# switch.3.1=hpcislandid-1
SwitchName=switch.3.1 Switches=switch.2.[1-2]
# switch.2.1=network-block-1
Expand All @@ -47,13 +58,26 @@ SwitchName=switch.1.2 Nodes=node-2
`
)

func TestToSLURM(t *testing.T) {
v, _ := GetTestSet(false)
// TestToTreeSLURM checks that ToSLURM renders the tree test set in the
// switch-based format. Two expected outputs are accepted because the two
// leaf switches may be emitted in either order.
func TestToTreeSLURM(t *testing.T) {
	root, _ := GetTreeTestSet(false)

	var out bytes.Buffer
	require.NoError(t, ToSLURM(&out, root))

	got := out.String()
	if got != testTreeConfig1 && got != testTreeConfig2 {
		t.Errorf("unexpected result %s", got)
	}
}

func TestToBlockSLURM(t *testing.T) {
v, _ := GetBlockTestSet()
buf := &bytes.Buffer{}
err := ToSLURM(buf, v)
require.NoError(t, err)
switch buf.String() {
case testConfig1, testConfig2:
case testBlockConfig1, testBlockConfig2:
// nop
default:
t.Errorf("unexpected result %s", buf.String())
Expand Down

0 comments on commit d469ac1

Please sign in to comment.