Skip to content

WIP: use netlink instead of calling iproute2 commands #1990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5343bef
incusd/ip/utils: Switch to netlink
gwenya Apr 25, 2025
f981481
incusd/ip/addr: Switch to netlink
gwenya Apr 23, 2025
e178d2a
incusd/ip/class: Switch to netlink
gwenya Apr 23, 2025
ab64cea
incusd/ip/filter: Switch to netlink
gwenya Apr 23, 2025
ff84cd8
incusd/ip/link: Switch to netlink (incomplete)
gwenya Apr 24, 2025
d8cc640
incusd/ip/neigh: Switch to netlink
gwenya Apr 24, 2025
6a94358
incusd/ip/neigh_proxy: Switch to netlink
gwenya Apr 24, 2025
d3c2e07
incusd/ip/qdisc: Switch to netlink
gwenya Apr 24, 2025
70a007e
incusd/ip/route: Switch to netlink
gwenya Apr 25, 2025
12e8d1e
incusd/ip/tuntap: Switch to netlink
gwenya Apr 25, 2025
dabad91
incusd/ip/vdpa: Switch to vishvananda/netlink library instead of doin…
gwenya Apr 25, 2025
37dccfc
incusd/ip: Refactor family from string to `Family` type
gwenya Apr 26, 2025
2ed3317
incusd/ip/filter: Refactor Rate, Burst and Mtu from strings to integers
gwenya Apr 27, 2025
7ffc44d
incusd/ip/filter: Refactor Mask and Value from strings to integers
gwenya Apr 27, 2025
eafa5c6
incusd/ip/link_vxlan: Refactor VxLanID, DstPort and TTL from string t…
gwenya Apr 27, 2025
dbea830
incusd/ip/neigh: Refactor NeighbourIPState from string to int
gwenya Apr 27, 2025
a01c26c
incusd/ip/qdisc_htb: Refactor Default from string to int
gwenya Apr 27, 2025
828bb69
incusd/ip/vdpa: Remove unused
gwenya Apr 27, 2025
7031eeb
incusd/ip: Merge GetLinkInfoByName and LinkFromName into LinkByName
gwenya Apr 27, 2025
924191d
incusd/ip/link: Refactor SetVfSpoofchk mode parameter to bool
gwenya Apr 27, 2025
45bed3e
fixup! incusd/ip/link: Switch to netlink (incomplete)
gwenya Apr 27, 2025
d3207fe
fixup! incusd/ip/link: Switch to netlink (incomplete)
gwenya Apr 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions internal/server/device/device_utils_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package device

import (
"context"
"errors"
"fmt"
"net"
"net/netip"
Expand All @@ -14,6 +15,7 @@ import (

"github.com/mdlayher/arp"
"github.com/mdlayher/ndp"
"golang.org/x/sys/unix"

deviceConfig "github.com/lxc/incus/v6/internal/server/device/config"
pcidev "github.com/lxc/incus/v6/internal/server/device/pci"
Expand Down Expand Up @@ -528,14 +530,21 @@ func networkSetupHostVethLimits(d *deviceCommon, oldConfig deviceConfig.Device,
}

// Clean any existing entry
qdisc := &ip.Qdisc{Dev: veth, Root: true}
_ = qdisc.Delete()
qdisc = &ip.Qdisc{Dev: veth, Ingress: true}
_ = qdisc.Delete()
qdiscIngress := &ip.QdiscIngress{Qdisc: ip.Qdisc{Dev: veth, Handle: "ffff:0"}}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stgraber should I put these kinds of changes into their own commit or leave them together with the corresponding change in the ip package?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's fine to leave them with the change I think

err = qdiscIngress.Delete()
if err != nil && !errors.Is(err, unix.ENOENT) {
return err
}

qdiscHTB := &ip.QdiscHTB{Qdisc: ip.Qdisc{Dev: veth, Handle: "1:0", Parent: "root"}}
err = qdiscHTB.Delete()
if err != nil && !errors.Is(err, unix.ENOENT) {
return err
}

// Apply new limits
if d.config["limits.ingress"] != "" {
qdiscHTB := &ip.QdiscHTB{Qdisc: ip.Qdisc{Dev: veth, Handle: "1:0", Root: true}, Default: "10"}
qdiscHTB = &ip.QdiscHTB{Qdisc: ip.Qdisc{Dev: veth, Handle: "1:0", Parent: "root"}, Default: 10}
err := qdiscHTB.Add()
if err != nil {
return fmt.Errorf("Failed to create root tc qdisc: %s", err)
Expand All @@ -547,22 +556,22 @@ func networkSetupHostVethLimits(d *deviceCommon, oldConfig deviceConfig.Device,
return fmt.Errorf("Failed to create limit tc class: %s", err)
}

filter := &ip.U32Filter{Filter: ip.Filter{Dev: veth, Parent: "1:0", Protocol: "all", Flowid: "1:1"}, Value: "0", Mask: "0"}
filter := &ip.U32Filter{Filter: ip.Filter{Dev: veth, Parent: "1:0", Protocol: "all", Flowid: "1:1"}, Value: 0, Mask: 0}
err = filter.Add()
if err != nil {
return fmt.Errorf("Failed to create tc filter: %s", err)
}
}

if d.config["limits.egress"] != "" {
qdisc = &ip.Qdisc{Dev: veth, Handle: "ffff:0", Ingress: true}
err := qdisc.Add()
qdiscIngress = &ip.QdiscIngress{Qdisc: ip.Qdisc{Dev: veth, Handle: "ffff:0"}}
err := qdiscIngress.Add()
if err != nil {
return fmt.Errorf("Failed to create ingress tc qdisc: %s", err)
}

police := &ip.ActionPolice{Rate: fmt.Sprintf("%dbit", egressInt), Burst: fmt.Sprintf("%d", egressInt/40), Mtu: "64kb", Drop: true}
filter := &ip.U32Filter{Filter: ip.Filter{Dev: veth, Parent: "ffff:0", Protocol: "all"}, Value: "0", Mask: "0", Actions: []ip.Action{police}}
police := &ip.ActionPolice{Rate: uint32(egressInt / 8), Burst: uint32(egressInt / 40), Mtu: 65535, Drop: true}
filter := &ip.U32Filter{Filter: ip.Filter{Dev: veth, Parent: "ffff:0", Protocol: "all"}, Value: 0, Mask: 0, Actions: []ip.Action{police}}
err = filter.Add()
if err != nil {
return fmt.Errorf("Failed to create ingress tc filter: %s", err)
Expand Down Expand Up @@ -691,7 +700,7 @@ func bgpRemovePrefix(d *deviceCommon, config map[string]string) error {
return nil
}

// networkSRIOVParentVFInfo returns info about an SR-IOV virtual function from the parent NIC using the ip tool.
// networkSRIOVParentVFInfo returns info about an SR-IOV virtual function from the parent NIC.
func networkSRIOVParentVFInfo(vfParent string, vfID int) (ip.VirtFuncInfo, error) {
link := &ip.Link{Name: vfParent}
vfi, err := link.GetVFInfo(vfID)
Expand All @@ -717,9 +726,9 @@ func networkSRIOVSetupVF(d deviceCommon, vfParent string, vfDevice string, vfID

// Record properties of VF settings on the parent device.
volatile["last_state.vf.parent"] = vfParent
volatile["last_state.vf.hwaddr"] = vfInfo.Address
volatile["last_state.vf.hwaddr"] = vfInfo.Address.String()
volatile["last_state.vf.id"] = fmt.Sprintf("%d", vfID)
volatile["last_state.vf.vlan"] = fmt.Sprintf("%d", vfInfo.VLANs[0]["vlan"])
volatile["last_state.vf.vlan"] = fmt.Sprintf("%d", vfInfo.VLAN)
volatile["last_state.vf.spoofcheck"] = fmt.Sprintf("%t", vfInfo.SpoofCheck)

// Record the host interface we represents the VF device which we will move into instance.
Expand Down Expand Up @@ -777,7 +786,7 @@ func networkSRIOVSetupVF(d deviceCommon, vfParent string, vfDevice string, vfID
}

// Now that MAC is set on VF, we can enable spoof checking.
err = link.SetVfSpoofchk(volatile["last_state.vf.id"], "on")
err = link.SetVfSpoofchk(volatile["last_state.vf.id"], true)
if err != nil {
return vfPCIDev, 0, fmt.Errorf("Failed enabling spoof check for VF %q: %w", volatile["last_state.vf.id"], err)
}
Expand All @@ -790,7 +799,7 @@ func networkSRIOVSetupVF(d deviceCommon, vfParent string, vfDevice string, vfID

if useSpoofCheck {
// Ensure spoof checking is disabled if not enabled in instance (only for real VF).
err = link.SetVfSpoofchk(volatile["last_state.vf.id"], "off")
err = link.SetVfSpoofchk(volatile["last_state.vf.id"], false)
if err != nil {
return vfPCIDev, 0, fmt.Errorf("Failed disabling spoof check for VF %q: %w", volatile["last_state.vf.id"], err)
}
Expand Down Expand Up @@ -903,13 +912,8 @@ func networkSRIOVRestoreVF(d deviceCommon, useSpoofCheck bool, volatile map[stri
// Reset VF MAC spoofing protection if recorded. Do this first before resetting the MAC
// to avoid any issues with zero MACs refusing to be set whilst spoof check is on.
if useSpoofCheck && volatile["last_state.vf.spoofcheck"] != "" {
mode := "off"
if util.IsTrue(volatile["last_state.vf.spoofcheck"]) {
mode = "on"
}

link := &ip.Link{Name: parent}
err := link.SetVfSpoofchk(volatile["last_state.vf.id"], mode)
err := link.SetVfSpoofchk(volatile["last_state.vf.id"], util.IsTrue(volatile["last_state.vf.spoofcheck"]))
if err != nil {
return err
}
Expand Down
12 changes: 6 additions & 6 deletions internal/server/device/nic_bridged.go
Original file line number Diff line number Diff line change
Expand Up @@ -1985,22 +1985,22 @@ func (d *nicBridged) State() (*api.InstanceStateNetwork, error) {
// Get IP addresses from IP neighbour cache if present.
neighIPs, err := network.GetNeighbourIPs(d.config["parent"], hwAddr)
if err == nil {
validStates := []string{
string(ip.NeighbourIPStatePermanent),
string(ip.NeighbourIPStateNoARP),
string(ip.NeighbourIPStateReachable),
validStates := []ip.NeighbourIPState{
ip.NeighbourIPStatePermanent,
ip.NeighbourIPStateNoARP,
ip.NeighbourIPStateReachable,
}

// Add any valid-state neighbour IP entries first.
for _, neighIP := range neighIPs {
if slices.Contains(validStates, string(neighIP.State)) {
if slices.Contains(validStates, neighIP.State) {
ipStore(neighIP.Addr)
}
}

// Add any non-failed-state entries.
for _, neighIP := range neighIPs {
if neighIP.State != ip.NeighbourIPStateFailed && !slices.Contains(validStates, string(neighIP.State)) {
if neighIP.State != ip.NeighbourIPStateFailed && !slices.Contains(validStates, neighIP.State) {
ipStore(neighIP.Addr)
}
}
Expand Down
20 changes: 10 additions & 10 deletions internal/server/device/nic_ipvlan.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,13 @@ func (d *nicIPVLAN) Start() (*deviceConfig.RunConfig, error) {

// Perform network configuration.
for _, keyPrefix := range []string{"ipv4", "ipv6"} {
var ipFamilyArg string
var ipFamily ip.Family

switch keyPrefix {
case "ipv4":
ipFamilyArg = ip.FamilyV4
ipFamily = ip.FamilyV4
case "ipv6":
ipFamilyArg = ip.FamilyV6
ipFamily = ip.FamilyV6
}

addresses := util.SplitNTrimSpace(d.config[fmt.Sprintf("%s.address", keyPrefix)], ",", -1, true)
Expand All @@ -394,7 +394,7 @@ func (d *nicIPVLAN) Start() (*deviceConfig.RunConfig, error) {
DevName: "lo",
Route: addr.String(),
Table: "main",
Family: ipFamilyArg,
Family: ipFamily,
}

err = r.Add()
Expand All @@ -411,7 +411,7 @@ func (d *nicIPVLAN) Start() (*deviceConfig.RunConfig, error) {
DevName: "lo",
Route: addr.String(),
Table: d.config[hostTableKey],
Family: ipFamilyArg,
Family: ipFamily,
}

err := r.Add()
Expand Down Expand Up @@ -538,13 +538,13 @@ func (d *nicIPVLAN) postStop() error {

// Clean up host-side network configuration.
for _, keyPrefix := range []string{"ipv4", "ipv6"} {
var ipFamilyArg string
var ipFamily ip.Family

switch keyPrefix {
case "ipv4":
ipFamilyArg = ip.FamilyV4
ipFamily = ip.FamilyV4
case "ipv6":
ipFamilyArg = ip.FamilyV6
ipFamily = ip.FamilyV6
}

addresses := util.SplitNTrimSpace(d.config[fmt.Sprintf("%s.address", keyPrefix)], ",", -1, true)
Expand All @@ -563,7 +563,7 @@ func (d *nicIPVLAN) postStop() error {
DevName: "lo",
Route: addr.String(),
Table: "main",
Family: ipFamilyArg,
Family: ipFamily,
}

err := r.Delete()
Expand All @@ -588,7 +588,7 @@ func (d *nicIPVLAN) postStop() error {
DevName: "lo",
Route: addr.String(),
Table: d.config[hostTableKey],
Family: ipFamilyArg,
Family: ipFamily,
}

err := r.Delete()
Expand Down
72 changes: 62 additions & 10 deletions internal/server/ip/addr.go
Original file line number Diff line number Diff line change
@@ -1,43 +1,95 @@
package ip

import (
"github.com/lxc/incus/v6/shared/subprocess"
"fmt"

"github.com/vishvananda/netlink"
)

// Addr represents arguments for address protocol manipulation.
type Addr struct {
DevName string
Address string
Scope string
Family string
Family Family
}

// Add adds new protocol address.
func (a *Addr) Add() error {
_, err := subprocess.RunCommand("ip", a.Family, "addr", "add", "dev", a.DevName, a.Address)
addr, err := netlink.ParseIPNet(a.Address)
if err != nil {
return err
}

scope, err := a.scopeNum()
if err != nil {
return err
}

err = netlink.AddrAdd(&netlink.GenericLink{
LinkAttrs: netlink.LinkAttrs{
Name: a.DevName,
},
}, &netlink.Addr{
IPNet: addr,
Scope: scope,
})
if err != nil {
return fmt.Errorf("failed to add address %v: %w", addr, err)
}

return nil
}

func (a *Addr) scopeNum() (int, error) {
var scope netlink.Scope
switch a.Scope {
case "global", "universe", "":
scope = netlink.SCOPE_UNIVERSE
case "site":
scope = netlink.SCOPE_SITE
case "link":
scope = netlink.SCOPE_LINK
case "host":
scope = netlink.SCOPE_HOST
case "nowhere":
scope = netlink.SCOPE_NOWHERE
default:
return 0, fmt.Errorf("unknown address scope %q", a.Scope)
}

return int(scope), nil
}

// Flush flushes protocol addresses.
func (a *Addr) Flush() error {
cmd := []string{}
if a.Family != "" {
cmd = append(cmd, a.Family)
link, err := linkByName(a.DevName)
if err != nil {
return err
}

cmd = append(cmd, "addr", "flush", "dev", a.DevName)
if a.Scope != "" {
cmd = append(cmd, "scope", a.Scope)
addrs, err := netlink.AddrList(link, int(a.Family))
if err != nil {
return fmt.Errorf("failed to get addresses for device %s: %w", a.DevName, err)
}

_, err := subprocess.RunCommand("ip", cmd...)
scope, err := a.scopeNum()
if err != nil {
return err
}

// TODO: iproute2 supposedly does some kind of batch delete in multiple rounds, which is probably more performant than deleting addresses one by one, but the iproute2 code is arcane and uncommented and I haven't yet figured out how it works

for _, addr := range addrs {
if a.Scope != "" && scope != addr.Scope {
continue
}

err := netlink.AddrDel(link, &addr)
if err != nil {
return fmt.Errorf("failed to delete address %v: %w", addr, err)
}
}

return nil
}
44 changes: 37 additions & 7 deletions internal/server/ip/class.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package ip

import (
"github.com/lxc/incus/v6/shared/subprocess"
"fmt"

"github.com/vishvananda/netlink"

"github.com/lxc/incus/v6/shared/units"
)

// Class represents qdisc class object.
Expand All @@ -19,19 +23,45 @@ type ClassHTB struct {

// Add adds class to a node.
func (class *ClassHTB) Add() error {
cmd := []string{"class", "add", "dev", class.Dev, "parent", class.Parent}
link, err := linkByName(class.Dev)
if err != nil {
return err
}

parent, err := parseHandle(class.Parent)
if err != nil {
return err
}

classAttrs := netlink.ClassAttrs{
LinkIndex: link.Attrs().Index,
Parent: parent,
Statistics: nil,
}

htbClassAttrs := netlink.HtbClassAttrs{}

if class.Classid != "" {
cmd = append(cmd, "classid", class.Classid)
handle, err := parseHandle(class.Classid)
if err != nil {
return err
}

classAttrs.Handle = handle
}

cmd = append(cmd, "htb")
if class.Rate != "" {
cmd = append(cmd, "rate", class.Rate)
rate, err := units.ParseBitSizeString(class.Rate)
if err != nil {
return fmt.Errorf("invalid rate %q: %w", class.Rate, err)
}

htbClassAttrs.Rate = uint64(rate)
}

_, err := subprocess.RunCommand("tc", cmd...)
err = netlink.ClassAdd(netlink.NewHtbClass(classAttrs, htbClassAttrs))
if err != nil {
return err
return fmt.Errorf("failed to add htb class: %w", err)
}

return nil
Expand Down
Loading