{
"ImportPath": "github.com/containernetworking/cni",
"GoVersion": "go1.6",
+ "GodepVersion": "v74",
"Packages": [
"./..."
],
},
{
"ImportPath": "github.com/vishvananda/netlink",
- "Rev": "ecf47fd5739b3d2c3daf7c89c4b9715a2605c21b"
+ "Rev": "9dee363ad4abbc3c9a4a24a9f1e33363e224b111"
},
{
"ImportPath": "github.com/vishvananda/netlink/nl",
- "Rev": "ecf47fd5739b3d2c3daf7c89c4b9715a2605c21b"
+ "Rev": "9dee363ad4abbc3c9a4a24a9f1e33363e224b111"
},
{
"ImportPath": "golang.org/x/sys/unix",
"Rev": "e11762ca30adc5b39fdbfd8c4250dabeb8e456d3"
+ },
+ {
+ "ImportPath": "github.com/vishvananda/netns",
+ "Rev": "8ba1072b58e0c2a240eb5f6120165c7776c3e7b8"
}
]
}
language: go
+before_script:
+ # make sure we keep path intact when we sudo
+ - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers
+ # modprobe ip_gre or else the first gre device can't be deleted
+ - sudo modprobe ip_gre
install:
- - go get github.com/vishvananda/netns
+ - go get github.com/vishvananda/netns
unroot = $(subst ../../../,,$(1))
fmt = $(addprefix fmt-,$(1))
-all: fmt
+all: test
$(call goroot,$(DEPS)):
go get $(call unroot,$@)
.PHONY: $(call testdirs,$(DIRS))
$(call testdirs,$(DIRS)):
- sudo -E go test -v github.com/vishvananda/netlink/$@
+ sudo -E go test -test.parallel 4 -timeout 60s -v github.com/vishvananda/netlink/$@
$(call fmt,$(call testdirs,$(DIRS))):
! gofmt -l $(subst fmt-,,$@)/*.go | grep ''
and routes, and configure ipsec. Netlink communication requires elevated
privileges, so in most cases this code needs to be run as root. Since
low-level netlink messages are inscrutable at best, the library attempts
-to provide an api that is loosely modeled on the CLI provied by iproute2.
+to provide an api that is loosely modeled on the CLI provided by iproute2.
Actions like `ip link add` will be accomplished via a similarly named
function like AddLink(). This library began its life as a fork of the
netlink functionality in
type Addr struct {
*net.IPNet
Label string
+ Flags int
+ Scope int
}
// String returns $ip/$netmask $label
func (a Addr) String() string {
- return fmt.Sprintf("%s %s", a.IPNet, a.Label)
+ return strings.TrimSpace(fmt.Sprintf("%s %s", a.IPNet, a.Label))
}
// ParseAddr parses the string representation of an address in the
import (
"fmt"
+ "log"
"net"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
+ "github.com/vishvananda/netns"
)
+// IFA_FLAGS is a u32 attribute.
+const IFA_FLAGS = 0x8
+
// AddrAdd will add an IP address to a link device.
// Equivalent to: `ip addr add $addr dev $link`
func AddrAdd(link Link, addr *Addr) error {
+ return pkgHandle.AddrAdd(link, addr)
+}
- req := nl.NewNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
- return addrHandle(link, addr, req)
+// AddrAdd will add an IP address to a link device.
+// Equivalent to: `ip addr add $addr dev $link`
+func (h *Handle) AddrAdd(link Link, addr *Addr) error {
+ req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return h.addrHandle(link, addr, req)
}
// AddrDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func AddrDel(link Link, addr *Addr) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK)
- return addrHandle(link, addr, req)
+ return pkgHandle.AddrDel(link, addr)
+}
+
+// AddrDel will delete an IP address from a link device.
+// Equivalent to: `ip addr del $addr dev $link`
+func (h *Handle) AddrDel(link Link, addr *Addr) error {
+ req := h.newNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK)
+ return h.addrHandle(link, addr, req)
}
-func addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error {
+func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error {
base := link.Attrs()
if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) {
return fmt.Errorf("label must begin with interface name")
}
- ensureIndex(base)
+ h.ensureIndex(base)
family := nl.GetIPFamily(addr.IP)
msg := nl.NewIfAddrmsg(family)
msg.Index = uint32(base.Index)
+ msg.Scope = uint8(addr.Scope)
prefixlen, _ := addr.Mask.Size()
msg.Prefixlen = uint8(prefixlen)
req.AddData(msg)
addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, addrData)
req.AddData(addressData)
+ if addr.Flags != 0 {
+ if addr.Flags <= 0xff {
+ msg.IfAddrmsg.Flags = uint8(addr.Flags)
+ } else {
+ b := make([]byte, 4)
+ native.PutUint32(b, uint32(addr.Flags))
+ flagsData := nl.NewRtAttr(IFA_FLAGS, b)
+ req.AddData(flagsData)
+ }
+ }
+
if addr.Label != "" {
labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
req.AddData(labelData)
// Equivalent to: `ip addr show`.
// The list can be filtered by link and ip family.
func AddrList(link Link, family int) ([]Addr, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP)
+ return pkgHandle.AddrList(link, family)
+}
+
+// AddrList gets a list of IP addresses in the system.
+// Equivalent to: `ip addr show`.
+// The list can be filtered by link and ip family.
+func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
return nil, err
}
- index := 0
+ indexFilter := 0
if link != nil {
base := link.Attrs()
- ensureIndex(base)
- index = base.Index
+ h.ensureIndex(base)
+ indexFilter = base.Index
}
var res []Addr
for _, m := range msgs {
- msg := nl.DeserializeIfAddrmsg(m)
+ addr, msgFamily, ifindex, err := parseAddr(m)
+ if err != nil {
+ return res, err
+ }
- if link != nil && msg.Index != uint32(index) {
+ if link != nil && ifindex != indexFilter {
// Ignore messages from other interfaces
continue
}
- attrs, err := nl.ParseRouteAttr(m[msg.Len():])
- if err != nil {
- return nil, err
+ if family != FAMILY_ALL && msgFamily != family {
+ continue
}
- var local, dst *net.IPNet
- var addr Addr
- for _, attr := range attrs {
- switch attr.Attr.Type {
- case syscall.IFA_ADDRESS:
- dst = &net.IPNet{
- IP: attr.Value,
- Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
- }
- case syscall.IFA_LOCAL:
- local = &net.IPNet{
- IP: attr.Value,
- Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
- }
- case syscall.IFA_LABEL:
- addr.Label = string(attr.Value[:len(attr.Value)-1])
+ res = append(res, addr)
+ }
+
+ return res, nil
+}
+
+func parseAddr(m []byte) (addr Addr, family, index int, err error) {
+ msg := nl.DeserializeIfAddrmsg(m)
+
+ family = -1
+ index = -1
+
+ attrs, err1 := nl.ParseRouteAttr(m[msg.Len():])
+ if err1 != nil {
+ err = err1
+ return
+ }
+
+ family = int(msg.Family)
+ index = int(msg.Index)
+
+ var local, dst *net.IPNet
+ for _, attr := range attrs {
+ switch attr.Attr.Type {
+ case syscall.IFA_ADDRESS:
+ dst = &net.IPNet{
+ IP: attr.Value,
+ Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
}
+ case syscall.IFA_LOCAL:
+ local = &net.IPNet{
+ IP: attr.Value,
+ Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
+ }
+ case syscall.IFA_LABEL:
+ addr.Label = string(attr.Value[:len(attr.Value)-1])
+ case IFA_FLAGS:
+ addr.Flags = int(native.Uint32(attr.Value[0:4]))
}
+ }
- // IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS
- if local != nil {
- addr.IPNet = local
- } else {
- addr.IPNet = dst
- }
+ // IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS
+ if local != nil {
+ addr.IPNet = local
+ } else {
+ addr.IPNet = dst
+ }
+ addr.Scope = int(msg.Scope)
- res = append(res, addr)
+ return
+}
+
+// AddrUpdate is the notification value sent on the channel given to
+// AddrSubscribe / AddrSubscribeAt for every RTM_NEWADDR or RTM_DELADDR
+// message received.
+type AddrUpdate struct {
+ // LinkAddress is the address (IP and mask) parsed from the message.
+ LinkAddress net.IPNet
+ // LinkIndex is the interface index carried by the address message.
+ LinkIndex int
+ NewAddr bool // true=added false=deleted
+}
+
+// AddrSubscribe takes a chan down which notifications will be sent
+// when addresses change. Close the 'done' chan to stop subscription.
+func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
+ return addrSubscribe(netns.None(), netns.None(), ch, done)
+}
+
+// AddrSubscribeAt works like AddrSubscribe plus it allows the caller
+// to choose the network namespace in which to subscribe (ns).
+func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
+ return addrSubscribe(ns, netns.None(), ch, done)
+}
+
+func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR)
+ if err != nil {
+ return err
+ }
+ if done != nil {
+ go func() {
+ <-done
+ s.Close()
+ }()
}
+ go func() {
+ defer close(ch)
+ for {
+ msgs, err := s.Receive()
+ if err != nil {
+ log.Printf("netlink.AddrSubscribe: Receive() error: %v", err)
+ return
+ }
+ for _, m := range msgs {
+ msgType := m.Header.Type
+ if msgType != syscall.RTM_NEWADDR && msgType != syscall.RTM_DELADDR {
+ log.Printf("netlink.AddrSubscribe: bad message type: %d", msgType)
+ continue
+ }
- return res, nil
+ addr, _, ifindex, err := parseAddr(m.Data)
+ if err != nil {
+ log.Printf("netlink.AddrSubscribe: could not parse address: %v", err)
+ continue
+ }
+
+ ch <- AddrUpdate{LinkAddress: *addr.IPNet, LinkIndex: ifindex, NewAddr: msgType == syscall.RTM_NEWADDR}
+ }
+ }
+ }()
+
+ return nil
}
--- /dev/null
+package netlink
+
+/*
+#include <asm/types.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+
+static int load_simple_bpf(int prog_type) {
+#ifdef __NR_bpf
+ // { return 1; }
+ __u64 __attribute__((aligned(8))) insns[] = {
+ 0x00000001000000b7ull,
+ 0x0000000000000095ull,
+ };
+ __u8 __attribute__((aligned(8))) license[] = "ASL2";
+ // Copied from a header file since libc is notoriously slow to update.
+ // The call will succeed or fail and that will be our indication on
+ // whether or not it is supported.
+ struct {
+ __u32 prog_type;
+ __u32 insn_cnt;
+ __u64 insns;
+ __u64 license;
+ __u32 log_level;
+ __u32 log_size;
+ __u64 log_buf;
+ __u32 kern_version;
+ } __attribute__((aligned(8))) attr = {
+ .prog_type = prog_type,
+ .insn_cnt = 2,
+ .insns = (uintptr_t)&insns,
+ .license = (uintptr_t)&license,
+ };
+ return syscall(__NR_bpf, 5, &attr, sizeof(attr));
+#else
+ errno = EINVAL;
+ return -1;
+#endif
+}
+*/
+import "C"
+
+type BpfProgType C.int
+
+const (
+ BPF_PROG_TYPE_UNSPEC BpfProgType = iota
+ BPF_PROG_TYPE_SOCKET_FILTER
+ BPF_PROG_TYPE_KPROBE
+ BPF_PROG_TYPE_SCHED_CLS
+ BPF_PROG_TYPE_SCHED_ACT
+)
+
+// loadSimpleBpf loads a trivial bpf program for testing purposes
+func loadSimpleBpf(progType BpfProgType) (int, error) {
+ fd, err := C.load_simple_bpf(C.int(progType))
+ return int(fd), err
+}
Type() string
}
-// Class represents a netlink class. A filter is associated with a link,
+// ClassAttrs represents a netlink class. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type ClassAttrs struct {
}
func (q ClassAttrs) String() string {
- return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
+ return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
}
type HtbClassAttrs struct {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
-// Htb class
+// HtbClass represents an Htb class
type HtbClass struct {
ClassAttrs
Rate uint64
Prio uint32
}
-func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
- mtu := 1600
- rate := cattrs.Rate / 8
- ceil := cattrs.Ceil / 8
- buffer := cattrs.Buffer
- cbuffer := cattrs.Cbuffer
- if ceil == 0 {
- ceil = rate
- }
-
- if buffer == 0 {
- buffer = uint32(float64(rate)/Hz() + float64(mtu))
- }
- buffer = uint32(Xmittime(rate, buffer))
-
- if cbuffer == 0 {
- cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
- }
- cbuffer = uint32(Xmittime(ceil, cbuffer))
-
- return &HtbClass{
- ClassAttrs: attrs,
- Rate: rate,
- Ceil: ceil,
- Buffer: buffer,
- Cbuffer: cbuffer,
- Quantum: 10,
- Level: 0,
- Prio: 0,
- }
-}
-
func (q HtbClass) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
-func (class *HtbClass) Attrs() *ClassAttrs {
- return &class.ClassAttrs
+func (q *HtbClass) Attrs() *ClassAttrs {
+ return &q.ClassAttrs
}
-func (class *HtbClass) Type() string {
+func (q *HtbClass) Type() string {
return "htb"
}
package netlink
import (
+ "errors"
"syscall"
"github.com/vishvananda/netlink/nl"
)
+// NOTE: function is in here because it uses other linux functions
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
+ mtu := 1600
+ rate := cattrs.Rate / 8
+ ceil := cattrs.Ceil / 8
+ buffer := cattrs.Buffer
+ cbuffer := cattrs.Cbuffer
+
+ if ceil == 0 {
+ ceil = rate
+ }
+
+ if buffer == 0 {
+ buffer = uint32(float64(rate)/Hz() + float64(mtu))
+ }
+ buffer = uint32(Xmittime(rate, buffer))
+
+ if cbuffer == 0 {
+ cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
+ }
+ cbuffer = uint32(Xmittime(ceil, cbuffer))
+
+ return &HtbClass{
+ ClassAttrs: attrs,
+ Rate: rate,
+ Ceil: ceil,
+ Buffer: buffer,
+ Cbuffer: cbuffer,
+ Quantum: 10,
+ Level: 0,
+ Prio: 0,
+ }
+}
+
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func ClassDel(class Class) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK)
- base := class.Attrs()
- msg := &nl.TcMsg{
- Family: nl.FAMILY_ALL,
- Ifindex: int32(base.LinkIndex),
- Handle: base.Handle,
- Parent: base.Parent,
- }
- req.AddData(msg)
+ return pkgHandle.ClassDel(class)
+}
- _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
- return err
+// ClassDel will delete a class from the system.
+// Equivalent to: `tc class del $class`
+func (h *Handle) ClassDel(class Class) error {
+ return h.classModify(syscall.RTM_DELTCLASS, 0, class)
+}
+
+// ClassChange will change a class in place
+// Equivalent to: `tc class change $class`
+// The parent and handle MUST NOT be changed.
+func ClassChange(class Class) error {
+ return pkgHandle.ClassChange(class)
+}
+
+// ClassChange will change a class in place
+// Equivalent to: `tc class change $class`
+// The parent and handle MUST NOT be changed.
+func (h *Handle) ClassChange(class Class) error {
+ return h.classModify(syscall.RTM_NEWTCLASS, 0, class)
+}
+
+// ClassReplace will replace a class in the system, using the package-level
+// handle.
+// Equivalent to: `tc class replace $class`
+// The handle MAY be changed.
+// If a class already exists with this parent/handle pair, the class is changed.
+// If a class does not already exist with this parent/handle, a new class is created.
+func ClassReplace(class Class) error {
+ return pkgHandle.ClassReplace(class)
+}
+
+// ClassReplace will replace a class in the system.
+// Equivalent to: `tc class replace $class`
+// The handle MAY be changed.
+// If a class already exists with this parent/handle pair, the class is changed.
+// If a class does not already exist with this parent/handle, a new class is created.
+// The request is sent as RTM_NEWTCLASS with NLM_F_CREATE set.
+func (h *Handle) ClassReplace(class Class) error {
+ return h.classModify(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE, class)
}
// ClassAdd will add a class to the system.
// Equivalent to: `tc class add $class`
func ClassAdd(class Class) error {
- req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return pkgHandle.ClassAdd(class)
+}
+
+// ClassAdd will add a class to the system.
+// Equivalent to: `tc class add $class`
+func (h *Handle) ClassAdd(class Class) error {
+ return h.classModify(
+ syscall.RTM_NEWTCLASS,
+ syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
+ class,
+ )
+}
+
+func (h *Handle) classModify(cmd, flags int, class Class) error {
+ req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: base.Parent,
}
req.AddData(msg)
+
+ if cmd != syscall.RTM_DELTCLASS {
+ if err := classPayload(req, class); err != nil {
+ return err
+ }
+ }
+ _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+ return err
+}
+
+func classPayload(req *nl.NetlinkRequest, class Class) error {
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if htb, ok := class.(*HtbClass); ok {
opt := nl.TcHtbCopt{}
- opt.Rate.Rate = uint32(htb.Rate)
- opt.Ceil.Rate = uint32(htb.Ceil)
opt.Buffer = htb.Buffer
opt.Cbuffer = htb.Cbuffer
opt.Quantum = htb.Quantum
opt.Level = htb.Level
opt.Prio = htb.Prio
// TODO: Handle Debug properly. For now default to 0
+ /* Calculate {R,C}Tab and set Rate and Ceil */
+ cellLog := -1
+ ccellLog := -1
+ linklayer := nl.LINKLAYER_ETHERNET
+ mtu := 1600
+ var rtab [256]uint32
+ var ctab [256]uint32
+ tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)}
+ if CalcRtable(&tcrate, rtab, cellLog, uint32(mtu), linklayer) < 0 {
+ return errors.New("HTB: failed to calculate rate table")
+ }
+ opt.Rate = tcrate
+ tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)}
+ if CalcRtable(&tcceil, ctab, ccellLog, uint32(mtu), linklayer) < 0 {
+ return errors.New("HTB: failed to calculate ceil rate table")
+ }
+ opt.Ceil = tcceil
nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
+ nl.NewRtAttrChild(options, nl.TCA_HTB_RTAB, SerializeRtab(rtab))
+ nl.NewRtAttrChild(options, nl.TCA_HTB_CTAB, SerializeRtab(ctab))
}
req.AddData(options)
- _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
- return err
+ return nil
}
// ClassList gets a list of classes in the system.
// Equivalent to: `tc class show`.
-// Generally retunrs nothing if link and parent are not specified.
+// Generally returns nothing if link and parent are not specified.
func ClassList(link Link, parent uint32) ([]Class, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
+ return pkgHandle.ClassList(link, parent)
+}
+
+// ClassList gets a list of classes in the system.
+// Equivalent to: `tc class show`.
+// Generally returns nothing if link and parent are not specified.
+func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
- ensureIndex(base)
+ h.ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
package netlink
-import (
- "fmt"
-)
+import "fmt"
type Filter interface {
Attrs() *FilterAttrs
Type() string
}
-// Filter represents a netlink filter. A filter is associated with a link,
+// FilterAttrs represents a netlink filter. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type FilterAttrs struct {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
}
+type TcAct int32
+
+const (
+ TC_ACT_UNSPEC TcAct = -1
+ TC_ACT_OK TcAct = 0
+ TC_ACT_RECLASSIFY TcAct = 1
+ TC_ACT_SHOT TcAct = 2
+ TC_ACT_PIPE TcAct = 3
+ TC_ACT_STOLEN TcAct = 4
+ TC_ACT_QUEUED TcAct = 5
+ TC_ACT_REPEAT TcAct = 6
+ TC_ACT_REDIRECT TcAct = 7
+ TC_ACT_JUMP TcAct = 0x10000000
+)
+
+// String returns the lowercase name of a known traffic-control action
+// verdict ("ok", "shot", "pipe", ...). Any other value is rendered as its
+// integer value in hexadecimal with a "0x" prefix.
+func (a TcAct) String() string {
+	switch a {
+	case TC_ACT_UNSPEC:
+		return "unspec"
+	case TC_ACT_OK:
+		return "ok"
+	case TC_ACT_RECLASSIFY:
+		return "reclassify"
+	case TC_ACT_SHOT:
+		return "shot"
+	case TC_ACT_PIPE:
+		return "pipe"
+	case TC_ACT_STOLEN:
+		return "stolen"
+	case TC_ACT_QUEUED:
+		return "queued"
+	case TC_ACT_REPEAT:
+		return "repeat"
+	case TC_ACT_REDIRECT:
+		return "redirect"
+	case TC_ACT_JUMP:
+		return "jump"
+	}
+	// Format the underlying integer, not the TcAct itself: fmt honours the
+	// Stringer interface for %x, so passing a would call this method again
+	// and recurse until the stack overflows.
+	return fmt.Sprintf("0x%x", int32(a))
+}
+
+type TcPolAct int32
+
+const (
+ TC_POLICE_UNSPEC TcPolAct = TcPolAct(TC_ACT_UNSPEC)
+ TC_POLICE_OK TcPolAct = TcPolAct(TC_ACT_OK)
+ TC_POLICE_RECLASSIFY TcPolAct = TcPolAct(TC_ACT_RECLASSIFY)
+ TC_POLICE_SHOT TcPolAct = TcPolAct(TC_ACT_SHOT)
+ TC_POLICE_PIPE TcPolAct = TcPolAct(TC_ACT_PIPE)
+)
+
+// String returns the lowercase name of a known police action verdict
+// ("unspec", "ok", "reclassify", "shot", "pipe"). Any other value is
+// rendered as its integer value in hexadecimal with a "0x" prefix.
+func (a TcPolAct) String() string {
+	switch a {
+	case TC_POLICE_UNSPEC:
+		return "unspec"
+	case TC_POLICE_OK:
+		return "ok"
+	case TC_POLICE_RECLASSIFY:
+		return "reclassify"
+	case TC_POLICE_SHOT:
+		return "shot"
+	case TC_POLICE_PIPE:
+		return "pipe"
+	}
+	// Format the underlying integer, not the TcPolAct itself: fmt honours
+	// the Stringer interface for %x, so passing a would call this method
+	// again and recurse until the stack overflows.
+	return fmt.Sprintf("0x%x", int32(a))
+}
+
+type ActionAttrs struct {
+ Index int
+ Capab int
+ Action TcAct
+ Refcnt int
+ Bindcnt int
+}
+
+func (q ActionAttrs) String() string {
+ return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt)
+}
+
+// Action represents an action in any supported filter.
+type Action interface {
+ Attrs() *ActionAttrs
+ Type() string
+}
+
+type GenericAction struct {
+ ActionAttrs
+}
+
+func (action *GenericAction) Type() string {
+ return "generic"
+}
+
+func (action *GenericAction) Attrs() *ActionAttrs {
+ return &action.ActionAttrs
+}
+
+type BpfAction struct {
+ ActionAttrs
+ Fd int
+ Name string
+}
+
+func (action *BpfAction) Type() string {
+ return "bpf"
+}
+
+func (action *BpfAction) Attrs() *ActionAttrs {
+ return &action.ActionAttrs
+}
+
+type MirredAct uint8
+
+func (a MirredAct) String() string {
+ switch a {
+ case TCA_EGRESS_REDIR:
+ return "egress redir"
+ case TCA_EGRESS_MIRROR:
+ return "egress mirror"
+ case TCA_INGRESS_REDIR:
+ return "ingress redir"
+ case TCA_INGRESS_MIRROR:
+ return "ingress mirror"
+ }
+ return "unknown"
+}
+
+const (
+ TCA_EGRESS_REDIR MirredAct = 1 /* packet redirect to EGRESS*/
+ TCA_EGRESS_MIRROR MirredAct = 2 /* mirror packet to EGRESS */
+ TCA_INGRESS_REDIR MirredAct = 3 /* packet redirect to INGRESS*/
+ TCA_INGRESS_MIRROR MirredAct = 4 /* mirror packet to INGRESS */
+)
+
+type MirredAction struct {
+ ActionAttrs
+ MirredAction MirredAct
+ Ifindex int
+}
+
+func (action *MirredAction) Type() string {
+ return "mirred"
+}
+
+func (action *MirredAction) Attrs() *ActionAttrs {
+ return &action.ActionAttrs
+}
+
+// NewMirredAction returns a MirredAction configured as an egress redirect
+// (TCA_EGRESS_REDIR) to the interface with index redirIndex, with the
+// generic action verdict set to TC_ACT_STOLEN.
+func NewMirredAction(redirIndex int) *MirredAction {
+	mirred := new(MirredAction)
+	mirred.ActionAttrs.Action = TC_ACT_STOLEN
+	mirred.MirredAction = TCA_EGRESS_REDIR
+	mirred.Ifindex = redirIndex
+	return mirred
+}
+
// U32 filters on many packet related properties
type U32 struct {
FilterAttrs
- // Currently only supports redirecting to another interface
+ ClassId uint32
RedirIndex int
+ Actions []Action
}
func (filter *U32) Attrs() *FilterAttrs {
return "u32"
}
+type FilterFwAttrs struct {
+ ClassId uint32
+ InDev string
+ Mask uint32
+ Index uint32
+ Buffer uint32
+ Mtu uint32
+ Mpu uint16
+ Rate uint32
+ AvRate uint32
+ PeakRate uint32
+ Action TcPolAct
+ Overhead uint16
+ LinkLayer int
+}
+
+type BpfFilter struct {
+ FilterAttrs
+ ClassId uint32
+ Fd int
+ Name string
+ DirectAction bool
+}
+
+func (filter *BpfFilter) Type() string {
+ return "bpf"
+}
+
+func (filter *BpfFilter) Attrs() *FilterAttrs {
+ return &filter.FilterAttrs
+}
+
// GenericFilter filters represent types that are not currently understood
// by this netlink library.
type GenericFilter struct {
package netlink
import (
+ "bytes"
+ "encoding/binary"
+ "errors"
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
)
+// Fw filter filters on firewall marks
+// NOTE: this is in filter_linux because it refers to nl.TcPolice which
+// is defined in nl/tc_linux.go
+type Fw struct {
+ FilterAttrs
+ ClassId uint32
+ // TODO remove nl type from interface
+ Police nl.TcPolice
+ InDev string
+ // TODO Action
+ Mask uint32
+ AvRate uint32
+ Rtab [256]uint32
+ Ptab [256]uint32
+}
+
+// NewFw builds an Fw filter from the generic filter attributes and the
+// fw-specific settings in fattrs.
+//
+// Rate, PeakRate and AvRate are divided by 8 before being stored. The link
+// layer defaults to nl.LINKLAYER_ETHERNET unless fattrs.LinkLayer is set to
+// something other than nl.LINKLAYER_UNSPEC. Rate tables are only computed
+// for non-zero rates; an error is returned when CalcRtable reports a
+// negative result.
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
+ var rtab [256]uint32
+ var ptab [256]uint32
+ // -1 is handed to CalcRtable as the initial cell log for both tables.
+ rcellLog := -1
+ pcellLog := -1
+ avrate := fattrs.AvRate / 8
+ police := nl.TcPolice{}
+ police.Rate.Rate = fattrs.Rate / 8
+ police.PeakRate.Rate = fattrs.PeakRate / 8
+ buffer := fattrs.Buffer
+ linklayer := nl.LINKLAYER_ETHERNET
+
+ if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
+ linklayer = fattrs.LinkLayer
+ }
+
+ police.Action = int32(fattrs.Action)
+ if police.Rate.Rate != 0 {
+ police.Rate.Mpu = fattrs.Mpu
+ police.Rate.Overhead = fattrs.Overhead
+ // NOTE(review): rtab is an array and is passed by value here, so any
+ // entries CalcRtable writes would not be visible in the rtab stored
+ // in the returned Fw -- confirm against CalcRtable's signature.
+ // NOTE(review): the "TBF:" prefix looks copied from elsewhere; the
+ // peak-rate error below uses "POLICE:".
+ if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
+ return nil, errors.New("TBF: failed to calculate rate table")
+ }
+ police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
+ }
+ police.Mtu = fattrs.Mtu
+ if police.PeakRate.Rate != 0 {
+ police.PeakRate.Mpu = fattrs.Mpu
+ police.PeakRate.Overhead = fattrs.Overhead
+ // NOTE(review): same by-value concern as rtab above applies to ptab.
+ if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
+ return nil, errors.New("POLICE: failed to calculate peak rate table")
+ }
+ }
+
+ return &Fw{
+ FilterAttrs: attrs,
+ ClassId: fattrs.ClassId,
+ InDev: fattrs.InDev,
+ Mask: fattrs.Mask,
+ Police: police,
+ AvRate: avrate,
+ Rtab: rtab,
+ Ptab: ptab,
+ }, nil
+}
+
+func (filter *Fw) Attrs() *FilterAttrs {
+ return &filter.FilterAttrs
+}
+
+func (filter *Fw) Type() string {
+ return "fw"
+}
+
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func FilterDel(filter Filter) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
+ return pkgHandle.FilterDel(filter)
+}
+
+// FilterDel will delete a filter from the system.
+// Equivalent to: `tc filter del $filter`
+func (h *Handle) FilterDel(filter Filter) error {
+ req := h.newNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
// FilterAdd will add a filter to the system.
// Equivalent to: `tc filter add $filter`
func FilterAdd(filter Filter) error {
- req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return pkgHandle.FilterAdd(filter)
+}
+
+// FilterAdd will add a filter to the system.
+// Equivalent to: `tc filter add $filter`
+func (h *Handle) FilterAdd(filter Filter) error {
+ native = nl.NativeEndian()
+ req := h.newNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
}
sel.Keys = append(sel.Keys, nl.TcU32Key{})
nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
- actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
- table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
- nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
- // redirect to other interface
- mir := nl.TcMirred{
- Action: nl.TC_ACT_STOLEN,
- Eaction: nl.TCA_EGRESS_REDIR,
- Ifindex: uint32(u32.RedirIndex),
- }
- aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
- nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
+ if u32.ClassId != 0 {
+ nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
+ }
+ actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
+ // backwards compatibility
+ if u32.RedirIndex != 0 {
+ u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...)
+ }
+ if err := EncodeActions(actionsAttr, u32.Actions); err != nil {
+ return err
+ }
+ } else if fw, ok := filter.(*Fw); ok {
+ if fw.Mask != 0 {
+ b := make([]byte, 4)
+ native.PutUint32(b, fw.Mask)
+ nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
+ }
+ if fw.InDev != "" {
+ nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
+ }
+ if (fw.Police != nl.TcPolice{}) {
+
+ police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
+ nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
+ if (fw.Police.Rate != nl.TcRateSpec{}) {
+ payload := SerializeRtab(fw.Rtab)
+ nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
+ }
+ if (fw.Police.PeakRate != nl.TcRateSpec{}) {
+ payload := SerializeRtab(fw.Ptab)
+ nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
+ }
+ }
+ if fw.ClassId != 0 {
+ b := make([]byte, 4)
+ native.PutUint32(b, fw.ClassId)
+ nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
+ }
+ } else if bpf, ok := filter.(*BpfFilter); ok {
+ var bpfFlags uint32
+ if bpf.ClassId != 0 {
+ nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(bpf.ClassId))
+ }
+ if bpf.Fd >= 0 {
+ nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(bpf.Fd))))
+ }
+ if bpf.Name != "" {
+ nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(bpf.Name))
+ }
+ if bpf.DirectAction {
+ bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT
+ }
+ nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags))
}
+
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
// FilterList gets a list of filters in the system.
// Equivalent to: `tc filter show`.
-// Generally retunrs nothing if link and parent are not specified.
+// Generally returns nothing if link and parent are not specified.
func FilterList(link Link, parent uint32) ([]Filter, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
+ return pkgHandle.FilterList(link, parent)
+}
+
+// FilterList gets a list of filters in the system.
+// Equivalent to: `tc filter show`.
+// Generally returns nothing if link and parent are not specified.
+func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
- ensureIndex(base)
+ h.ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
switch filterType {
case "u32":
filter = &U32{}
+ case "fw":
+ filter = &Fw{}
+ case "bpf":
+ filter = &BpfFilter{}
default:
filter = &GenericFilter{FilterType: filterType}
}
case nl.TCA_OPTIONS:
+ data, err := nl.ParseRouteAttr(attr.Value)
+ if err != nil {
+ return nil, err
+ }
switch filterType {
case "u32":
- data, err := nl.ParseRouteAttr(attr.Value)
+ detailed, err = parseU32Data(filter, data)
if err != nil {
return nil, err
}
- detailed, err = parseU32Data(filter, data)
+ case "fw":
+ detailed, err = parseFwData(filter, data)
+ if err != nil {
+ return nil, err
+ }
+ case "bpf":
+ detailed, err = parseBpfData(filter, data)
if err != nil {
return nil, err
}
+ default:
+ detailed = true
}
}
}
return res, nil
}
+func toTcGen(attrs *ActionAttrs, tcgen *nl.TcGen) {
+ tcgen.Index = uint32(attrs.Index)
+ tcgen.Capab = uint32(attrs.Capab)
+ tcgen.Action = int32(attrs.Action)
+ tcgen.Refcnt = int32(attrs.Refcnt)
+ tcgen.Bindcnt = int32(attrs.Bindcnt)
+}
+
+func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
+ attrs.Index = int(tcgen.Index)
+ attrs.Capab = int(tcgen.Capab)
+ attrs.Action = TcAct(tcgen.Action)
+ attrs.Refcnt = int(tcgen.Refcnt)
+ attrs.Bindcnt = int(tcgen.Bindcnt)
+}
+
+// EncodeActions serializes actions as children of attr. Each action gets
+// its own child attribute whose type is a running index that starts at
+// nl.TCA_ACT_TAB and is incremented per action; inside it the action kind
+// ("mirred", "bpf" or "gact") and a nested options attribute are written.
+// An error is returned for any action type other than *MirredAction,
+// *BpfAction or *GenericAction; children already appended to attr for
+// earlier actions are left in place in that case.
+func EncodeActions(attr *nl.RtAttr, actions []Action) error {
+ tabIndex := int(nl.TCA_ACT_TAB)
+
+ for _, action := range actions {
+ switch action := action.(type) {
+ default:
+ // NOTE(review): a nil entry in actions also lands here, and
+ // action.Type() would then be called on a nil interface --
+ // confirm callers never pass nil entries.
+ return fmt.Errorf("unknown action type %s", action.Type())
+ case *MirredAction:
+ table := nl.NewRtAttrChild(attr, tabIndex, nil)
+ tabIndex++
+ nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred"))
+ aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
+ mirred := nl.TcMirred{
+ Eaction: int32(action.MirredAction),
+ Ifindex: uint32(action.Ifindex),
+ }
+ // Copy the generic action attributes into the embedded TcGen.
+ toTcGen(action.Attrs(), &mirred.TcGen)
+ nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mirred.Serialize())
+ case *BpfAction:
+ table := nl.NewRtAttrChild(attr, tabIndex, nil)
+ tabIndex++
+ nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf"))
+ aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
+ gen := nl.TcGen{}
+ toTcGen(action.Attrs(), &gen)
+ nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_PARMS, gen.Serialize())
+ nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd)))
+ nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name))
+ case *GenericAction:
+ table := nl.NewRtAttrChild(attr, tabIndex, nil)
+ tabIndex++
+ nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("gact"))
+ aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
+ gen := nl.TcGen{}
+ toTcGen(action.Attrs(), &gen)
+ nl.NewRtAttrChild(aopts, nl.TCA_GACT_PARMS, gen.Serialize())
+ }
+ }
+ return nil
+}
+
+// parseActions decodes an action table: every entry of tables is parsed as
+// a nested attribute list describing one action, and one element is
+// appended to the result per entry.
+//
+// Only the "mirred", "bpf" and "gact" kinds are decoded (into
+// *MirredAction, *BpfAction and *GenericAction). For any other kind the
+// rest of that entry is skipped and a nil Action is appended, so the
+// result keeps one slot per table entry.
+//
+// An error is returned when a nested attribute list cannot be parsed or
+// when a kind, name or fd attribute is too short to decode.
+func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
+	var actions []Action
+	for _, table := range tables {
+		var action Action
+		var actionType string
+		aattrs, err := nl.ParseRouteAttr(table.Value)
+		if err != nil {
+			return nil, err
+		}
+	nextattr:
+		for _, aattr := range aattrs {
+			switch aattr.Attr.Type {
+			case nl.TCA_KIND:
+				if len(aattr.Value) == 0 {
+					return nil, fmt.Errorf("empty action kind attribute")
+				}
+				// The kind is a zero-terminated string; drop the terminator.
+				actionType = string(aattr.Value[:len(aattr.Value)-1])
+				// only parse if the action is mirred, bpf or gact
+				switch actionType {
+				case "mirred":
+					action = &MirredAction{}
+				case "bpf":
+					action = &BpfAction{}
+				case "gact":
+					action = &GenericAction{}
+				default:
+					break nextattr
+				}
+			case nl.TCA_OPTIONS:
+				adata, err := nl.ParseRouteAttr(aattr.Value)
+				if err != nil {
+					return nil, err
+				}
+				for _, adatum := range adata {
+					switch actionType {
+					case "mirred":
+						switch adatum.Attr.Type {
+						case nl.TCA_MIRRED_PARMS:
+							mirred := *nl.DeserializeTcMirred(adatum.Value)
+							m := action.(*MirredAction)
+							// toAttrs overwrites every ActionAttrs field, so
+							// no reset is needed; the old reset ran after this
+							// call and wiped the parsed values.
+							toAttrs(&mirred.TcGen, m.Attrs())
+							m.Ifindex = int(mirred.Ifindex)
+							m.MirredAction = MirredAct(mirred.Eaction)
+						}
+					case "bpf":
+						switch adatum.Attr.Type {
+						case nl.TCA_ACT_BPF_PARMS:
+							gen := *nl.DeserializeTcGen(adatum.Value)
+							toAttrs(&gen, action.Attrs())
+						case nl.TCA_ACT_BPF_FD:
+							if len(adatum.Value) < 4 {
+								return nil, fmt.Errorf("bpf action fd attribute too short: %d bytes", len(adatum.Value))
+							}
+							action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4]))
+						case nl.TCA_ACT_BPF_NAME:
+							if len(adatum.Value) == 0 {
+								return nil, fmt.Errorf("empty bpf action name attribute")
+							}
+							action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1])
+						}
+					case "gact":
+						switch adatum.Attr.Type {
+						case nl.TCA_GACT_PARMS:
+							gen := *nl.DeserializeTcGen(adatum.Value)
+							toAttrs(&gen, action.Attrs())
+						}
+					}
+				}
+			}
+		}
+		actions = append(actions, action)
+	}
+	return actions, nil
+}
+
func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
u32 := filter.(*U32)
return detailed, nil
}
case nl.TCA_U32_ACT:
- table, err := nl.ParseRouteAttr(datum.Value)
+ tables, err := nl.ParseRouteAttr(datum.Value)
+ if err != nil {
+ return detailed, err
+ }
+ u32.Actions, err = parseActions(tables)
if err != nil {
return detailed, err
}
- if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
- return detailed, fmt.Errorf("Action table not formed properly")
+ for _, action := range u32.Actions {
+ if action, ok := action.(*MirredAction); ok {
+ u32.RedirIndex = int(action.Ifindex)
+ }
}
- aattrs, err := nl.ParseRouteAttr(table[0].Value)
- for _, aattr := range aattrs {
+ }
+ }
+ return detailed, nil
+}
+
+// parseFwData fills the *Fw filter passed as filter from the attributes in
+// data: the mask, class id, input device name and the nested police
+// attributes (police parameters plus rate and peak-rate tables).
+//
+// filter must hold a *Fw; the type assertion is unchecked and panics
+// otherwise. The function refreshes the package-level native byte order
+// before decoding and, as written, always returns (true, nil).
+func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+ native = nl.NativeEndian()
+ fw := filter.(*Fw)
+ detailed := true
+ for _, datum := range data {
+ switch datum.Attr.Type {
+ case nl.TCA_FW_MASK:
+ fw.Mask = native.Uint32(datum.Value[0:4])
+ case nl.TCA_FW_CLASSID:
+ fw.ClassId = native.Uint32(datum.Value[0:4])
+ case nl.TCA_FW_INDEV:
+ fw.InDev = string(datum.Value[:len(datum.Value)-1])
+ case nl.TCA_FW_POLICE:
+ // NOTE(review): the parse error is discarded here, unlike in
+ // parseU32Data which returns it - confirm this is intentional.
+ adata, _ := nl.ParseRouteAttr(datum.Value)
+ for _, aattr := range adata {
switch aattr.Attr.Type {
- case nl.TCA_KIND:
- actionType := string(aattr.Value[:len(aattr.Value)-1])
- // only parse if the action is mirred
- if actionType != "mirred" {
- return detailed, nil
- }
- case nl.TCA_OPTIONS:
- adata, err := nl.ParseRouteAttr(aattr.Value)
- if err != nil {
- return detailed, err
- }
- for _, adatum := range adata {
- switch adatum.Attr.Type {
- case nl.TCA_MIRRED_PARMS:
- mir := nl.DeserializeTcMirred(adatum.Value)
- u32.RedirIndex = int(mir.Ifindex)
- }
- }
+ case nl.TCA_POLICE_TBF:
+ fw.Police = *nl.DeserializeTcPolice(aattr.Value)
+ case nl.TCA_POLICE_RATE:
+ fw.Rtab = DeserializeRtab(aattr.Value)
+ case nl.TCA_POLICE_PEAKRATE:
+ fw.Ptab = DeserializeRtab(aattr.Value)
}
}
}
}
return detailed, nil
}
+
+// parseBpfData copies the BPF classifier attributes found in data (program
+// fd, name, class id and the direct-action flag) into the *BpfFilter held by
+// filter. The type assertion is unchecked, so any other Filter panics.
+//
+// The package-level native byte order is refreshed before decoding. The
+// function always reports detailed == true and a nil error.
+func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	native = nl.NativeEndian()
+	target := filter.(*BpfFilter)
+	for i := range data {
+		attr := &data[i]
+		switch attr.Attr.Type {
+		case nl.TCA_BPF_FD:
+			fd := native.Uint32(attr.Value[0:4])
+			target.Fd = int(fd)
+		case nl.TCA_BPF_NAME:
+			// The last byte is dropped; the name is stored without it.
+			end := len(attr.Value) - 1
+			target.Name = string(attr.Value[:end])
+		case nl.TCA_BPF_CLASSID:
+			target.ClassId = native.Uint32(attr.Value[0:4])
+		case nl.TCA_BPF_FLAGS:
+			// DirectAction is only ever switched on, never cleared.
+			if native.Uint32(attr.Value[0:4])&nl.TCA_BPF_FLAG_ACT_DIRECT != 0 {
+				target.DirectAction = true
+			}
+		}
+	}
+	return true, nil
+}
+
+func AlignToAtm(size uint) uint {
+ var linksize, cells int
+ cells = int(size / nl.ATM_CELL_PAYLOAD)
+ if (size % nl.ATM_CELL_PAYLOAD) > 0 {
+ cells++
+ }
+ linksize = cells * nl.ATM_CELL_SIZE
+ return uint(linksize)
+}
+
+// AdjustSize raises sz to at least mpu and, when linklayer is
+// nl.LINKLAYER_ATM, converts the result to its ATM cell-aligned size via
+// AlignToAtm. Every other link layer returns the clamped size unchanged.
+func AdjustSize(sz uint, mpu uint, linklayer int) uint {
+	if sz < mpu {
+		sz = mpu
+	}
+	if linklayer == nl.LINKLAYER_ATM {
+		return AlignToAtm(sz)
+	}
+	return sz
+}
+
+func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cellLog int, mtu uint32, linklayer int) int {
+ bps := rate.Rate
+ mpu := rate.Mpu
+ var sz uint
+ if mtu == 0 {
+ mtu = 2047
+ }
+ if cellLog < 0 {
+ cellLog = 0
+ for (mtu >> uint(cellLog)) > 255 {
+ cellLog++
+ }
+ }
+ for i := 0; i < 256; i++ {
+ sz = AdjustSize(uint((i+1)<<uint32(cellLog)), uint(mpu), linklayer)
+ rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
+ }
+ rate.CellAlign = -1
+ rate.CellLog = uint8(cellLog)
+ rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
+ return cellLog
+}
+
+// DeserializeRtab decodes b as 256 uint32 values in native byte order and
+// returns them as a table. The decoding error is intentionally ignored, so
+// a malformed buffer never makes this function fail.
+func DeserializeRtab(b []byte) [256]uint32 {
+	var table [256]uint32
+	_ = binary.Read(bytes.NewReader(b), nl.NativeEndian(), &table)
+	return table
+}
+
+func SerializeRtab(rtab [256]uint32) []byte {
+ native := nl.NativeEndian()
+ var w bytes.Buffer
+ _ = binary.Write(&w, native, rtab)
+ return w.Bytes()
+}
--- /dev/null
+package netlink
+
+import (
+ "syscall"
+
+ "github.com/vishvananda/netlink/nl"
+ "github.com/vishvananda/netns"
+)
+
+// pkgHandle is the empty Handle the package-level functions delegate to.
+// Its sockets map is nil, which makes newNetlinkRequest fall back to
+// nl.NewNetlinkRequest.
+var pkgHandle = &Handle{}
+
+// Handle is a handle for the netlink requests on a
+// specific network namespace. All the requests on the
+// same netlink family share the same netlink socket,
+// which gets released when the handle is deleted.
+type Handle struct {
+ sockets map[int]*nl.SocketHandle // one socket per netlink family, keyed by family
+ lookupByDump bool // when set, LinkByName resolves names through linkByNameDump
+}
+
+// SupportsNetlinkFamily reports whether this Handle holds a socket for the
+// netlink family nlFamily.
+func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
+	if _, present := h.sockets[nlFamily]; present {
+		return true
+	}
+	return false
+}
+
+// NewHandle returns a netlink handle on the current network namespace.
+// Caller may specify the netlink families the handle should support.
+// If no families are specified, all the families the netlink package
+// supports (nl.SupportedNlFamilies) will be automatically added.
+// It forwards to newHandle with netns.None() for both namespaces.
+func NewHandle(nlFamilies ...int) (*Handle, error) {
+ return newHandle(netns.None(), netns.None(), nlFamilies...)
+}
+
+// NewHandleAt returns a netlink handle on the network namespace
+// specified by ns. If ns=netns.None(), current network namespace
+// will be assumed. Caller may specify the netlink families the handle
+// should support; with none given, all supported families are added.
+func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
+ return newHandle(ns, netns.None(), nlFamilies...)
+}
+
+// NewHandleAtFrom works as NewHandleAt but allows client to specify both
+// the new and the origin netns Handle. It takes no family list, so the
+// handle is created for all the families the netlink package supports.
+func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
+ return newHandle(newNs, curNs)
+}
+
+// newHandle opens one netlink socket per requested family in namespace newNs
+// (entered from curNs) and returns a Handle owning them.
+//
+// With no nlFamilies given, nl.SupportedNlFamilies is used. A family listed
+// more than once is opened only once. If opening any socket fails, the
+// sockets opened so far are closed before the error is returned, so a failed
+// call does not leak descriptors.
+func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error) {
+	h := &Handle{sockets: map[int]*nl.SocketHandle{}}
+	fams := nl.SupportedNlFamilies
+	if len(nlFamilies) != 0 {
+		fams = nlFamilies
+	}
+	for _, f := range fams {
+		// Opening a second socket for the same family would overwrite the
+		// map entry and orphan the first socket.
+		if _, dup := h.sockets[f]; dup {
+			continue
+		}
+		s, err := nl.GetNetlinkSocketAt(newNs, curNs, f)
+		if err != nil {
+			// Release whatever was opened for the earlier families.
+			h.Delete()
+			return nil, err
+		}
+		h.sockets[f] = &nl.SocketHandle{Socket: s}
+	}
+	return h, nil
+}
+
+// Delete closes every socket owned by this handle and drops the socket map.
+func (h *Handle) Delete() {
+	for family := range h.sockets {
+		h.sockets[family].Close()
+	}
+	h.sockets = nil
+}
+
+// newNetlinkRequest builds a request of type proto with NLM_F_REQUEST and
+// flags set. A handle without sockets (such as the package handle)
+// delegates to nl.NewNetlinkRequest; otherwise the request is bound to this
+// handle's sockets.
+func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
+	if h.sockets == nil {
+		// Do this so that package API still use nl package variable nextSeqNr
+		return nl.NewNetlinkRequest(proto, flags)
+	}
+	header := syscall.NlMsghdr{
+		Len:   uint32(syscall.SizeofNlMsghdr),
+		Type:  uint16(proto),
+		Flags: syscall.NLM_F_REQUEST | uint16(flags),
+	}
+	request := &nl.NetlinkRequest{NlMsghdr: header}
+	request.Sockets = h.sockets
+	return request
+}
package netlink
-import "net"
+import (
+ "fmt"
+ "net"
+)
// Link represents a link device from netlink. Shared link attributes
// like name may be retrieved using the Attrs() method. Unique data
ParentIndex int // index of the parent link device
MasterIndex int // must be the index of a bridge
Namespace interface{} // nil | NsPid | NsFd
+ Alias string
+ Statistics *LinkStatistics
}
// NewLinkAttrs returns LinkAttrs structure filled with default values
}
}
+// LinkStatistics holds the per-link packet, byte and error counters exposed
+// through LinkAttrs.Statistics. All counters are 32 bits wide.
+// Ref: struct rtnl_link_stats {...}
+type LinkStatistics struct {
+ RxPackets uint32
+ TxPackets uint32
+ RxBytes uint32
+ TxBytes uint32
+ RxErrors uint32
+ TxErrors uint32
+ RxDropped uint32
+ TxDropped uint32
+ Multicast uint32
+ Collisions uint32
+ RxLengthErrors uint32
+ RxOverErrors uint32
+ RxCrcErrors uint32
+ RxFrameErrors uint32
+ RxFifoErrors uint32
+ RxMissedErrors uint32
+ TxAbortedErrors uint32
+ TxCarrierErrors uint32
+ TxFifoErrors uint32
+ TxHeartbeatErrors uint32
+ TxWindowErrors uint32
+ RxCompressed uint32
+ TxCompressed uint32
+}
+
// Device links cannot be created via netlink. These links
// are links created by udev like 'lo' and 'etho0'
type Device struct {
return "macvtap"
}
+// TuntapMode selects the device mode of a Tuntap link (see TUNTAP_MODE_*).
+type TuntapMode uint16
+// TuntapFlag holds the option flags of a Tuntap link (see TUNTAP_*).
+type TuntapFlag uint16
+
+// Tuntap links are created through the tun/tap device node (LinkAdd opens
+// /dev/net/tun), but can be destroyed via netlink.
+type Tuntap struct {
+ LinkAttrs
+ Mode TuntapMode
+ Flags TuntapFlag // zero makes LinkAdd use TUNTAP_DEFAULTS
+}
+
+// Attrs returns the link attributes embedded in the Tuntap.
+func (tuntap *Tuntap) Attrs() *LinkAttrs {
+ return &tuntap.LinkAttrs
+}
+
+// Type returns the link type name, "tuntap".
+func (tuntap *Tuntap) Type() string {
+ return "tuntap"
+}
+
// Veth devices must specify PeerName on create
type Veth struct {
LinkAttrs
RSC bool
L2miss bool
L3miss bool
+ UDPCSum bool
NoAge bool
GBP bool
Age int
return "ipvlan"
}
+// BondMode identifies the operating mode of a bond link.
+type BondMode int
+
+// String returns the name registered for b in bondModeToString, or
+// "BondMode(<n>)" for a value that has no name.
+func (b BondMode) String() string {
+	if name, known := bondModeToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("BondMode(%d)", b)
+}
+
+// StringToBondMode returns the bond mode named s, or BOND_MODE_UNKNOWN if
+// s is not a key of StringToBondModeMap.
+func StringToBondMode(s string) BondMode {
+	if mode, known := StringToBondModeMap[s]; known {
+		return mode
+	}
+	return BOND_MODE_UNKNOWN
+}
+
+// Possible BondMode values.
+//
+// The numeric values are sent to the kernel unchanged (addBondAttrs writes
+// uint8(bond.Mode) as IFLA_BOND_MODE), so the order below must follow the
+// kernel's bonding mode numbering: balance-rr=0, active-backup=1,
+// balance-xor=2, broadcast=3, 802.3ad=4, balance-tlb=5, balance-alb=6.
+// BOND_MODE_UNKNOWN is a library-only sentinel placed after the real modes.
+const (
+	BOND_MODE_BALANCE_RR BondMode = iota
+	BOND_MODE_ACTIVE_BACKUP
+	BOND_MODE_BALANCE_XOR
+	BOND_MODE_BROADCAST
+	BOND_MODE_802_3AD
+	BOND_MODE_BALANCE_TLB
+	BOND_MODE_BALANCE_ALB
+	BOND_MODE_UNKNOWN
+)
+
+// bondModeToString maps each named bond mode to its textual name; it backs
+// BondMode.String. BOND_MODE_UNKNOWN has no entry.
+var bondModeToString = map[BondMode]string{
+ BOND_MODE_802_3AD: "802.3ad",
+ BOND_MODE_BALANCE_RR: "balance-rr",
+ BOND_MODE_ACTIVE_BACKUP: "active-backup",
+ BOND_MODE_BALANCE_XOR: "balance-xor",
+ BOND_MODE_BROADCAST: "broadcast",
+ BOND_MODE_BALANCE_TLB: "balance-tlb",
+ BOND_MODE_BALANCE_ALB: "balance-alb",
+}
+// StringToBondModeMap is the inverse of bondModeToString; it backs
+// StringToBondMode.
+var StringToBondModeMap = map[string]BondMode{
+ "802.3ad": BOND_MODE_802_3AD,
+ "balance-rr": BOND_MODE_BALANCE_RR,
+ "active-backup": BOND_MODE_ACTIVE_BACKUP,
+ "balance-xor": BOND_MODE_BALANCE_XOR,
+ "broadcast": BOND_MODE_BROADCAST,
+ "balance-tlb": BOND_MODE_BALANCE_TLB,
+ "balance-alb": BOND_MODE_BALANCE_ALB,
+}
+
+// BondArpValidate is the value written as IFLA_BOND_ARP_VALIDATE when a
+// bond link is added.
+type BondArpValidate int
+
+// Possible BondArpValidate values, numbered from 0.
+const (
+ BOND_ARP_VALIDATE_NONE BondArpValidate = iota
+ BOND_ARP_VALIDATE_ACTIVE
+ BOND_ARP_VALIDATE_BACKUP
+ BOND_ARP_VALIDATE_ALL
+)
+
+// BondPrimaryReselect is the value written as IFLA_BOND_PRIMARY_RESELECT
+// when a bond link is added.
+type BondPrimaryReselect int
+
+// Possible BondPrimaryReselect values, numbered from 0.
+const (
+ BOND_PRIMARY_RESELECT_ALWAYS BondPrimaryReselect = iota
+ BOND_PRIMARY_RESELECT_BETTER
+ BOND_PRIMARY_RESELECT_FAILURE
+)
+
+// BondArpAllTargets is the value written as IFLA_BOND_ARP_ALL_TARGETS when
+// a bond link is added.
+type BondArpAllTargets int
+
+// Possible BondArpAllTargets values, numbered from 0.
+const (
+ BOND_ARP_ALL_TARGETS_ANY BondArpAllTargets = iota
+ BOND_ARP_ALL_TARGETS_ALL
+)
+
+// BondFailOverMac is the value written as IFLA_BOND_FAIL_OVER_MAC when a
+// bond link is added.
+type BondFailOverMac int
+
+// Possible BondFailOverMac values, numbered from 0.
+const (
+ BOND_FAIL_OVER_MAC_NONE BondFailOverMac = iota
+ BOND_FAIL_OVER_MAC_ACTIVE
+ BOND_FAIL_OVER_MAC_FOLLOW
+)
+
+// BondXmitHashPolicy identifies the transmit hash policy of a bond link.
+type BondXmitHashPolicy int
+
+// String returns the name registered for b in bondXmitHashPolicyToString,
+// or "XmitHashPolicy(<n>)" for a value that has no name.
+func (b BondXmitHashPolicy) String() string {
+	if name, known := bondXmitHashPolicyToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("XmitHashPolicy(%d)", b)
+}
+
+// StringToBondXmitHashPolicy returns the transmit hash policy named s, or
+// BOND_XMIT_HASH_POLICY_UNKNOWN if s is not a key of
+// StringToBondXmitHashPolicyMap.
+func StringToBondXmitHashPolicy(s string) BondXmitHashPolicy {
+	if policy, known := StringToBondXmitHashPolicyMap[s]; known {
+		return policy
+	}
+	return BOND_XMIT_HASH_POLICY_UNKNOWN
+}
+
+// Possible BondXmitHashPolicy values, numbered from 0.
+// BOND_XMIT_HASH_POLICY_UNKNOWN is what StringToBondXmitHashPolicy returns
+// for an unrecognised name.
+const (
+ BOND_XMIT_HASH_POLICY_LAYER2 BondXmitHashPolicy = iota
+ BOND_XMIT_HASH_POLICY_LAYER3_4
+ BOND_XMIT_HASH_POLICY_LAYER2_3
+ BOND_XMIT_HASH_POLICY_ENCAP2_3
+ BOND_XMIT_HASH_POLICY_ENCAP3_4
+ BOND_XMIT_HASH_POLICY_UNKNOWN
+)
+
+// bondXmitHashPolicyToString maps each named policy to its textual name; it
+// backs BondXmitHashPolicy.String.
+var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{
+ BOND_XMIT_HASH_POLICY_LAYER2: "layer2",
+ BOND_XMIT_HASH_POLICY_LAYER3_4: "layer3+4",
+ BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3",
+ BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3",
+ BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4",
+}
+// StringToBondXmitHashPolicyMap is the inverse of
+// bondXmitHashPolicyToString; it backs StringToBondXmitHashPolicy.
+var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{
+ "layer2": BOND_XMIT_HASH_POLICY_LAYER2,
+ "layer3+4": BOND_XMIT_HASH_POLICY_LAYER3_4,
+ "layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3,
+ "encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3,
+ "encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4,
+}
+
+// BondLacpRate identifies the LACP rate of a bond link.
+type BondLacpRate int
+
+// String returns the name registered for b in bondLacpRateToString, or
+// "LacpRate(<n>)" for a value that has no name.
+func (b BondLacpRate) String() string {
+	if name, known := bondLacpRateToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("LacpRate(%d)", b)
+}
+
+// StringToBondLacpRate returns the LACP rate named s, or
+// BOND_LACP_RATE_UNKNOWN if s is not a key of StringToBondLacpRateMap.
+func StringToBondLacpRate(s string) BondLacpRate {
+	if rate, known := StringToBondLacpRateMap[s]; known {
+		return rate
+	}
+	return BOND_LACP_RATE_UNKNOWN
+}
+
+// Possible BondLacpRate values, numbered from 0. BOND_LACP_RATE_UNKNOWN is
+// what StringToBondLacpRate returns for an unrecognised name.
+const (
+ BOND_LACP_RATE_SLOW BondLacpRate = iota
+ BOND_LACP_RATE_FAST
+ BOND_LACP_RATE_UNKNOWN
+)
+
+// bondLacpRateToString maps each named rate to its textual name; it backs
+// BondLacpRate.String.
+var bondLacpRateToString = map[BondLacpRate]string{
+ BOND_LACP_RATE_SLOW: "slow",
+ BOND_LACP_RATE_FAST: "fast",
+}
+// StringToBondLacpRateMap is the inverse of bondLacpRateToString; it backs
+// StringToBondLacpRate.
+var StringToBondLacpRateMap = map[string]BondLacpRate{
+ "slow": BOND_LACP_RATE_SLOW,
+ "fast": BOND_LACP_RATE_FAST,
+}
+
+// BondAdSelect is the value written as IFLA_BOND_AD_SELECT when a bond link
+// is added.
+type BondAdSelect int
+
+// Possible BondAdSelect values, numbered from 0.
+const (
+ BOND_AD_SELECT_STABLE BondAdSelect = iota
+ BOND_AD_SELECT_BANDWIDTH
+ BOND_AD_SELECT_COUNT
+)
+
+// BondAdInfo represents the ad info of a bond. It is referenced from
+// Bond.AdInfo and is not written by addBondAttrs.
+type BondAdInfo struct {
+ AggregatorId int
+ NumPorts int
+ ActorKey int
+ PartnerKey int
+ PartnerMac net.HardwareAddr
+}
+
+// Bond represents a bond link and its options.
+//
+// addBondAttrs sends every numeric option whose value is >= 0 and
+// ArpIpTargets when it is non-nil. NewLinkBond presets all numeric options
+// to -1 so that only the options the caller sets explicitly are sent; a Bond
+// built as a plain struct literal has them at 0, which counts as set.
+type Bond struct {
+ LinkAttrs
+ Mode BondMode
+ ActiveSlave int
+ Miimon int
+ UpDelay int
+ DownDelay int
+ UseCarrier int
+ ArpInterval int
+ ArpIpTargets []net.IP
+ ArpValidate BondArpValidate
+ ArpAllTargets BondArpAllTargets
+ Primary int
+ PrimaryReselect BondPrimaryReselect
+ FailOverMac BondFailOverMac
+ XmitHashPolicy BondXmitHashPolicy
+ ResendIgmp int
+ NumPeerNotif int
+ AllSlavesActive int
+ MinLinks int
+ LpInterval int
+ PackersPerSlave int // written as IFLA_BOND_PACKETS_PER_SLAVE
+ LacpRate BondLacpRate
+ AdSelect BondAdSelect
+ // looking at iproute tool AdInfo can only be retrieved. It can't be set.
+ AdInfo *BondAdInfo
+}
+
+// NewLinkBond returns a Bond carrying atr with every numeric option preset
+// to -1, the value addBondAttrs treats as "not set". ArpIpTargets and AdInfo
+// are left nil.
+func NewLinkBond(atr LinkAttrs) *Bond {
+	bond := new(Bond)
+	bond.LinkAttrs = atr
+
+	bond.Mode = -1
+	bond.ActiveSlave = -1
+	bond.Miimon = -1
+	bond.UpDelay = -1
+	bond.DownDelay = -1
+	bond.UseCarrier = -1
+	bond.ArpInterval = -1
+	bond.ArpValidate = -1
+	bond.ArpAllTargets = -1
+	bond.Primary = -1
+	bond.PrimaryReselect = -1
+	bond.FailOverMac = -1
+	bond.XmitHashPolicy = -1
+	bond.ResendIgmp = -1
+	bond.NumPeerNotif = -1
+	bond.AllSlavesActive = -1
+	bond.MinLinks = -1
+	bond.LpInterval = -1
+	bond.PackersPerSlave = -1
+	bond.LacpRate = -1
+	bond.AdSelect = -1
+	return bond
+}
+
+// Flag masks for bond options, one bit per option. The first mask is 1<<1,
+// so bit 0 is not used.
+// NOTE(review): the original comment says Bond.Flagmask must be set for an
+// option to work, but the Bond struct shown here has no Flagmask field and
+// addBondAttrs selects options by testing for values >= 0 - confirm whether
+// these masks are still used.
+const (
+ BOND_MODE_MASK uint64 = 1 << (1 + iota)
+ BOND_ACTIVE_SLAVE_MASK
+ BOND_MIIMON_MASK
+ BOND_UPDELAY_MASK
+ BOND_DOWNDELAY_MASK
+ BOND_USE_CARRIER_MASK
+ BOND_ARP_INTERVAL_MASK
+ BOND_ARP_VALIDATE_MASK
+ BOND_ARP_ALL_TARGETS_MASK
+ BOND_PRIMARY_MASK
+ BOND_PRIMARY_RESELECT_MASK
+ BOND_FAIL_OVER_MAC_MASK
+ BOND_XMIT_HASH_POLICY_MASK
+ BOND_RESEND_IGMP_MASK
+ BOND_NUM_PEER_NOTIF_MASK
+ BOND_ALL_SLAVES_ACTIVE_MASK
+ BOND_MIN_LINKS_MASK
+ BOND_LP_INTERVAL_MASK
+ BOND_PACKETS_PER_SLAVE_MASK
+ BOND_LACP_RATE_MASK
+ BOND_AD_SELECT_MASK
+)
+
+// Attrs returns the link attributes embedded in the Bond.
+func (bond *Bond) Attrs() *LinkAttrs {
+ return &bond.LinkAttrs
+}
+
+// Type returns the link type name, "bond".
+func (bond *Bond) Type() string {
+ return "bond"
+}
+
+// Gretap devices must specify the Local and Remote addresses on create.
+type Gretap struct {
+ LinkAttrs
+ IKey uint32
+ OKey uint32
+ EncapSport uint16
+ EncapDport uint16
+ Local net.IP
+ Remote net.IP
+ IFlags uint16
+ OFlags uint16
+ PMtuDisc uint8
+ Ttl uint8
+ Tos uint8
+ EncapType uint16
+ EncapFlags uint16
+ Link uint32
+}
+
+// Attrs returns the link attributes embedded in the Gretap.
+func (gretap *Gretap) Attrs() *LinkAttrs {
+ return &gretap.LinkAttrs
+}
+
+// Type returns the link type name, "gretap".
+func (gretap *Gretap) Type() string {
+ return "gretap"
+}
+
// iproute2 supported devices;
// vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
"encoding/binary"
"fmt"
"net"
+ "os"
"syscall"
+ "unsafe"
"github.com/vishvananda/netlink/nl"
+ "github.com/vishvananda/netns"
+)
+
+// SizeofLinkStats is 0x5c (92) bytes, i.e. the 23 uint32 counters of
+// LinkStatistics.
+const SizeofLinkStats = 0x5c
+
+// Tuntap modes and flags, taken from the corresponding syscall.IFF_*
+// values. LinkAdd uses TUNTAP_DEFAULTS when Tuntap.Flags is zero.
+const (
+ TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
+ TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
+ TUNTAP_DEFAULTS TuntapFlag = syscall.IFF_TUN_EXCL | syscall.IFF_ONE_QUEUE
+ TUNTAP_VNET_HDR TuntapFlag = syscall.IFF_VNET_HDR
+ TUNTAP_TUN_EXCL TuntapFlag = syscall.IFF_TUN_EXCL
+ TUNTAP_NO_PI TuntapFlag = syscall.IFF_NO_PI
+ TUNTAP_ONE_QUEUE TuntapFlag = syscall.IFF_ONE_QUEUE
)
var native = nl.NativeEndian()
}
}
+// ensureIndex fills in link.Index by looking the link up by name when the
+// index is still 0. It is best effort: a nil link is ignored, and a failed
+// lookup leaves the index untouched.
+func (h *Handle) ensureIndex(link *LinkAttrs) {
+	if link == nil || link.Index != 0 {
+		return
+	}
+	// The lookup error is deliberately dropped; callers proceed with index 0.
+	found, _ := h.LinkByName(link.Name)
+	if found == nil {
+		return
+	}
+	link.Index = found.Attrs().Index
+}
+
// LinkSetUp enables the link device.
// Equivalent to: `ip link set $link up`
func LinkSetUp(link Link) error {
+ return pkgHandle.LinkSetUp(link)
+}
+
+// LinkSetUp enables the link device.
+// Equivalent to: `ip link set $link up`
+func (h *Handle) LinkSetUp(link Link) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Change = syscall.IFF_UP
// LinkSetDown disables link device.
// Equivalent to: `ip link set $link down`
func LinkSetDown(link Link) error {
+ return pkgHandle.LinkSetDown(link)
+}
+
+// LinkSetDown disables link device.
+// Equivalent to: `ip link set $link down`
+func (h *Handle) LinkSetDown(link Link) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Change = syscall.IFF_UP
// LinkSetMTU sets the mtu of the link device.
// Equivalent to: `ip link set $link mtu $mtu`
func LinkSetMTU(link Link, mtu int) error {
+ return pkgHandle.LinkSetMTU(link, mtu)
+}
+
+// LinkSetMTU sets the mtu of the link device.
+// Equivalent to: `ip link set $link mtu $mtu`
+func (h *Handle) LinkSetMTU(link Link, mtu int) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
// LinkSetName sets the name of the link device.
// Equivalent to: `ip link set $link name $name`
func LinkSetName(link Link, name string) error {
+ return pkgHandle.LinkSetName(link, name)
+}
+
+// LinkSetName sets the name of the link device.
+// Equivalent to: `ip link set $link name $name`
+func (h *Handle) LinkSetName(link Link, name string) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
return err
}
+// LinkSetAlias sets the alias of the link device using the package handle.
+// Equivalent to: `ip link set dev $link alias $name`
+func LinkSetAlias(link Link, name string) error {
+	return pkgHandle.LinkSetAlias(link, name)
+}
+
+// LinkSetAlias sets the alias of the link device.
+// Equivalent to: `ip link set dev $link alias $name`
+//
+// The link index is resolved by name first if it is not set; the alias is
+// sent as an IFLA_IFALIAS attribute of an RTM_SETLINK request.
+func (h *Handle) LinkSetAlias(link Link, name string) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	ifinfo := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	ifinfo.Index = int32(attrs.Index)
+	request.AddData(ifinfo)
+	request.AddData(nl.NewRtAttr(syscall.IFLA_IFALIAS, []byte(name)))
+
+	_, err := request.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
// LinkSetHardwareAddr sets the hardware address of the link device.
// Equivalent to: `ip link set $link address $hwaddr`
func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
+ return pkgHandle.LinkSetHardwareAddr(link, hwaddr)
+}
+
+// LinkSetHardwareAddr sets the hardware address of the link device.
+// Equivalent to: `ip link set $link address $hwaddr`
+func (h *Handle) LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
return err
}
+// LinkSetVfHardwareAddr sets the hardware address of a vf for the link
+// using the package handle.
+// Equivalent to: `ip link set $link vf $vf mac $hwaddr`
+func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
+	return pkgHandle.LinkSetVfHardwareAddr(link, vf, hwaddr)
+}
+
+// LinkSetVfHardwareAddr sets the hardware address of a vf for the link.
+// Equivalent to: `ip link set $link vf $vf mac $hwaddr`
+//
+// The address is sent as an IFLA_VF_MAC attribute nested in IFLA_VF_INFO
+// under IFLA_VFINFO_LIST of an RTM_SETLINK request.
+func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	ifinfo := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	ifinfo.Index = int32(attrs.Index)
+	request.AddData(ifinfo)
+
+	// copy transfers at most len(mac.Mac) bytes; a shorter hwaddr leaves
+	// the remaining bytes zero.
+	mac := nl.VfMac{Vf: uint32(vf)}
+	copy(mac.Mac[:], []byte(hwaddr))
+
+	vfList := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil)
+	vfInfo := nl.NewRtAttrChild(vfList, nl.IFLA_VF_INFO, nil)
+	nl.NewRtAttrChild(vfInfo, nl.IFLA_VF_MAC, mac.Serialize())
+	request.AddData(vfList)
+
+	_, err := request.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// LinkSetVfVlan sets the vlan of a vf for the link.
+// Equivalent to: `ip link set $link vf $vf vlan $vlan`
+func LinkSetVfVlan(link Link, vf, vlan int) error {
+ return pkgHandle.LinkSetVfVlan(link, vf, vlan)
+}
+
+// LinkSetVfVlan sets the vlan of a vf for the link.
+// Equivalent to: `ip link set $link vf $vf vlan $vlan`
+func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error {
+ base := link.Attrs()
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+ msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+ msg.Index = int32(base.Index)
+ req.AddData(msg)
+
+ data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil)
+ info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
+ vfmsg := nl.VfVlan{
+ Vf: uint32(vf),
+ Vlan: uint32(vlan),
+ }
+ nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize())
+ req.AddData(data)
+
+ _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+ return err
+}
+
// LinkSetMaster sets the master of the link device.
// Equivalent to: `ip link set $link master $master`
func LinkSetMaster(link Link, master *Bridge) error {
+ return pkgHandle.LinkSetMaster(link, master)
+}
+
+// LinkSetMaster sets the master of the link device.
+// Equivalent to: `ip link set $link master $master`
+//
+// The master's index is resolved by name if it is not set. A nil master, or
+// one whose index is still <= 0 after that lookup, yields the error
+// "Device does not exist"; use LinkSetNoMaster to detach a link.
+func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
index := 0
if master != nil {
masterBase := master.Attrs()
- ensureIndex(masterBase)
+ h.ensureIndex(masterBase)
index = masterBase.Index
}
- return LinkSetMasterByIndex(link, index)
+ if index <= 0 {
+ return fmt.Errorf("Device does not exist")
+ }
+ return h.LinkSetMasterByIndex(link, index)
+}
+
+// LinkSetNoMaster removes the master of the link device using the package
+// handle.
+// Equivalent to: `ip link set $link nomaster`
+func LinkSetNoMaster(link Link) error {
+ return pkgHandle.LinkSetNoMaster(link)
+}
+
+// LinkSetNoMaster removes the master of the link device.
+// Equivalent to: `ip link set $link nomaster`
+// It is implemented as LinkSetMasterByIndex with a master index of 0.
+func (h *Handle) LinkSetNoMaster(link Link) error {
+ return h.LinkSetMasterByIndex(link, 0)
}
// LinkSetMasterByIndex sets the master of the link device.
// Equivalent to: `ip link set $link master $master`
func LinkSetMasterByIndex(link Link, masterIndex int) error {
+ return pkgHandle.LinkSetMasterByIndex(link, masterIndex)
+}
+
+// LinkSetMasterByIndex sets the master of the link device.
+// Equivalent to: `ip link set $link master $master`
+func (h *Handle) LinkSetMasterByIndex(link Link, masterIndex int) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
// pid must be a pid of a running process.
// Equivalent to: `ip link set $link netns $pid`
func LinkSetNsPid(link Link, nspid int) error {
+ return pkgHandle.LinkSetNsPid(link, nspid)
+}
+
+// LinkSetNsPid puts the device into a new network namespace. The
+// pid must be a pid of a running process.
+// Equivalent to: `ip link set $link netns $pid`
+func (h *Handle) LinkSetNsPid(link Link, nspid int) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
// fd must be an open file descriptor to a network namespace.
// Similar to: `ip link set $link netns $ns`
func LinkSetNsFd(link Link, fd int) error {
+ return pkgHandle.LinkSetNsFd(link, fd)
+}
+
+// LinkSetNsFd puts the device into a new network namespace. The
+// fd must be an open file descriptor to a network namespace.
+// Similar to: `ip link set $link netns $ns`
+func (h *Handle) LinkSetNsFd(link Link, fd int) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
+ if vxlan.UDPCSum {
+ nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum))
+ }
if vxlan.GBP {
- nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP))
+ nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{})
}
-
if vxlan.NoAge {
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
} else if vxlan.Age > 0 {
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit)))
}
if vxlan.Port > 0 {
- nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT, nl.Uint16Attr(uint16(vxlan.Port)))
+ nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port)))
}
if vxlan.PortLow > 0 || vxlan.PortHigh > 0 {
pr := vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)}
}
}
+// addBondAttrs appends an IFLA_INFO_DATA attribute to linkInfo holding the
+// bond options of bond.
+//
+// A numeric option is written only when its value is >= 0 (NewLinkBond
+// presets them to -1), as an 8-bit or 32-bit attribute depending on the
+// option. ArpIpTargets is written when non-nil: each address is stored under
+// its slice index, as 4 bytes when it has an IPv4 form and as 16 bytes
+// otherwise. Attributes are emitted in a fixed order.
+func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) {
+	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+
+	// putU8 and putU32 emit one option, skipping values marked as unset.
+	putU8 := func(attrType, value int) {
+		if value >= 0 {
+			nl.NewRtAttrChild(data, attrType, nl.Uint8Attr(uint8(value)))
+		}
+	}
+	putU32 := func(attrType, value int) {
+		if value >= 0 {
+			nl.NewRtAttrChild(data, attrType, nl.Uint32Attr(uint32(value)))
+		}
+	}
+
+	putU8(nl.IFLA_BOND_MODE, int(bond.Mode))
+	putU32(nl.IFLA_BOND_ACTIVE_SLAVE, bond.ActiveSlave)
+	putU32(nl.IFLA_BOND_MIIMON, bond.Miimon)
+	putU32(nl.IFLA_BOND_UPDELAY, bond.UpDelay)
+	putU32(nl.IFLA_BOND_DOWNDELAY, bond.DownDelay)
+	putU8(nl.IFLA_BOND_USE_CARRIER, bond.UseCarrier)
+	putU32(nl.IFLA_BOND_ARP_INTERVAL, bond.ArpInterval)
+
+	if bond.ArpIpTargets != nil {
+		targets := nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_IP_TARGET, nil)
+		for i, target := range bond.ArpIpTargets {
+			ip := target.To4()
+			if ip == nil {
+				ip = target.To16()
+			}
+			if ip != nil {
+				nl.NewRtAttrChild(targets, i, []byte(ip))
+			}
+		}
+	}
+
+	putU32(nl.IFLA_BOND_ARP_VALIDATE, int(bond.ArpValidate))
+	putU32(nl.IFLA_BOND_ARP_ALL_TARGETS, int(bond.ArpAllTargets))
+	putU32(nl.IFLA_BOND_PRIMARY, bond.Primary)
+	putU8(nl.IFLA_BOND_PRIMARY_RESELECT, int(bond.PrimaryReselect))
+	putU8(nl.IFLA_BOND_FAIL_OVER_MAC, int(bond.FailOverMac))
+	putU8(nl.IFLA_BOND_XMIT_HASH_POLICY, int(bond.XmitHashPolicy))
+	putU32(nl.IFLA_BOND_RESEND_IGMP, bond.ResendIgmp)
+	putU8(nl.IFLA_BOND_NUM_PEER_NOTIF, bond.NumPeerNotif)
+	putU8(nl.IFLA_BOND_ALL_SLAVES_ACTIVE, bond.AllSlavesActive)
+	putU32(nl.IFLA_BOND_MIN_LINKS, bond.MinLinks)
+	putU32(nl.IFLA_BOND_LP_INTERVAL, bond.LpInterval)
+	putU32(nl.IFLA_BOND_PACKETS_PER_SLAVE, bond.PackersPerSlave)
+	putU8(nl.IFLA_BOND_AD_LACP_RATE, int(bond.LacpRate))
+	putU8(nl.IFLA_BOND_AD_SELECT, int(bond.AdSelect))
+}
+
// LinkAdd adds a new link device. The type and features of the device
-// are taken fromt the parameters in the link object.
+// are taken from the parameters in the link object.
// Equivalent to: `ip link add $link`
func LinkAdd(link Link) error {
+ return pkgHandle.LinkAdd(link)
+}
+
+// LinkAdd adds a new link device. The type and features of the device
+// are taken from the parameters in the link object.
+// Equivalent to: `ip link add $link`
+func (h *Handle) LinkAdd(link Link) error {
// TODO: set mtu and hardware address
// TODO: support extra data for macvlan
base := link.Attrs()
return fmt.Errorf("LinkAttrs.Name cannot be empty!")
}
- req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ if tuntap, ok := link.(*Tuntap); ok {
+ // TODO: support user
+ // TODO: support group
+ // TODO: multi_queue
+ // TODO: support non- persistent
+ if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
+ return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
+ }
+ file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+ var req ifReq
+ if tuntap.Flags == 0 {
+ req.Flags = uint16(TUNTAP_DEFAULTS)
+ } else {
+ req.Flags = uint16(tuntap.Flags)
+ }
+ req.Flags |= uint16(tuntap.Mode)
+ copy(req.Name[:15], base.Name)
+ _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
+ if errno != 0 {
+ return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
+ }
+ _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
+ if errno != 0 {
+ return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
+ }
+ h.ensureIndex(base)
+
+ // can't set master during create, so set it afterwards
+ if base.MasterIndex != 0 {
+ // TODO: verify MasterIndex is actually a bridge?
+ return h.LinkSetMasterByIndex(link, base.MasterIndex)
+ }
+ return nil
+ }
+
+ req := h.newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+ // TODO: make it shorter
+ if base.Flags&net.FlagUp != 0 {
+ msg.Change = syscall.IFF_UP
+ msg.Flags = syscall.IFF_UP
+ }
+ if base.Flags&net.FlagBroadcast != 0 {
+ msg.Change |= syscall.IFF_BROADCAST
+ msg.Flags |= syscall.IFF_BROADCAST
+ }
+ if base.Flags&net.FlagLoopback != 0 {
+ msg.Change |= syscall.IFF_LOOPBACK
+ msg.Flags |= syscall.IFF_LOOPBACK
+ }
+ if base.Flags&net.FlagPointToPoint != 0 {
+ msg.Change |= syscall.IFF_POINTOPOINT
+ msg.Flags |= syscall.IFF_POINTOPOINT
+ }
+ if base.Flags&net.FlagMulticast != 0 {
+ msg.Change |= syscall.IFF_MULTICAST
+ msg.Flags |= syscall.IFF_MULTICAST
+ }
req.AddData(msg)
if base.ParentIndex != 0 {
} else if vxlan, ok := link.(*Vxlan); ok {
addVxlanAttrs(vxlan, linkInfo)
+ } else if bond, ok := link.(*Bond); ok {
+ addBondAttrs(bond, linkInfo)
} else if ipv, ok := link.(*IPVlan); ok {
data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(ipv.Mode)))
data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
}
+ } else if macv, ok := link.(*Macvtap); ok {
+ if macv.Mode != MACVLAN_MODE_DEFAULT {
+ data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+ nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
+ }
+ } else if gretap, ok := link.(*Gretap); ok {
+ addGretapAttrs(gretap, linkInfo)
}
req.AddData(linkInfo)
return err
}
- ensureIndex(base)
+ h.ensureIndex(base)
// can't set master during create, so set it afterwards
if base.MasterIndex != 0 {
// TODO: verify MasterIndex is actually a bridge?
- return LinkSetMasterByIndex(link, base.MasterIndex)
+ return h.LinkSetMasterByIndex(link, base.MasterIndex)
}
return nil
}
// the link object for it to be deleted. The other values are ignored.
// Equivalent to: `ip link del $link`
func LinkDel(link Link) error {
+ return pkgHandle.LinkDel(link)
+}
+
+// LinkDel deletes link device. Either Index or Name must be set in
+// the link object for it to be deleted. The other values are ignored.
+// Equivalent to: `ip link del $link`
+func (h *Handle) LinkDel(link Link) error {
base := link.Attrs()
- ensureIndex(base)
+ h.ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
+ req := h.newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(base.Index)
return err
}
-func linkByNameDump(name string) (Link, error) {
- links, err := LinkList()
+func (h *Handle) linkByNameDump(name string) (Link, error) {
+ links, err := h.LinkList()
if err != nil {
return nil, err
}
return nil, fmt.Errorf("Link %s not found", name)
}
+// linkByAliasDump finds a link by alias the slow way: it lists every link
+// through h.LinkList and returns the first one whose Attrs().Alias equals
+// alias. A LinkList error is returned unchanged; if nothing matches, a
+// "not found" error is returned.
+func (h *Handle) linkByAliasDump(alias string) (Link, error) {
+	all, err := h.LinkList()
+	if err != nil {
+		return nil, err
+	}
+
+	for idx := range all {
+		if all[idx].Attrs().Alias != alias {
+			continue
+		}
+		return all[idx], nil
+	}
+	return nil, fmt.Errorf("Link alias %s not found", alias)
+}
+
// LinkByName finds a link by name and returns a pointer to the object.
func LinkByName(name string) (Link, error) {
- if lookupByDump {
- return linkByNameDump(name)
+ return pkgHandle.LinkByName(name)
+}
+
+// LinkByName finds a link by name and returns a pointer to the object.
+func (h *Handle) LinkByName(name string) (Link, error) {
+ if h.lookupByDump {
+ return h.linkByNameDump(name)
}
- req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
+ req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
req.AddData(msg)
if err == syscall.EINVAL {
// older kernels don't support looking up via IFLA_IFNAME
// so fall back to dumping all links
- lookupByDump = true
- return linkByNameDump(name)
+ h.lookupByDump = true
+ return h.linkByNameDump(name)
+ }
+
+ return link, err
+}
+
+// LinkByAlias looks a link up by its alias using the package-level handle.
+// If several links carry the alias, the first one found is returned.
+func LinkByAlias(alias string) (Link, error) {
+	link, err := pkgHandle.LinkByAlias(alias)
+	return link, err
+}
+
+// LinkByAlias finds a link by its alias and returns a pointer to the object.
+// If there are multiple links with the alias it returns the first one
+func (h *Handle) LinkByAlias(alias string) (Link, error) {
+ if h.lookupByDump {
+ return h.linkByAliasDump(alias)
+ }
+
+ req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
+
+ msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+ req.AddData(msg)
+
+ nameData := nl.NewRtAttr(syscall.IFLA_IFALIAS, nl.ZeroTerminated(alias))
+ req.AddData(nameData)
+
+ link, err := execGetLink(req)
+ if err == syscall.EINVAL {
+ // older kernels don't support looking up via IFLA_IFALIAS
+ // so fall back to dumping all links
+ h.lookupByDump = true
+ return h.linkByAliasDump(alias)
}
return link, err
// LinkByIndex finds a link by index and returns a pointer to the object.
func LinkByIndex(index int) (Link, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
+ return pkgHandle.LinkByIndex(index)
+}
+
+// LinkByIndex finds a link by index and returns a pointer to the object.
+func (h *Handle) LinkByIndex(index int) (Link, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(index)
link = &Veth{}
case "vxlan":
link = &Vxlan{}
+ case "bond":
+ link = &Bond{}
case "ipvlan":
link = &IPVlan{}
case "macvlan":
link = &Macvlan{}
case "macvtap":
link = &Macvtap{}
+ case "gretap":
+ link = &Gretap{}
default:
link = &GenericLink{LinkType: linkType}
}
parseVlanData(link, data)
case "vxlan":
parseVxlanData(link, data)
+ case "bond":
+ parseBondData(link, data)
case "ipvlan":
parseIPVlanData(link, data)
case "macvlan":
parseMacvlanData(link, data)
case "macvtap":
parseMacvtapData(link, data)
+ case "gretap":
+ parseGretapData(link, data)
}
}
}
base.MasterIndex = int(native.Uint32(attr.Value[0:4]))
case syscall.IFLA_TXQLEN:
base.TxQLen = int(native.Uint32(attr.Value[0:4]))
+ case syscall.IFLA_IFALIAS:
+ base.Alias = string(attr.Value[:len(attr.Value)-1])
+ case syscall.IFLA_STATS:
+ base.Statistics = parseLinkStats(attr.Value[:])
}
}
// Links that don't have IFLA_INFO_KIND are hardware devices
// LinkList gets a list of link devices.
// Equivalent to: `ip link show`
func LinkList() ([]Link, error) {
+ return pkgHandle.LinkList()
+}
+
+// LinkList gets a list of link devices.
+// Equivalent to: `ip link show`
+func (h *Handle) LinkList() ([]Link, error) {
// NOTE(vish): This duplicates functionality in net/iface_linux.go, but we need
// to get the message ourselves to parse link type.
- req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
+ req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
req.AddData(msg)
// LinkUpdate is used to pass information back from LinkSubscribe()
type LinkUpdate struct {
nl.IfInfomsg
+ Header syscall.NlMsghdr
Link
}
// LinkSubscribe takes a chan down which notifications will be sent
// when links change. Close the 'done' chan to stop subscription.
func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
- s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
+ return linkSubscribe(netns.None(), netns.None(), ch, done)
+}
+
+// LinkSubscribeAt behaves like LinkSubscribe, except that the caller picks
+// the network namespace (ns) in which the subscription is made.
+func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
+	// No explicit "current" namespace is supplied; netns.None() is passed for it.
+	current := netns.None()
+	return linkSubscribe(ns, current, ch, done)
+}
+
+func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
if err != nil {
return err
}
if err != nil {
return
}
- ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
+ ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: m.Header, Link: link}
}
}
}()
}
func LinkSetHairpin(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
+ return pkgHandle.LinkSetHairpin(link, mode)
+}
+
+func (h *Handle) LinkSetHairpin(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
}
func LinkSetGuard(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD)
+ return pkgHandle.LinkSetGuard(link, mode)
+}
+
+func (h *Handle) LinkSetGuard(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD)
}
func LinkSetFastLeave(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_FAST_LEAVE)
+ return pkgHandle.LinkSetFastLeave(link, mode)
+}
+
+func (h *Handle) LinkSetFastLeave(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_FAST_LEAVE)
}
func LinkSetLearning(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_LEARNING)
+ return pkgHandle.LinkSetLearning(link, mode)
+}
+
+func (h *Handle) LinkSetLearning(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_LEARNING)
}
func LinkSetRootBlock(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROTECT)
+ return pkgHandle.LinkSetRootBlock(link, mode)
+}
+
+func (h *Handle) LinkSetRootBlock(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROTECT)
}
func LinkSetFlood(link Link, mode bool) error {
- return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD)
+ return pkgHandle.LinkSetFlood(link, mode)
+}
+
+func (h *Handle) LinkSetFlood(link Link, mode bool) error {
+ return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD)
}
-func setProtinfoAttr(link Link, mode bool, attr int) error {
+func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
base := link.Attrs()
- ensureIndex(base)
- req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+ h.ensureIndex(base)
+ req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
msg.Index = int32(base.Index)
vxlan.L2miss = int8(datum.Value[0]) != 0
case nl.IFLA_VXLAN_L3MISS:
vxlan.L3miss = int8(datum.Value[0]) != 0
+ case nl.IFLA_VXLAN_UDP_CSUM:
+ vxlan.UDPCSum = int8(datum.Value[0]) != 0
case nl.IFLA_VXLAN_GBP:
- vxlan.GBP = int8(datum.Value[0]) != 0
+ vxlan.GBP = true
case nl.IFLA_VXLAN_AGEING:
vxlan.Age = int(native.Uint32(datum.Value[0:4]))
vxlan.NoAge = vxlan.Age == 0
case nl.IFLA_VXLAN_LIMIT:
vxlan.Limit = int(native.Uint32(datum.Value[0:4]))
case nl.IFLA_VXLAN_PORT:
- vxlan.Port = int(native.Uint16(datum.Value[0:2]))
+ vxlan.Port = int(ntohs(datum.Value[0:2]))
case nl.IFLA_VXLAN_PORT_RANGE:
buf := bytes.NewBuffer(datum.Value[0:4])
var pr vxlanPortRange
}
}
+// parseBondData decodes the bond attributes in data into the *Bond held by
+// link.
+//
+// link must hold a *Bond (the type assertion panics otherwise), in line with
+// the other parse*Data helpers. Values are written straight into that
+// object: decoding into a freshly allocated Bond, as this function used to
+// do, throws every parsed attribute away on return.
+//
+// IFLA_BOND_ARP_IP_TARGET and IFLA_BOND_AD_INFO are recognised but not
+// decoded yet.
+func parseBondData(link Link, data []syscall.NetlinkRouteAttr) {
+	bond := link.(*Bond)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.IFLA_BOND_MODE:
+			bond.Mode = BondMode(datum.Value[0])
+		case nl.IFLA_BOND_ACTIVE_SLAVE:
+			bond.ActiveSlave = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_MIIMON:
+			bond.Miimon = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_UPDELAY:
+			bond.UpDelay = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_DOWNDELAY:
+			bond.DownDelay = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_USE_CARRIER:
+			bond.UseCarrier = int(datum.Value[0])
+		case nl.IFLA_BOND_ARP_INTERVAL:
+			bond.ArpInterval = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_ARP_IP_TARGET:
+			// TODO: implement
+		case nl.IFLA_BOND_ARP_VALIDATE:
+			bond.ArpValidate = BondArpValidate(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_ARP_ALL_TARGETS:
+			bond.ArpAllTargets = BondArpAllTargets(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_PRIMARY:
+			bond.Primary = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_PRIMARY_RESELECT:
+			bond.PrimaryReselect = BondPrimaryReselect(datum.Value[0])
+		case nl.IFLA_BOND_FAIL_OVER_MAC:
+			bond.FailOverMac = BondFailOverMac(datum.Value[0])
+		case nl.IFLA_BOND_XMIT_HASH_POLICY:
+			bond.XmitHashPolicy = BondXmitHashPolicy(datum.Value[0])
+		case nl.IFLA_BOND_RESEND_IGMP:
+			bond.ResendIgmp = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_NUM_PEER_NOTIF:
+			bond.NumPeerNotif = int(datum.Value[0])
+		case nl.IFLA_BOND_ALL_SLAVES_ACTIVE:
+			bond.AllSlavesActive = int(datum.Value[0])
+		case nl.IFLA_BOND_MIN_LINKS:
+			bond.MinLinks = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_LP_INTERVAL:
+			bond.LpInterval = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_PACKETS_PER_SLAVE:
+			bond.PackersPerSlave = int(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_BOND_AD_LACP_RATE:
+			bond.LacpRate = BondLacpRate(datum.Value[0])
+		case nl.IFLA_BOND_AD_SELECT:
+			bond.AdSelect = BondAdSelect(datum.Value[0])
+		case nl.IFLA_BOND_AD_INFO:
+			// TODO: implement
+		}
+	}
+}
+
func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
ipv := link.(*IPVlan)
for _, datum := range data {
}
return f
}
+
+func htonl(val uint32) []byte {
+ bytes := make([]byte, 4)
+ binary.BigEndian.PutUint32(bytes, val)
+ return bytes
+}
+
+func htons(val uint16) []byte {
+ bytes := make([]byte, 2)
+ binary.BigEndian.PutUint16(bytes, val)
+ return bytes
+}
+
+func ntohl(buf []byte) uint32 {
+ return binary.BigEndian.Uint32(buf)
+}
+
+func ntohs(buf []byte) uint16 {
+ return binary.BigEndian.Uint16(buf)
+}
+
+// addGretapAttrs appends an IFLA_INFO_DATA child to linkInfo and fills it
+// with the GRE attributes taken from gretap.
+//
+// Local and Remote are only emitted when To4() yields an address. A non-zero
+// IKey/OKey is emitted in network byte order and additionally sets GRE_KEY in
+// gretap.IFlags/OFlags, so the caller's Gretap is modified. IFLA_GRE_LINK is
+// skipped when gretap.Link is zero; all remaining attributes are always
+// written.
+func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
+	greData := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+
+	if local := gretap.Local.To4(); local != nil {
+		nl.NewRtAttrChild(greData, nl.IFLA_GRE_LOCAL, []byte(local))
+	}
+	if remote := gretap.Remote.To4(); remote != nil {
+		nl.NewRtAttrChild(greData, nl.IFLA_GRE_REMOTE, []byte(remote))
+	}
+
+	if gretap.IKey != 0 {
+		nl.NewRtAttrChild(greData, nl.IFLA_GRE_IKEY, htonl(gretap.IKey))
+		gretap.IFlags |= uint16(nl.GRE_KEY)
+	}
+	if gretap.OKey != 0 {
+		nl.NewRtAttrChild(greData, nl.IFLA_GRE_OKEY, htonl(gretap.OKey))
+		gretap.OFlags |= uint16(nl.GRE_KEY)
+	}
+
+	// The flags go out after the key handling so that GRE_KEY is included.
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags))
+
+	if gretap.Link != 0 {
+		nl.NewRtAttrChild(greData, nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link))
+	}
+
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport))
+	nl.NewRtAttrChild(greData, nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport))
+}
+
+// parseGretapData copies the GRE attributes found in data into the *Gretap
+// held by link (the type assertion panics for any other Link type).
+//
+// Keys, flags and encapsulation ports are converted from network byte order
+// with ntohl/ntohs, mirroring the htonl/htons encoding in addGretapAttrs.
+// ENCAP_TYPE and ENCAP_FLAGS are read in native byte order.
+func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
+	gre := link.(*Gretap)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.IFLA_GRE_IKEY:
+			// IKEY belongs to IKey and OKEY to OKey, the same pairing
+			// addGretapAttrs uses; the two were previously swapped here.
+			gre.IKey = ntohl(datum.Value[0:4])
+		case nl.IFLA_GRE_OKEY:
+			gre.OKey = ntohl(datum.Value[0:4])
+		case nl.IFLA_GRE_LOCAL:
+			gre.Local = net.IP(datum.Value[0:4])
+		case nl.IFLA_GRE_REMOTE:
+			gre.Remote = net.IP(datum.Value[0:4])
+		case nl.IFLA_GRE_ENCAP_SPORT:
+			gre.EncapSport = ntohs(datum.Value[0:2])
+		case nl.IFLA_GRE_ENCAP_DPORT:
+			gre.EncapDport = ntohs(datum.Value[0:2])
+		case nl.IFLA_GRE_IFLAGS:
+			gre.IFlags = ntohs(datum.Value[0:2])
+		case nl.IFLA_GRE_OFLAGS:
+			gre.OFlags = ntohs(datum.Value[0:2])
+
+		case nl.IFLA_GRE_TTL:
+			gre.Ttl = uint8(datum.Value[0])
+		case nl.IFLA_GRE_TOS:
+			gre.Tos = uint8(datum.Value[0])
+		case nl.IFLA_GRE_PMTUDISC:
+			gre.PMtuDisc = uint8(datum.Value[0])
+		case nl.IFLA_GRE_ENCAP_TYPE:
+			gre.EncapType = native.Uint16(datum.Value[0:2])
+		case nl.IFLA_GRE_ENCAP_FLAGS:
+			gre.EncapFlags = native.Uint16(datum.Value[0:2])
+		}
+	}
+}
+
+// parseLinkStats reinterprets the first SizeofLinkStats bytes of data as a
+// LinkStatistics. The result points into data's backing memory (no copy is
+// made); the slice expression panics if data is too small.
+func parseLinkStats(data []byte) *LinkStatistics {
+	raw := data[0:SizeofLinkStats]
+	return (*LinkStatistics)(unsafe.Pointer(&raw[0]))
+}
--- /dev/null
+package netlink
+
+// Ideally golang.org/x/sys/unix would define IfReq, but it only provides
+// IFNAMSIZ; hence this minimal local definition of the sizes ifReq needs.
+const (
+ SizeOfIfReq = 40 // total size of ifReq in bytes (name + flags + padding)
+ IFNAMSIZ = 16 // length of the interface name field in ifReq
+)
+
+type ifReq struct { // minimal ifreq: name and flags, padded to SizeOfIfReq bytes (presumably passed to the tuntap ioctls — usage is outside this chunk)
+ Name [IFNAMSIZ]byte // interface name
+ Flags uint16
+ pad [SizeOfIfReq - IFNAMSIZ - 2]byte // fills the struct up to SizeOfIfReq; the 2 is the size of Flags
+}
// NeighAdd will add an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh add ....`
func NeighAdd(neigh *Neigh) error {
- return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
+ return pkgHandle.NeighAdd(neigh)
}
-// NeighAdd will add or replace an IP to MAC mapping to the ARP table
+// NeighAdd will add an IP to MAC mapping to the ARP table
+// Equivalent to: `ip neigh add ....`
+func (h *Handle) NeighAdd(neigh *Neigh) error {
+ return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
+}
+
+// NeighSet will add or replace an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh replace....`
func NeighSet(neigh *Neigh) error {
- return neighAdd(neigh, syscall.NLM_F_CREATE)
+ return pkgHandle.NeighSet(neigh)
+}
+
+// NeighSet will add or replace an IP to MAC mapping to the ARP table
+// Equivalent to: `ip neigh replace....`
+func (h *Handle) NeighSet(neigh *Neigh) error {
+ return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE)
}
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func NeighAppend(neigh *Neigh) error {
- return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND)
+ return pkgHandle.NeighAppend(neigh)
}
+// NeighAppend will append an entry to FDB
+// Equivalent to: `bridge fdb append...`
+func (h *Handle) NeighAppend(neigh *Neigh) error {
+ return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND)
+}
+
+// neighAdd forwards to the package-level handle's neighAdd, passing mode
+// through as the netlink request flags.
func neighAdd(neigh *Neigh, mode int) error {
- req := nl.NewNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK)
+ return pkgHandle.neighAdd(neigh, mode)
+}
+
+// neighAdd sends an RTM_NEWNEIGH request for neigh; mode supplies the
+// NLM_F_* flags, and NLM_F_ACK is always added.
+func (h *Handle) neighAdd(neigh *Neigh, mode int) error {
+ req := h.newNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK)
return neighHandle(neigh, req)
}
// NeighDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func NeighDel(neigh *Neigh) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK)
+ return pkgHandle.NeighDel(neigh)
+}
+
+// NeighDel will delete a neighbor entry (IP to MAC mapping).
+// Equivalent to: `ip neigh del ....`
+func (h *Handle) NeighDel(neigh *Neigh) error {
+ req := h.newNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK)
return neighHandle(neigh, req)
}
// Equivalent to: `ip neighbor show`.
// The list can be filtered by link and ip family.
func NeighList(linkIndex, family int) ([]Neigh, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
+ return pkgHandle.NeighList(linkIndex, family)
+}
+
+// NeighList gets a list of IP-MAC mappings in the system (ARP table).
+// Equivalent to: `ip neighbor show`.
+// The list can be filtered by link and ip family.
+func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
msg := Ndmsg{
Family: uint8(family),
+ Index: uint32(linkIndex),
}
req.AddData(&msg)
// interface that is loosly modeled on the iproute2 cli.
package netlink
-import (
- "net"
-
- "github.com/vishvananda/netlink/nl"
-)
-
-const (
- // Family type definitions
- FAMILY_ALL = nl.FAMILY_ALL
- FAMILY_V4 = nl.FAMILY_V4
- FAMILY_V6 = nl.FAMILY_V6
-)
+import "net"
// ParseIPNet parses a string in ip/net format and returns a net.IPNet.
// This is valuable because addresses in netlink are often IPNets and
return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil
}
-// NewIPNet generates an IPNet from an ip address using a netmask of 32.
+// NewIPNet generates an IPNet from an ip address using a netmask of 32 or 128.
func NewIPNet(ip net.IP) *net.IPNet {
- return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)}
+ if ip.To4() != nil {
+ return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)}
+ }
+ return &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)}
}
--- /dev/null
+package netlink
+
+import "github.com/vishvananda/netlink/nl"
+
+// Family type definitions, re-exported from the nl package.
+const (
+ FAMILY_ALL = nl.FAMILY_ALL
+ FAMILY_V4 = nl.FAMILY_V4
+ FAMILY_V6 = nl.FAMILY_V6
+)
return nil, ErrNotImplemented
}
-func NeighDeserialize(m []byte) (*Ndmsg, *Neigh, error) {
- return nil, nil, ErrNotImplemented
+func NeighDeserialize(m []byte) (*Neigh, error) {
+ return nil, ErrNotImplemented
}
package nl
+import (
+ "unsafe"
+)
+
const (
DEFAULT_CHANGE = 0xFFFFFFFF
+ // doesn't exist in syscall
+ IFLA_VFINFO_LIST = 0x16
)
const (
MACVLAN_MODE_PASSTHRU = 8
MACVLAN_MODE_SOURCE = 16
)
+
+const ( // IFLA_BOND_*: attribute types of a bond link's data, numbered from 0 via iota; parseBondData switches on these
+ IFLA_BOND_UNSPEC = iota
+ IFLA_BOND_MODE
+ IFLA_BOND_ACTIVE_SLAVE
+ IFLA_BOND_MIIMON
+ IFLA_BOND_UPDELAY
+ IFLA_BOND_DOWNDELAY
+ IFLA_BOND_USE_CARRIER
+ IFLA_BOND_ARP_INTERVAL
+ IFLA_BOND_ARP_IP_TARGET
+ IFLA_BOND_ARP_VALIDATE
+ IFLA_BOND_ARP_ALL_TARGETS
+ IFLA_BOND_PRIMARY
+ IFLA_BOND_PRIMARY_RESELECT
+ IFLA_BOND_FAIL_OVER_MAC
+ IFLA_BOND_XMIT_HASH_POLICY
+ IFLA_BOND_RESEND_IGMP
+ IFLA_BOND_NUM_PEER_NOTIF
+ IFLA_BOND_ALL_SLAVES_ACTIVE
+ IFLA_BOND_MIN_LINKS
+ IFLA_BOND_LP_INTERVAL
+ IFLA_BOND_PACKETS_PER_SLAVE
+ IFLA_BOND_AD_LACP_RATE
+ IFLA_BOND_AD_SELECT
+ IFLA_BOND_AD_INFO
+)
+
+const ( // IFLA_BOND_AD_INFO_*: numbered from 0 via iota; presumably the attributes nested under IFLA_BOND_AD_INFO (not parsed in this chunk — confirm)
+ IFLA_BOND_AD_INFO_UNSPEC = iota
+ IFLA_BOND_AD_INFO_AGGREGATOR
+ IFLA_BOND_AD_INFO_NUM_PORTS
+ IFLA_BOND_AD_INFO_ACTOR_KEY
+ IFLA_BOND_AD_INFO_PARTNER_KEY
+ IFLA_BOND_AD_INFO_PARTNER_MAC
+)
+
+const ( // IFLA_BOND_SLAVE_*: numbered from 0 via iota; presumably per-slave bond attributes (no user visible in this chunk — confirm)
+ IFLA_BOND_SLAVE_UNSPEC = iota
+ IFLA_BOND_SLAVE_STATE
+ IFLA_BOND_SLAVE_MII_STATUS
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT
+ IFLA_BOND_SLAVE_PERM_HWADDR
+ IFLA_BOND_SLAVE_QUEUE_ID
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID
+)
+
+const ( // IFLA_GRE_*: attribute types of a GRE link's data, numbered from 0 via iota; written by addGretapAttrs and read by parseGretapData
+ IFLA_GRE_UNSPEC = iota
+ IFLA_GRE_LINK
+ IFLA_GRE_IFLAGS
+ IFLA_GRE_OFLAGS
+ IFLA_GRE_IKEY
+ IFLA_GRE_OKEY
+ IFLA_GRE_LOCAL
+ IFLA_GRE_REMOTE
+ IFLA_GRE_TTL
+ IFLA_GRE_TOS
+ IFLA_GRE_PMTUDISC
+ IFLA_GRE_ENCAP_LIMIT
+ IFLA_GRE_FLOWINFO
+ IFLA_GRE_FLAGS
+ IFLA_GRE_ENCAP_TYPE
+ IFLA_GRE_ENCAP_FLAGS
+ IFLA_GRE_ENCAP_SPORT
+ IFLA_GRE_ENCAP_DPORT
+ IFLA_GRE_COLLECT_METADATA
+ IFLA_GRE_MAX = IFLA_GRE_COLLECT_METADATA
+)
+
+const ( // GRE_*: bit masks for the 16-bit GRE flags; addGretapAttrs ORs GRE_KEY into IFlags/OFlags when a key is set
+ GRE_CSUM = 0x8000
+ GRE_ROUTING = 0x4000
+ GRE_KEY = 0x2000
+ GRE_SEQ = 0x1000
+ GRE_STRICT = 0x0800
+ GRE_REC = 0x0700
+ GRE_FLAGS = 0x00F8
+ GRE_VERSION = 0x0007
+)
+
+const ( // IFLA_VF_INFO_*: numbered from 0 via iota; presumably the entry type inside IFLA_VFINFO_LIST — confirm against the kernel's if_link.h
+ IFLA_VF_INFO_UNSPEC = iota
+ IFLA_VF_INFO
+ IFLA_VF_INFO_MAX = IFLA_VF_INFO
+)
+
+const ( // IFLA_VF_*: per-VF attribute types, numbered from 0 via iota; the per-value comments appear to be carried over from the kernel header
+ IFLA_VF_UNSPEC = iota
+ IFLA_VF_MAC /* Hardware queue specific attributes */
+ IFLA_VF_VLAN
+ IFLA_VF_TX_RATE /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE /* link state enable/disable/auto switch */
+ IFLA_VF_RATE /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS /* network device statistics */
+ IFLA_VF_MAX = IFLA_VF_STATS
+)
+
+const ( // IFLA_VF_LINK_STATE_*: numbered from 0 via iota; presumably the values for VfLinkState.LinkState — confirm
+ IFLA_VF_LINK_STATE_AUTO = iota /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE /* link always down */
+ IFLA_VF_LINK_STATE_MAX = IFLA_VF_LINK_STATE_DISABLE
+)
+
+const ( // IFLA_VF_STATS_*: numbered from 0 via iota (RX_PACKETS is 0); presumably nested under IFLA_VF_STATS — confirm
+ IFLA_VF_STATS_RX_PACKETS = iota
+ IFLA_VF_STATS_TX_PACKETS
+ IFLA_VF_STATS_RX_BYTES
+ IFLA_VF_STATS_TX_BYTES
+ IFLA_VF_STATS_BROADCAST
+ IFLA_VF_STATS_MULTICAST
+ IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST
+)
+
+const ( // byte sizes of the Vf* structs below; returned by their Len methods and used as array lengths in Serialize/Deserialize
+ SizeofVfMac = 0x24
+ SizeofVfVlan = 0x0c
+ SizeofVfTxRate = 0x08
+ SizeofVfRate = 0x0c
+ SizeofVfSpoofchk = 0x08
+ SizeofVfLinkState = 0x08
+ SizeofVfRssQueryEn = 0x08
+)
+
+// VfMac mirrors the kernel structure
+//
+//	struct ifla_vf_mac {
+//		__u32 vf;
+//		__u8 mac[32]; /* MAX_ADDR_LEN */
+//	};
+type VfMac struct {
+	Vf  uint32
+	Mac [32]byte
+}
+
+// Len reports the serialized size of the message (SizeofVfMac bytes).
+func (msg *VfMac) Len() int {
+	return SizeofVfMac
+}
+
+// DeserializeVfMac reinterprets the first SizeofVfMac bytes of b as a VfMac.
+// The result points into b (no copy); the slice expression panics if b is
+// too small.
+func DeserializeVfMac(b []byte) *VfMac {
+	raw := b[0:SizeofVfMac]
+	return (*VfMac)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfMac-byte view of msg's memory (no copy).
+func (msg *VfMac) Serialize() []byte {
+	view := (*[SizeofVfMac]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfVlan mirrors the kernel structure
+//
+//	struct ifla_vf_vlan {
+//		__u32 vf;
+//		__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+//		__u32 qos;
+//	};
+type VfVlan struct {
+	Vf   uint32
+	Vlan uint32
+	Qos  uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfVlan bytes).
+func (msg *VfVlan) Len() int {
+	return SizeofVfVlan
+}
+
+// DeserializeVfVlan reinterprets the first SizeofVfVlan bytes of b as a
+// VfVlan. The result points into b (no copy); the slice expression panics if
+// b is too small.
+func DeserializeVfVlan(b []byte) *VfVlan {
+	raw := b[0:SizeofVfVlan]
+	return (*VfVlan)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfVlan-byte view of msg's memory (no copy).
+func (msg *VfVlan) Serialize() []byte {
+	view := (*[SizeofVfVlan]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfTxRate mirrors the kernel structure
+//
+//	struct ifla_vf_tx_rate {
+//		__u32 vf;
+//		__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+//	};
+type VfTxRate struct {
+	Vf   uint32
+	Rate uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfTxRate bytes).
+func (msg *VfTxRate) Len() int {
+	return SizeofVfTxRate
+}
+
+// DeserializeVfTxRate reinterprets the first SizeofVfTxRate bytes of b as a
+// VfTxRate. The result points into b (no copy); the slice expression panics
+// if b is too small.
+func DeserializeVfTxRate(b []byte) *VfTxRate {
+	raw := b[0:SizeofVfTxRate]
+	return (*VfTxRate)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfTxRate-byte view of msg's memory (no copy).
+func (msg *VfTxRate) Serialize() []byte {
+	view := (*[SizeofVfTxRate]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfRate mirrors the kernel structure
+//
+//	struct ifla_vf_rate {
+//		__u32 vf;
+//		__u32 min_tx_rate; /* Min Bandwidth in Mbps */
+//		__u32 max_tx_rate; /* Max Bandwidth in Mbps */
+//	};
+type VfRate struct {
+	Vf        uint32
+	MinTxRate uint32
+	MaxTxRate uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfRate bytes).
+func (msg *VfRate) Len() int {
+	return SizeofVfRate
+}
+
+// DeserializeVfRate reinterprets the first SizeofVfRate bytes of b as a
+// VfRate. The result points into b (no copy); the slice expression panics if
+// b is too small.
+func DeserializeVfRate(b []byte) *VfRate {
+	raw := b[0:SizeofVfRate]
+	return (*VfRate)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfRate-byte view of msg's memory (no copy).
+func (msg *VfRate) Serialize() []byte {
+	view := (*[SizeofVfRate]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfSpoofchk mirrors the kernel structure
+//
+//	struct ifla_vf_spoofchk {
+//		__u32 vf;
+//		__u32 setting;
+//	};
+type VfSpoofchk struct {
+	Vf      uint32
+	Setting uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfSpoofchk bytes).
+func (msg *VfSpoofchk) Len() int {
+	return SizeofVfSpoofchk
+}
+
+// DeserializeVfSpoofchk reinterprets the first SizeofVfSpoofchk bytes of b
+// as a VfSpoofchk. The result points into b (no copy); the slice expression
+// panics if b is too small.
+func DeserializeVfSpoofchk(b []byte) *VfSpoofchk {
+	raw := b[0:SizeofVfSpoofchk]
+	return (*VfSpoofchk)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfSpoofchk-byte view of msg's memory (no copy).
+func (msg *VfSpoofchk) Serialize() []byte {
+	view := (*[SizeofVfSpoofchk]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfLinkState mirrors the kernel structure
+//
+//	struct ifla_vf_link_state {
+//		__u32 vf;
+//		__u32 link_state;
+//	};
+type VfLinkState struct {
+	Vf        uint32
+	LinkState uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfLinkState bytes).
+func (msg *VfLinkState) Len() int {
+	return SizeofVfLinkState
+}
+
+// DeserializeVfLinkState reinterprets the first SizeofVfLinkState bytes of b
+// as a VfLinkState. The result points into b (no copy); the slice expression
+// panics if b is too small.
+func DeserializeVfLinkState(b []byte) *VfLinkState {
+	raw := b[0:SizeofVfLinkState]
+	return (*VfLinkState)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfLinkState-byte view of msg's memory (no copy).
+func (msg *VfLinkState) Serialize() []byte {
+	view := (*[SizeofVfLinkState]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
+
+// VfRssQueryEn mirrors the kernel structure
+//
+//	struct ifla_vf_rss_query_en {
+//		__u32 vf;
+//		__u32 setting;
+//	};
+type VfRssQueryEn struct {
+	Vf      uint32
+	Setting uint32
+}
+
+// Len reports the serialized size of the message (SizeofVfRssQueryEn bytes).
+func (msg *VfRssQueryEn) Len() int {
+	return SizeofVfRssQueryEn
+}
+
+// DeserializeVfRssQueryEn reinterprets the first SizeofVfRssQueryEn bytes of
+// b as a VfRssQueryEn. The result points into b (no copy); the slice
+// expression panics if b is too small.
+func DeserializeVfRssQueryEn(b []byte) *VfRssQueryEn {
+	raw := b[0:SizeofVfRssQueryEn]
+	return (*VfRssQueryEn)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns a SizeofVfRssQueryEn-byte view of msg's memory (no copy).
+func (msg *VfRssQueryEn) Serialize() []byte {
+	view := (*[SizeofVfRssQueryEn]byte)(unsafe.Pointer(msg))
+	return view[:]
+}
"encoding/binary"
"fmt"
"net"
+ "runtime"
+ "sync"
"sync/atomic"
"syscall"
"unsafe"
+
+ "github.com/vishvananda/netns"
)
const (
FAMILY_V6 = syscall.AF_INET6
)
+// SupportedNlFamilies contains the list of netlink families this netlink package supports
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
+
var nextSeqNr uint32
// GetIPFamily returns the family type of a net.IP.
length := a.Len()
buf := make([]byte, rtaAlignOf(length))
+ next := 4
if a.Data != nil {
- copy(buf[4:], a.Data)
- } else {
- next := 4
+ copy(buf[next:], a.Data)
+ next += rtaAlignOf(len(a.Data))
+ }
+ if len(a.children) > 0 {
for _, child := range a.children {
childBuf := child.Serialize()
copy(buf[next:], childBuf)
type NetlinkRequest struct {
syscall.NlMsghdr
- Data []NetlinkRequestData
+ Data []NetlinkRequestData
+ Sockets map[int]*SocketHandle
}
// Serialize the Netlink Request into a byte array
}
// Execute the request against a the given sockType.
-// Returns a list of netlink messages in seriaized format, optionally filtered
+// Returns a list of netlink messages in serialized format, optionally filtered
// by resType.
func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
- s, err := getNetlinkSocket(sockType)
- if err != nil {
- return nil, err
+ var (
+ s *NetlinkSocket
+ err error
+ )
+
+ if req.Sockets != nil {
+ if sh, ok := req.Sockets[sockType]; ok {
+ s = sh.Socket
+ req.Seq = atomic.AddUint32(&sh.Seq, 1)
+ }
+ }
+ sharedSocket := s != nil
+
+ if s == nil {
+ s, err = getNetlinkSocket(sockType)
+ if err != nil {
+ return nil, err
+ }
+ defer s.Close()
+ } else {
+ s.Lock()
+ defer s.Unlock()
}
- defer s.Close()
if err := s.Send(req); err != nil {
return nil, err
}
for _, m := range msgs {
if m.Header.Seq != req.Seq {
- return nil, fmt.Errorf("Wrong Seq nr %d, expected 1", m.Header.Seq)
+ if sharedSocket {
+ continue
+ }
+ return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
}
if m.Header.Pid != pid {
return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
type NetlinkSocket struct {
fd int
lsa syscall.SockaddrNetlink
+ sync.Mutex
}
func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
return s, nil
}
+// GetNetlinkSocketAt opens a netlink socket in the network namespace newNs
+// and moves the thread back into the network namespace given by curNs when
+// done. If curNs is closed, the current namespace is looked up first and the
+// thread is moved back into it afterwards. If newNs is closed, the socket is
+// opened in the current network namespace.
+func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
+	restore, err := executeInNetns(newNs, curNs)
+	if err != nil {
+		return nil, err
+	}
+	// Switch back only after the socket has been created inside newNs.
+	defer restore()
+
+	sock, err := getNetlinkSocket(protocol)
+	return sock, err
+}
+
+// executeInNetns moves the calling thread into the network namespace newNs
+// (if open) and returns a function that moves it back: to curNs when that is
+// open, otherwise to the namespace netns.Get() reported at the time of the call.
+// On success, the caller is expected to invoke the returned function once the
+// code that must run inside newNs has finished (typically via defer).
+// Example:
+// func jobAt(...) error {
+// d, err := executeInNetns(...)
+// if err != nil { return err}
+// defer d()
+// < code which needs to be executed in specific netns>
+// }
+// TODO: this function probably belongs to netns pkg.
+func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
+ var (
+ err error
+ moveBack func(netns.NsHandle) error
+ closeNs func() error
+ unlockThd func()
+ )
+ restore := func() {
+ // order matters: switch back first, then close the handle, then unlock the thread
+ if moveBack != nil {
+ moveBack(curNs) // best effort: the returned error is not checked
+ }
+ if closeNs != nil {
+ closeNs()
+ }
+ if unlockThd != nil {
+ unlockThd()
+ }
+ }
+ if newNs.IsOpen() { // with a closed newNs nothing is set up and restore does nothing
+ runtime.LockOSThread()
+ unlockThd = runtime.UnlockOSThread
+ if !curNs.IsOpen() {
+ if curNs, err = netns.Get(); err != nil {
+ restore()
+ return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
+ }
+ closeNs = curNs.Close // only the handle obtained here is closed by restore
+ }
+ if err := netns.Set(newNs); err != nil {
+ restore()
+ return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
+ }
+ moveBack = netns.Set
+ }
+ return restore, nil
+}
+
// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE)
// and subscribe it to multicast groups passed in variable argument list.
// Returns the netlink socket on which Receive() method can be called
return s, nil
}
+// SubscribeAt works like Subscribe, but lets the caller choose the network
+// namespace in which the socket is opened (newNs). Afterwards control goes
+// back to curNs if it is open, otherwise to the network namespace that was
+// current when this function was called.
+func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
+	restore, err := executeInNetns(newNs, curNs)
+	if err != nil {
+		return nil, err
+	}
+	// Switch back only after the subscription socket exists.
+	defer restore()
+
+	sock, err := Subscribe(protocol, groups...)
+	return sock, err
+}
+
func (s *NetlinkSocket) Close() {
syscall.Close(s.fd)
+ s.fd = -1
+}
+
+// GetFd returns the socket's file descriptor as stored in s.fd (Close sets
+// it to -1).
+func (s *NetlinkSocket) GetFd() int {
+	fd := s.fd
+	return fd
+}
func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
+ if s.fd < 0 {
+ return fmt.Errorf("Send called on a closed socket")
+ }
if err := syscall.Sendto(s.fd, request.Serialize(), 0, &s.lsa); err != nil {
return err
}
}
func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
+ if s.fd < 0 {
+ return nil, fmt.Errorf("Receive called on a closed socket")
+ }
rb := make([]byte, syscall.Getpagesize())
nr, _, err := syscall.Recvfrom(s.fd, rb, 0)
if err != nil {
}
return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
}
+
+// SocketHandle pairs a netlink socket with the sequence counter used for
+// requests sent on it; NetlinkRequest.Sockets holds one per netlink family.
+type SocketHandle struct {
+ Seq uint32 // incremented atomically by NetlinkRequest.Execute for every request sent on Socket
+ Socket *NetlinkSocket
+}
+
+// Close closes the underlying netlink socket. It does nothing when Socket is
+// nil.
+func (sh *SocketHandle) Close() {
+	if sh.Socket == nil {
+		return
+	}
+	sh.Socket.Close()
+}
func (msg *RtMsg) Serialize() []byte {
return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:]
}
+
+// RtNexthop wraps syscall.RtNexthop and adds the Serialize/Deserialize
+// helpers below.
+type RtNexthop struct {
+ syscall.RtNexthop
+}
+
+// DeserializeRtNexthop reinterprets the first syscall.SizeofRtNexthop bytes
+// of b as an RtNexthop. No copy is made, so the result aliases b; b must hold
+// at least that many bytes.
+func DeserializeRtNexthop(b []byte) *RtNexthop {
+ return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
+}
+
+// Serialize returns the struct's own memory as a byte slice of
+// syscall.SizeofRtNexthop bytes (a view, not a copy).
+func (msg *RtNexthop) Serialize() []byte {
+ return (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]
+}
--- /dev/null
+package nl
+
+// The syscall package lacks the rule attribute types,
+// so they are defined below.
+const (
+ FRA_UNSPEC = iota
+ FRA_DST /* destination address */
+ FRA_SRC /* source address */
+ FRA_IIFNAME /* interface name */
+ FRA_GOTO /* target to jump to (FR_ACT_GOTO) */
+ FRA_UNUSED2
+ FRA_PRIORITY /* priority/preference */
+ FRA_UNUSED3
+ FRA_UNUSED4
+ FRA_UNUSED5
+ FRA_FWMARK /* mark */
+ FRA_FLOW /* flow/class id */
+ FRA_TUN_ID
+ FRA_SUPPRESS_IFGROUP
+ FRA_SUPPRESS_PREFIXLEN
+ FRA_TABLE /* Extended table id */
+ FRA_FWMASK /* mask for netfilter mark */
+ FRA_OIFNAME
+)
+
+// ip rule netlink request types
+const (
+ FR_ACT_UNSPEC = iota
+ FR_ACT_TO_TBL /* Pass to fixed table */
+ FR_ACT_GOTO /* Jump to another rule */
+ FR_ACT_NOP /* No operation */
+ FR_ACT_RES3
+ FR_ACT_RES4
+ FR_ACT_BLACKHOLE /* Drop without notification */
+ FR_ACT_UNREACHABLE /* Drop with ENETUNREACH */
+ FR_ACT_PROHIBIT /* Drop with EACCES */
+)
"unsafe"
)
+// LinkLayer
+const (
+ LINKLAYER_UNSPEC = iota
+ LINKLAYER_ETHERNET
+ LINKLAYER_ATM
+)
+
+// ATM
+const (
+ ATM_CELL_PAYLOAD = 48
+ ATM_CELL_SIZE = 53
+)
+
+const TC_LINKLAYER_MASK = 0x0F
+
+// Police
+const (
+ TCA_POLICE_UNSPEC = iota
+ TCA_POLICE_TBF
+ TCA_POLICE_RATE
+ TCA_POLICE_PEAKRATE
+ TCA_POLICE_AVRATE
+ TCA_POLICE_RESULT
+ TCA_POLICE_MAX = TCA_POLICE_RESULT
+)
+
// Message types
const (
TCA_UNSPEC = iota
TCAA_MAX = 1
)
+const (
+ TCA_ACT_UNSPEC = iota
+ TCA_ACT_KIND
+ TCA_ACT_OPTIONS
+ TCA_ACT_INDEX
+ TCA_ACT_STATS
+ TCA_ACT_MAX
+)
+
const (
TCA_PRIO_UNSPEC = iota
TCA_PRIO_MQ
)
const (
- SizeofTcMsg = 0x14
- SizeofTcActionMsg = 0x04
- SizeofTcPrioMap = 0x14
- SizeofTcRateSpec = 0x0c
- SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c
- SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14
- SizeofTcHtbGlob = 0x14
- SizeofTcU32Key = 0x10
- SizeofTcU32Sel = 0x10 // without keys
- SizeofTcMirred = 0x1c
+ SizeofTcMsg = 0x14
+ SizeofTcActionMsg = 0x04
+ SizeofTcPrioMap = 0x14
+ SizeofTcRateSpec = 0x0c
+ SizeofTcNetemQopt = 0x18
+ SizeofTcNetemCorr = 0x0c
+ SizeofTcNetemReorder = 0x08
+ SizeofTcNetemCorrupt = 0x08
+ SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c
+ SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14
+ SizeofTcHtbGlob = 0x14
+ SizeofTcU32Key = 0x10
+ SizeofTcU32Sel = 0x10 // without keys
+ SizeofTcGen = 0x14
+ SizeofTcMirred = SizeofTcGen + 0x08
+ SizeofTcPolice = 2*SizeofTcRateSpec + 0x20
)
// struct tcmsg {
return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
}
+/**
+* NETEM
+ */
+
+const (
+ TCA_NETEM_UNSPEC = iota
+ TCA_NETEM_CORR
+ TCA_NETEM_DELAY_DIST
+ TCA_NETEM_REORDER
+ TCA_NETEM_CORRUPT
+ TCA_NETEM_LOSS
+ TCA_NETEM_RATE
+ TCA_NETEM_ECN
+ TCA_NETEM_RATE64
+ TCA_NETEM_MAX = TCA_NETEM_RATE64
+)
+
+// struct tc_netem_qopt {
+// __u32 latency; /* added delay (us) */
+// __u32 limit; /* fifo limit (packets) */
+// __u32 loss; /* random packet loss (0=none ~0=100%) */
+// __u32 gap; /* re-ordering gap (0 for none) */
+// __u32 duplicate; /* random packet dup (0=none ~0=100%) */
+// __u32 jitter; /* random jitter in latency (us) */
+// };
+
+// TcNetemQopt mirrors struct tc_netem_qopt field for field.
+type TcNetemQopt struct {
+ Latency uint32
+ Limit uint32
+ Loss uint32
+ Gap uint32
+ Duplicate uint32
+ Jitter uint32
+}
+
+// Len returns the fixed serialized size, SizeofTcNetemQopt.
+func (msg *TcNetemQopt) Len() int {
+ return SizeofTcNetemQopt
+}
+
+// DeserializeTcNetemQopt reinterprets the first SizeofTcNetemQopt bytes of b
+// as a TcNetemQopt. No copy is made, so the result aliases b; b must hold at
+// least that many bytes.
+func DeserializeTcNetemQopt(b []byte) *TcNetemQopt {
+ return (*TcNetemQopt)(unsafe.Pointer(&b[0:SizeofTcNetemQopt][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcNetemQopt-byte
+// slice (a view, not a copy).
+func (x *TcNetemQopt) Serialize() []byte {
+ return (*(*[SizeofTcNetemQopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_corr {
+// __u32 delay_corr; /* delay correlation */
+// __u32 loss_corr; /* packet loss correlation */
+// __u32 dup_corr; /* duplicate correlation */
+// };
+
+// TcNetemCorr mirrors struct tc_netem_corr field for field.
+type TcNetemCorr struct {
+ DelayCorr uint32
+ LossCorr uint32
+ DupCorr uint32
+}
+
+// Len returns the fixed serialized size, SizeofTcNetemCorr.
+func (msg *TcNetemCorr) Len() int {
+ return SizeofTcNetemCorr
+}
+
+// DeserializeTcNetemCorr reinterprets the first SizeofTcNetemCorr bytes of b
+// as a TcNetemCorr. No copy is made, so the result aliases b.
+func DeserializeTcNetemCorr(b []byte) *TcNetemCorr {
+ return (*TcNetemCorr)(unsafe.Pointer(&b[0:SizeofTcNetemCorr][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcNetemCorr-byte
+// slice (a view, not a copy).
+func (x *TcNetemCorr) Serialize() []byte {
+ return (*(*[SizeofTcNetemCorr]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_reorder {
+// __u32 probability;
+// __u32 correlation;
+// };
+
+// TcNetemReorder mirrors struct tc_netem_reorder field for field.
+type TcNetemReorder struct {
+ Probability uint32
+ Correlation uint32
+}
+
+// Len returns the fixed serialized size, SizeofTcNetemReorder.
+func (msg *TcNetemReorder) Len() int {
+ return SizeofTcNetemReorder
+}
+
+// DeserializeTcNetemReorder reinterprets the first SizeofTcNetemReorder bytes
+// of b as a TcNetemReorder. No copy is made, so the result aliases b.
+func DeserializeTcNetemReorder(b []byte) *TcNetemReorder {
+ return (*TcNetemReorder)(unsafe.Pointer(&b[0:SizeofTcNetemReorder][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcNetemReorder-byte
+// slice (a view, not a copy).
+func (x *TcNetemReorder) Serialize() []byte {
+ return (*(*[SizeofTcNetemReorder]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_netem_corrupt {
+// __u32 probability;
+// __u32 correlation;
+// };
+
+// TcNetemCorrupt mirrors struct tc_netem_corrupt field for field.
+type TcNetemCorrupt struct {
+ Probability uint32
+ Correlation uint32
+}
+
+// Len returns the fixed serialized size, SizeofTcNetemCorrupt.
+func (msg *TcNetemCorrupt) Len() int {
+ return SizeofTcNetemCorrupt
+}
+
+// DeserializeTcNetemCorrupt reinterprets the first SizeofTcNetemCorrupt bytes
+// of b as a TcNetemCorrupt. No copy is made, so the result aliases b.
+func DeserializeTcNetemCorrupt(b []byte) *TcNetemCorrupt {
+ return (*TcNetemCorrupt)(unsafe.Pointer(&b[0:SizeofTcNetemCorrupt][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcNetemCorrupt-byte
+// slice (a view, not a copy).
+func (x *TcNetemCorrupt) Serialize() []byte {
+ return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:]
+}
+
// struct tc_tbf_qopt {
// struct tc_ratespec rate;
// struct tc_ratespec peakrate;
return buf
}
+// TcGen holds the field group described by the tc_gen macro quoted below.
+// TcMirred embeds it, and TcGact and TcBpf are defined directly in terms of it.
+type TcGen struct {
+ Index uint32
+ Capab uint32
+ Action int32
+ Refcnt int32
+ Bindcnt int32
+}
+
+// Len returns the fixed serialized size, SizeofTcGen.
+func (msg *TcGen) Len() int {
+ return SizeofTcGen
+}
+
+// DeserializeTcGen reinterprets the first SizeofTcGen bytes of b as a TcGen.
+// No copy is made, so the result aliases b.
+func DeserializeTcGen(b []byte) *TcGen {
+ return (*TcGen)(unsafe.Pointer(&b[0:SizeofTcGen][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcGen-byte slice
+// (a view, not a copy).
+func (x *TcGen) Serialize() []byte {
+ return (*(*[SizeofTcGen]byte)(unsafe.Pointer(x)))[:]
+}
+
+// #define tc_gen \
+// __u32 index; \
+// __u32 capab; \
+// int action; \
+// int refcnt; \
+// int bindcnt
+
const (
- TCA_ACT_MIRRED = 8
+ TCA_ACT_GACT = 5
)
const (
- TCA_MIRRED_UNSPEC = iota
- TCA_MIRRED_TM
- TCA_MIRRED_PARMS
- TCA_MIRRED_MAX = TCA_MIRRED_PARMS
+ TCA_GACT_UNSPEC = iota
+ TCA_GACT_TM
+ TCA_GACT_PARMS
+ TCA_GACT_PROB
+ TCA_GACT_MAX = TCA_GACT_PROB
)
+type TcGact TcGen
+
const (
- TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/
- TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */
- TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/
- TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
+ TCA_ACT_BPF = 13
)
const (
- TC_ACT_UNSPEC = int32(-1)
- TC_ACT_OK = 0
- TC_ACT_RECLASSIFY = 1
- TC_ACT_SHOT = 2
- TC_ACT_PIPE = 3
- TC_ACT_STOLEN = 4
- TC_ACT_QUEUED = 5
- TC_ACT_REPEAT = 6
- TC_ACT_JUMP = 0x10000000
+ TCA_ACT_BPF_UNSPEC = iota
+ TCA_ACT_BPF_TM
+ TCA_ACT_BPF_PARMS
+ TCA_ACT_BPF_OPS_LEN
+ TCA_ACT_BPF_OPS
+ TCA_ACT_BPF_FD
+ TCA_ACT_BPF_NAME
+ TCA_ACT_BPF_MAX = TCA_ACT_BPF_NAME
+)
+
+const (
+ TCA_BPF_FLAG_ACT_DIRECT uint32 = 1 << iota
+)
+
+const (
+ TCA_BPF_UNSPEC = iota
+ TCA_BPF_ACT
+ TCA_BPF_POLICE
+ TCA_BPF_CLASSID
+ TCA_BPF_OPS_LEN
+ TCA_BPF_OPS
+ TCA_BPF_FD
+ TCA_BPF_NAME
+ TCA_BPF_FLAGS
+ TCA_BPF_MAX = TCA_BPF_FLAGS
+)
+
+type TcBpf TcGen
+
+const (
+ TCA_ACT_MIRRED = 8
+)
+
+const (
+ TCA_MIRRED_UNSPEC = iota
+ TCA_MIRRED_TM
+ TCA_MIRRED_PARMS
+ TCA_MIRRED_MAX = TCA_MIRRED_PARMS
)
-// #define tc_gen \
-// __u32 index; \
-// __u32 capab; \
-// int action; \
-// int refcnt; \
-// int bindcnt
// struct tc_mirred {
// tc_gen;
// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */
// };
type TcMirred struct {
- Index uint32
- Capab uint32
- Action int32
- Refcnt int32
- Bindcnt int32
+ TcGen
Eaction int32
Ifindex uint32
}
func (x *TcMirred) Serialize() []byte {
return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
}
+
+// struct tc_police {
+// __u32 index;
+// int action;
+// __u32 limit;
+// __u32 burst;
+// __u32 mtu;
+// struct tc_ratespec rate;
+// struct tc_ratespec peakrate;
+// int refcnt;
+// int bindcnt;
+// __u32 capab;
+// };
+
+// TcPolice mirrors struct tc_police field for field, in the same order.
+type TcPolice struct {
+ Index uint32
+ Action int32
+ Limit uint32
+ Burst uint32
+ Mtu uint32
+ Rate TcRateSpec
+ PeakRate TcRateSpec
+ Refcnt int32
+ Bindcnt int32
+ Capab uint32
+}
+
+// Len returns the fixed serialized size, SizeofTcPolice.
+func (msg *TcPolice) Len() int {
+ return SizeofTcPolice
+}
+
+// DeserializeTcPolice reinterprets the first SizeofTcPolice bytes of b as a
+// TcPolice. No copy is made, so the result aliases b.
+func DeserializeTcPolice(b []byte) *TcPolice {
+ return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofTcPolice-byte slice
+// (a view, not a copy).
+func (x *TcPolice) Serialize() []byte {
+ return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+ TCA_FW_UNSPEC = iota
+ TCA_FW_CLASSID
+ TCA_FW_POLICE
+ TCA_FW_INDEV
+ TCA_FW_ACT
+ TCA_FW_MASK
+ TCA_FW_MAX = TCA_FW_MASK
+)
SizeofXfrmLifetimeCfg = 0x40
SizeofXfrmLifetimeCur = 0x20
SizeofXfrmId = 0x18
+ SizeofXfrmMark = 0x08
)
// typedef union {
func (msg *XfrmId) Serialize() []byte {
return (*(*[SizeofXfrmId]byte)(unsafe.Pointer(msg)))[:]
}
+
+// XfrmMark carries a mark value together with its mask.
+type XfrmMark struct {
+ Value uint32
+ Mask uint32
+}
+
+// Len returns the fixed serialized size, SizeofXfrmMark.
+func (msg *XfrmMark) Len() int {
+ return SizeofXfrmMark
+}
+
+// DeserializeXfrmMark reinterprets the first SizeofXfrmMark bytes of b as an
+// XfrmMark. No copy is made, so the result aliases b.
+func DeserializeXfrmMark(b []byte) *XfrmMark {
+ return (*XfrmMark)(unsafe.Pointer(&b[0:SizeofXfrmMark][0]))
+}
+
+// Serialize returns the struct's own memory as a SizeofXfrmMark-byte slice
+// (a view, not a copy).
+func (msg *XfrmMark) Serialize() []byte {
+ return (*(*[SizeofXfrmMark]byte)(unsafe.Pointer(msg)))[:]
+}
)
const (
- SizeofXfrmUsersaId = 0x18
- SizeofXfrmStats = 0x0c
- SizeofXfrmUsersaInfo = 0xe0
- SizeofXfrmAlgo = 0x44
- SizeofXfrmAlgoAuth = 0x48
- SizeofXfrmEncapTmpl = 0x18
+ SizeofXfrmUsersaId = 0x18
+ SizeofXfrmStats = 0x0c
+ SizeofXfrmUsersaInfo = 0xe0
+ SizeofXfrmAlgo = 0x44
+ SizeofXfrmAlgoAuth = 0x48
+ SizeofXfrmAlgoAEAD = 0x48
+ SizeofXfrmEncapTmpl = 0x18
+ SizeofXfrmUsersaFlush = 0x8
)
// struct xfrm_usersa_id {
// char alg_key[0];
// }
+// XfrmAlgoAEAD is a fixed header (name, key length, ICV length) followed by
+// a variable-length key.
+type XfrmAlgoAEAD struct {
+ AlgName [64]byte
+ AlgKeyLen uint32
+ AlgICVLen uint32
+ AlgKey []byte
+}
+
+// Len returns the serialized size: the fixed header plus AlgKeyLen/8 key
+// bytes (AlgKeyLen is divided by 8 here, so it is presumably a bit count).
+func (msg *XfrmAlgoAEAD) Len() int {
+ return SizeofXfrmAlgoAEAD + int(msg.AlgKeyLen/8)
+}
+
+// DeserializeXfrmAlgoAEAD decodes b into a new XfrmAlgoAEAD. The name and
+// length fields are copied; AlgKey is a sub-slice of b and aliases it.
+// b must be at least as long as the header plus the key length it announces.
+func DeserializeXfrmAlgoAEAD(b []byte) *XfrmAlgoAEAD {
+ ret := XfrmAlgoAEAD{}
+ copy(ret.AlgName[:], b[0:64])
+ ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64]))
+ ret.AlgICVLen = *(*uint32)(unsafe.Pointer(&b[68]))
+ // Len() depends on AlgKeyLen, which was just read from the header.
+ ret.AlgKey = b[72:ret.Len()]
+ return &ret
+}
+
+// Serialize encodes the header and key into a freshly allocated buffer of
+// Len() bytes. If AlgKey is shorter than AlgKeyLen/8 the remainder stays zero.
+func (msg *XfrmAlgoAEAD) Serialize() []byte {
+ b := make([]byte, msg.Len())
+ copy(b[0:64], msg.AlgName[:])
+ copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:])
+ copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgICVLen)))[:])
+ copy(b[72:msg.Len()], msg.AlgKey[:])
+ return b
+}
+
// struct xfrm_encap_tmpl {
// __u16 encap_type;
// __be16 encap_sport;
func (msg *XfrmEncapTmpl) Serialize() []byte {
return (*(*[SizeofXfrmEncapTmpl]byte)(unsafe.Pointer(msg)))[:]
}
+
+// struct xfrm_usersa_flush {
+// __u8 proto;
+// };
+
+// XfrmUsersaFlush mirrors struct xfrm_usersa_flush.
+// NOTE(review): the Go struct is one byte wide while SizeofXfrmUsersaFlush is
+// declared as 0x8 - confirm which size is intended.
+type XfrmUsersaFlush struct {
+ Proto uint8
+}
+
+// Len returns the serialized size, SizeofXfrmUsersaFlush.
+func (msg *XfrmUsersaFlush) Len() int {
+ return SizeofXfrmUsersaFlush
+}
+
+// DeserializeXfrmUsersaFlush reinterprets the start of b as an
+// XfrmUsersaFlush. No copy is made, so the result aliases b; b must hold at
+// least SizeofXfrmUsersaFlush bytes.
+func DeserializeXfrmUsersaFlush(b []byte) *XfrmUsersaFlush {
+ return (*XfrmUsersaFlush)(unsafe.Pointer(&b[0:SizeofXfrmUsersaFlush][0]))
+}
+
+// Serialize encodes the message into a freshly allocated buffer of
+// SizeofXfrmUsersaFlush bytes: Proto in the first byte, zero padding after.
+//
+// The Go struct is a single byte while SizeofXfrmUsersaFlush is larger, so
+// casting msg to a [SizeofXfrmUsersaFlush]byte array would read memory past
+// the end of the struct. The buffer is therefore built explicitly.
+func (msg *XfrmUsersaFlush) Serialize() []byte {
+ b := make([]byte, SizeofXfrmUsersaFlush)
+ b[0] = msg.Proto
+ return b
+}
)
func LinkGetProtinfo(link Link) (Protinfo, error) {
+ return pkgHandle.LinkGetProtinfo(link)
+}
+
+func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
base := link.Attrs()
- ensureIndex(base)
+ h.ensureIndex(base)
var pi Protinfo
- req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
+ req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0)
import (
"fmt"
+ "math"
)
const (
HANDLE_NONE = 0
HANDLE_INGRESS = 0xFFFFFFF1
+ HANDLE_CLSACT = HANDLE_INGRESS
HANDLE_ROOT = 0xFFFFFFFF
PRIORITY_MAP_LEN = 16
)
+const (
+ HANDLE_MIN_INGRESS = 0xFFFFFFF2
+ HANDLE_MIN_EGRESS = 0xFFFFFFF3
+)
type Qdisc interface {
Attrs() *QdiscAttrs
Type() string
}
-// Qdisc represents a netlink qdisc. A qdisc is associated with a link,
+// QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link,
// has a handle, a parent and a refcnt. The root qdisc of a device should
// have parent == HANDLE_ROOT.
type QdiscAttrs struct {
}
func (q QdiscAttrs) String() string {
- return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
+ return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
}
func MakeHandle(major, minor uint16) uint32 {
}
}
+// Percentage2u32 maps a percentage in [0, 100] onto the full uint32 range:
+// 0 becomes 0 and 100 becomes math.MaxUint32. Inputs above 100 are clamped
+// to math.MaxUint32; negative inputs and NaN are clamped to 0. Without the
+// clamping those inputs would go through a float-to-uint32 conversion whose
+// result is implementation-dependent.
+func Percentage2u32(percentage float32) uint32 {
+ // FIXME this is most likely not the best way to convert from % to uint32
+ if percentage >= 100 {
+ return math.MaxUint32
+ }
+ // Written as a negated comparison so that NaN lands here as well.
+ if !(percentage > 0) {
+ return 0
+ }
+ return uint32(math.MaxUint32 * (percentage / 100))
+}
+
// PfifoFast is the default qdisc created by the kernel if one has not
// been defined for the interface
type PfifoFast struct {
return "htb"
}
+// Netem is a classless qdisc that emulates network conditions such as delay,
+// packet loss, duplication, reordering and corruption.
+
+// NetemQdiscAttrs holds the netem settings in user-facing units (microseconds
+// and percentages). NewNetem converts them into the raw values kept in Netem.
+type NetemQdiscAttrs struct {
+ Latency uint32 // in us
+ DelayCorr float32 // in %
+ Limit uint32
+ Loss float32 // in %
+ LossCorr float32 // in %
+ Gap uint32
+ Duplicate float32 // in %
+ DuplicateCorr float32 // in %
+ Jitter uint32 // in us
+ ReorderProb float32 // in %
+ ReorderCorr float32 // in %
+ CorruptProb float32 // in %
+ CorruptCorr float32 // in %
+}
+
+func (q NetemQdiscAttrs) String() string {
+ return fmt.Sprintf(
+ "{Latency: %d, Limit: %d, Loss: %f, Gap: %d, Duplicate: %f, Jitter: %d}",
+ q.Latency, q.Limit, q.Loss, q.Gap, q.Duplicate, q.Jitter,
+ )
+}
+
+// Netem is the netem qdisc with its settings in the raw uint32 form that is
+// copied to and from the netlink structs. Use NewNetem to build one from
+// user-facing units.
+type Netem struct {
+ QdiscAttrs
+ Latency uint32
+ DelayCorr uint32
+ Limit uint32
+ Loss uint32
+ LossCorr uint32
+ Gap uint32
+ Duplicate uint32
+ DuplicateCorr uint32
+ Jitter uint32
+ ReorderProb uint32
+ ReorderCorr uint32
+ CorruptProb uint32
+ CorruptCorr uint32
+}
+
+// Attrs returns a pointer to the embedded QdiscAttrs.
+func (qdisc *Netem) Attrs() *QdiscAttrs {
+ return &qdisc.QdiscAttrs
+}
+
+// Type returns the qdisc kind string, "netem".
+func (qdisc *Netem) Type() string {
+ return "netem"
+}
+
// Tbf is a classless qdisc that rate limits based on tokens
type Tbf struct {
QdiscAttrs
"github.com/vishvananda/netlink/nl"
)
+// NewNetem builds a Netem qdisc from attrs and the user-facing settings in
+// nattrs. Percentages are converted with Percentage2u32 and the latency and
+// jitter with time2Tick. Limit defaults to 1000 when nattrs.Limit is 0.
+//
+// NOTE function is here because it uses other linux functions
+func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
+ var limit uint32 = 1000
+ var lossCorr, delayCorr, duplicateCorr uint32
+ var reorderProb, reorderCorr uint32
+ var corruptProb, corruptCorr uint32
+
+ latency := nattrs.Latency
+ loss := Percentage2u32(nattrs.Loss)
+ gap := nattrs.Gap
+ duplicate := Percentage2u32(nattrs.Duplicate)
+ jitter := nattrs.Jitter
+
+ // Correlation
+ // Each correlation is only kept when the setting it refers to is non-zero.
+ if latency > 0 && jitter > 0 {
+ delayCorr = Percentage2u32(nattrs.DelayCorr)
+ }
+ if loss > 0 {
+ lossCorr = Percentage2u32(nattrs.LossCorr)
+ }
+ if duplicate > 0 {
+ duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
+ }
+ // FIXME should validate values(like loss/duplicate are percentages...)
+ latency = time2Tick(latency)
+
+ if nattrs.Limit != 0 {
+ limit = nattrs.Limit
+ }
+ // Jitter is only valid if latency is > 0
+ if latency > 0 {
+ jitter = time2Tick(jitter)
+ }
+
+ reorderProb = Percentage2u32(nattrs.ReorderProb)
+ reorderCorr = Percentage2u32(nattrs.ReorderCorr)
+
+ if reorderProb > 0 {
+ // ERROR if latency == 0
+ // Reordering with no gap given falls back to a gap of 1.
+ if gap == 0 {
+ gap = 1
+ }
+ }
+
+ corruptProb = Percentage2u32(nattrs.CorruptProb)
+ corruptCorr = Percentage2u32(nattrs.CorruptCorr)
+
+ return &Netem{
+ QdiscAttrs: attrs,
+ Latency: latency,
+ DelayCorr: delayCorr,
+ Limit: limit,
+ Loss: loss,
+ LossCorr: lossCorr,
+ Gap: gap,
+ Duplicate: duplicate,
+ DuplicateCorr: duplicateCorr,
+ Jitter: jitter,
+ ReorderProb: reorderProb,
+ ReorderCorr: reorderCorr,
+ CorruptProb: corruptProb,
+ CorruptCorr: corruptCorr,
+ }
+}
+
// QdiscDel will delete a qdisc from the system.
// Equivalent to: `tc qdisc del $qdisc`
func QdiscDel(qdisc Qdisc) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK)
- base := qdisc.Attrs()
- msg := &nl.TcMsg{
- Family: nl.FAMILY_ALL,
- Ifindex: int32(base.LinkIndex),
- Handle: base.Handle,
- Parent: base.Parent,
- }
- req.AddData(msg)
+ return pkgHandle.QdiscDel(qdisc)
+}
- _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
- return err
+// QdiscDel will delete a qdisc from the system.
+// Equivalent to: `tc qdisc del $qdisc`
+func (h *Handle) QdiscDel(qdisc Qdisc) error {
+ return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
+}
+
+// QdiscChange will change a qdisc in place
+// Equivalent to: `tc qdisc change $qdisc`
+// The parent and handle MUST NOT be changed.
+func QdiscChange(qdisc Qdisc) error {
+ return pkgHandle.QdiscChange(qdisc)
+}
+
+// QdiscChange will change a qdisc in place
+// Equivalent to: `tc qdisc change $qdisc`
+// The parent and handle MUST NOT be changed.
+func (h *Handle) QdiscChange(qdisc Qdisc) error {
+ return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
+}
+
+// QdiscReplace will replace a qdisc to the system.
+// Equivalent to: `tc qdisc replace $qdisc`
+// The handle MUST change.
+func QdiscReplace(qdisc Qdisc) error {
+ return pkgHandle.QdiscReplace(qdisc)
+}
+
+// QdiscReplace will replace a qdisc to the system.
+// Equivalent to: `tc qdisc replace $qdisc`
+// The handle MUST change.
+func (h *Handle) QdiscReplace(qdisc Qdisc) error {
+ return h.qdiscModify(
+ syscall.RTM_NEWQDISC,
+ syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
+ qdisc)
}
// QdiscAdd will add a qdisc to the system.
// Equivalent to: `tc qdisc add $qdisc`
func QdiscAdd(qdisc Qdisc) error {
- req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return pkgHandle.QdiscAdd(qdisc)
+}
+
+// QdiscAdd will add a qdisc to the system.
+// Equivalent to: `tc qdisc add $qdisc`
+func (h *Handle) QdiscAdd(qdisc Qdisc) error {
+ return h.qdiscModify(
+ syscall.RTM_NEWQDISC,
+ syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
+ qdisc)
+}
+
+func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
+ req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: base.Parent,
}
req.AddData(msg)
+
+ // When deleting don't bother building the rest of the netlink payload
+ if cmd != syscall.RTM_DELQDISC {
+ if err := qdiscPayload(req, qdisc); err != nil {
+ return err
+ }
+ }
+
+ _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+ return err
+}
+
+func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
+
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
opt.DirectPkts = htb.DirectPkts
nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
+ } else if netem, ok := qdisc.(*Netem); ok {
+ opt := nl.TcNetemQopt{}
+ opt.Latency = netem.Latency
+ opt.Limit = netem.Limit
+ opt.Loss = netem.Loss
+ opt.Gap = netem.Gap
+ opt.Duplicate = netem.Duplicate
+ opt.Jitter = netem.Jitter
+ options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
+ // Correlation
+ corr := nl.TcNetemCorr{}
+ corr.DelayCorr = netem.DelayCorr
+ corr.LossCorr = netem.LossCorr
+ corr.DupCorr = netem.DuplicateCorr
+
+ if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
+ nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
+ }
+ // Corruption
+ corruption := nl.TcNetemCorrupt{}
+ corruption.Probability = netem.CorruptProb
+ corruption.Correlation = netem.CorruptCorr
+ if corruption.Probability > 0 {
+ nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
+ }
+ // Reorder
+ reorder := nl.TcNetemReorder{}
+ reorder.Probability = netem.ReorderProb
+ reorder.Correlation = netem.ReorderCorr
+ if reorder.Probability > 0 {
+ nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
+ }
} else if _, ok := qdisc.(*Ingress); ok {
// ingress filters must use the proper handle
- if msg.Parent != HANDLE_INGRESS {
+ if qdisc.Attrs().Parent != HANDLE_INGRESS {
return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
}
}
+
req.AddData(options)
- _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
- return err
+ return nil
}
// QdiscList gets a list of qdiscs in the system.
// Equivalent to: `tc qdisc show`.
// The list can be filtered by link.
func QdiscList(link Link) ([]Qdisc, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
+ return pkgHandle.QdiscList(link)
+}
+
+// QdiscList gets a list of qdiscs in the system.
+// Equivalent to: `tc qdisc show`.
+// The list can be filtered by link.
+func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
index := int32(0)
if link != nil {
base := link.Attrs()
- ensureIndex(base)
+ h.ensureIndex(base)
index = int32(base.Index)
}
msg := &nl.TcMsg{
qdisc = &Ingress{}
case "htb":
qdisc = &Htb{}
+ case "netem":
+ qdisc = &Netem{}
default:
qdisc = &GenericQdisc{QdiscType: qdiscType}
}
if err := parseHtbData(qdisc, data); err != nil {
return nil, err
}
+ case "netem":
+ if err := parseNetemData(qdisc, attr.Value); err != nil {
+ return nil, err
+ }
// no options for ingress
}
}
return nil
}
+
+// parseNetemData fills the Netem behind qdisc from a raw netem options
+// payload: a TcNetemQopt header followed by optional nested attributes
+// (correlation, corruption, reorder). Unknown nested attributes are ignored.
+//
+// It returns an error when the payload, or one of the recognised nested
+// attributes, is shorter than the struct it is decoded as. The Deserialize
+// helpers cast the bytes in place, so a short buffer would otherwise panic or
+// expose neighbouring bytes.
+func parseNetemData(qdisc Qdisc, value []byte) error {
+ if len(value) < nl.SizeofTcNetemQopt {
+ return fmt.Errorf("netem options too short: got %d bytes, need at least %d", len(value), nl.SizeofTcNetemQopt)
+ }
+ netem := qdisc.(*Netem)
+ opt := nl.DeserializeTcNetemQopt(value)
+ netem.Latency = opt.Latency
+ netem.Limit = opt.Limit
+ netem.Loss = opt.Loss
+ netem.Gap = opt.Gap
+ netem.Duplicate = opt.Duplicate
+ netem.Jitter = opt.Jitter
+ // The nested attributes start right after the fixed header.
+ data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
+ if err != nil {
+ return err
+ }
+ for _, datum := range data {
+ switch datum.Attr.Type {
+ case nl.TCA_NETEM_CORR:
+ if len(datum.Value) < nl.SizeofTcNetemCorr {
+ return fmt.Errorf("netem correlation attribute too short: %d bytes", len(datum.Value))
+ }
+ opt := nl.DeserializeTcNetemCorr(datum.Value)
+ netem.DelayCorr = opt.DelayCorr
+ netem.LossCorr = opt.LossCorr
+ netem.DuplicateCorr = opt.DupCorr
+ case nl.TCA_NETEM_CORRUPT:
+ if len(datum.Value) < nl.SizeofTcNetemCorrupt {
+ return fmt.Errorf("netem corrupt attribute too short: %d bytes", len(datum.Value))
+ }
+ opt := nl.DeserializeTcNetemCorrupt(datum.Value)
+ netem.CorruptProb = opt.Probability
+ netem.CorruptCorr = opt.Correlation
+ case nl.TCA_NETEM_REORDER:
+ if len(datum.Value) < nl.SizeofTcNetemReorder {
+ return fmt.Errorf("netem reorder attribute too short: %d bytes", len(datum.Value))
+ }
+ opt := nl.DeserializeTcNetemReorder(datum.Value)
+ netem.ReorderProb = opt.Probability
+ netem.ReorderCorr = opt.Correlation
+ }
+ }
+ return nil
+}
+
func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
tbf := qdisc.(*Tbf)
)
var (
- tickInUsec float64 = 0.0
- clockFactor float64 = 0.0
- hz float64 = 0.0
+ tickInUsec float64
+ clockFactor float64
+ hz float64
)
func initClock() {
import (
"fmt"
"net"
- "syscall"
)
// Scope is an enum representing a route scope.
type Scope uint8
-const (
- SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
- SCOPE_SITE Scope = syscall.RT_SCOPE_SITE
- SCOPE_LINK Scope = syscall.RT_SCOPE_LINK
- SCOPE_HOST Scope = syscall.RT_SCOPE_HOST
- SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE
-)
+type NextHopFlag int
-// Route represents a netlink route. A route is associated with a link,
-// has a destination network, an optional source ip, and optional
-// gateway. Advanced route parameters and non-main routing tables are
-// currently not supported.
+// Route represents a netlink route.
type Route struct {
- LinkIndex int
- Scope Scope
- Dst *net.IPNet
- Src net.IP
- Gw net.IP
+ LinkIndex int
+ ILinkIndex int
+ Scope Scope
+ Dst *net.IPNet
+ Src net.IP
+ Gw net.IP
+ MultiPath []*NexthopInfo
+ Protocol int
+ Priority int
+ Table int
+ Type int
+ Tos int
+ Flags int
}
func (r Route) String() string {
- return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst,
- r.Src, r.Gw)
+ if len(r.MultiPath) > 0 {
+ return fmt.Sprintf("{Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.Dst,
+ r.Src, r.MultiPath, r.ListFlags(), r.Table)
+ }
+ return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.LinkIndex, r.Dst,
+ r.Src, r.Gw, r.ListFlags(), r.Table)
+}
+
+// SetFlag sets the bits of flag in r.Flags.
+func (r *Route) SetFlag(flag NextHopFlag) {
+ r.Flags |= int(flag)
+}
+
+// ClearFlag clears the bits of flag in r.Flags, leaving the others untouched.
+func (r *Route) ClearFlag(flag NextHopFlag) {
+ r.Flags &^= int(flag)
+}
+
+// flagString pairs a NextHopFlag with the name used when listing flags.
+type flagString struct {
+ f NextHopFlag
+ s string
}
// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
Type uint16
Route
}
+
+// NexthopInfo describes one next hop of a multipath route: the outgoing
+// link index, the hops value and the gateway address.
+type NexthopInfo struct {
+ LinkIndex int
+ Hops int
+ Gw net.IP
+}
+
+// String formats the next hop for display. The weight shown is Hops+1.
+func (n *NexthopInfo) String() string {
+ return fmt.Sprintf("{Ifindex: %d Weight: %d, Gw: %s}", n.LinkIndex, n.Hops+1, n.Gw)
+}
"syscall"
"github.com/vishvananda/netlink/nl"
+ "github.com/vishvananda/netns"
)
// RtAttr is shared so it is in netlink_linux.go
+const (
+ SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
+ SCOPE_SITE Scope = syscall.RT_SCOPE_SITE
+ SCOPE_LINK Scope = syscall.RT_SCOPE_LINK
+ SCOPE_HOST Scope = syscall.RT_SCOPE_HOST
+ SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE
+)
+
+const (
+ RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
+ RT_FILTER_SCOPE
+ RT_FILTER_TYPE
+ RT_FILTER_TOS
+ RT_FILTER_IIF
+ RT_FILTER_OIF
+ RT_FILTER_DST
+ RT_FILTER_SRC
+ RT_FILTER_GW
+ RT_FILTER_TABLE
+)
+
+const (
+ FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK
+ FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
+)
+
+var testFlags = []flagString{
+ {f: FLAG_ONLINK, s: "onlink"},
+ {f: FLAG_PERVASIVE, s: "pervasive"},
+}
+
+// ListFlags returns the names of the flags from testFlags that are set in
+// r.Flags, in testFlags order. The result is nil when none is set.
+func (r *Route) ListFlags() []string {
+ var flags []string
+ for _, tf := range testFlags {
+ if r.Flags&int(tf.f) != 0 {
+ flags = append(flags, tf.s)
+ }
+ }
+ return flags
+}
+
// RouteAdd will add a route to the system.
// Equivalent to: `ip route add $route`
func RouteAdd(route *Route) error {
- req := nl.NewNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
- return routeHandle(route, req, nl.NewRtMsg())
+ return pkgHandle.RouteAdd(route)
+}
+
+// RouteAdd will add a route to the system.
+// Equivalent to: `ip route add $route`
+func (h *Handle) RouteAdd(route *Route) error {
+ req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return h.routeHandle(route, req, nl.NewRtMsg())
}
// RouteDel will delete a route from the system.
// Equivalent to: `ip route del $route`
func RouteDel(route *Route) error {
- req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
- return routeHandle(route, req, nl.NewRtDelMsg())
+ return pkgHandle.RouteDel(route)
}
-func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
+// RouteDel will delete a route from the system.
+// Equivalent to: `ip route del $route`
+func (h *Handle) RouteDel(route *Route) error {
+ req := h.newNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
+ return h.routeHandle(route, req, nl.NewRtDelMsg())
+}
+
+func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil {
return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
}
- msg.Scope = uint8(route.Scope)
family := -1
var rtAttrs []*nl.RtAttr
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData))
}
- msg.Family = uint8(family)
+ // Multipath routes: encode every next hop as an rtnexthop header followed
+ // by its optional RTA_GATEWAY attribute, then wrap the lot in a single
+ // RTA_MULTIPATH attribute.
+ if len(route.MultiPath) > 0 {
+ buf := []byte{}
+ for _, nh := range route.MultiPath {
+ rtnh := &nl.RtNexthop{
+ RtNexthop: syscall.RtNexthop{
+ Hops: uint8(nh.Hops),
+ Ifindex: int32(nh.LinkIndex),
+ Len: uint16(syscall.SizeofRtNexthop),
+ },
+ }
+ var gwData []byte
+ if nh.Gw != nil {
+ gwFamily := nl.GetIPFamily(nh.Gw)
+ if family != -1 && family != gwFamily {
+ return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
+ }
+ var gw *nl.RtAttr
+ if gwFamily == FAMILY_V4 {
+ gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))
+ } else {
+ gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))
+ }
+ // Assign to the outer gwData (no :=). Shadowing it here would
+ // leave the outer slice nil, so rtnh.Len would count the gateway
+ // attribute while its bytes were never appended to buf.
+ gwData = gw.Serialize()
+ rtnh.Len += uint16(len(gwData))
+ }
+ buf = append(buf, rtnh.Serialize()...)
+ buf = append(buf, gwData...)
+ }
+ rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
+ }
+ if route.Table > 0 {
+ if route.Table >= 256 {
+ msg.Table = syscall.RT_TABLE_UNSPEC
+ b := make([]byte, 4)
+ native.PutUint32(b, uint32(route.Table))
+ rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_TABLE, b))
+ } else {
+ msg.Table = uint8(route.Table)
+ }
+ }
+
+ if route.Priority > 0 {
+ b := make([]byte, 4)
+ native.PutUint32(b, uint32(route.Priority))
+ rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PRIORITY, b))
+ }
+ if route.Tos > 0 {
+ msg.Tos = uint8(route.Tos)
+ }
+ if route.Protocol > 0 {
+ msg.Protocol = uint8(route.Protocol)
+ }
+ if route.Type > 0 {
+ msg.Type = uint8(route.Type)
+ }
+
+ msg.Flags = uint32(route.Flags)
+ msg.Scope = uint8(route.Scope)
+ msg.Family = uint8(family)
req.AddData(msg)
for _, attr := range rtAttrs {
req.AddData(attr)
// Equivalent to: `ip route show`.
// The list can be filtered by link and ip family.
func RouteList(link Link, family int) ([]Route, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
- msg := nl.NewIfInfomsg(family)
- req.AddData(msg)
+ return pkgHandle.RouteList(link, family)
+}
+
+// RouteList gets a list of routes in the system.
+// Equivalent to: `ip route show`.
+// The list can be filtered by link and ip family.
+//
+// When link is non-nil the routes are filtered on its index as the outgoing
+// interface (RT_FILTER_OIF). When link is nil a nil filter is passed on, and
+// RouteListFiltered then applies no per-field filtering.
+func (h *Handle) RouteList(link Link, family int) ([]Route, error) {
+ var routeFilter *Route
+ if link != nil {
+ routeFilter = &Route{
+ LinkIndex: link.Attrs().Index,
+ }
+ }
+ return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF)
+}
+
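+// RouteListFiltered gets a list of routes in the system filtered with specified rules.
+// The values to match are taken from the fields of filter; filterMask (a
+// combination of RT_FILTER_* bits) selects which of those fields are compared.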
+func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
+ return pkgHandle.RouteListFiltered(family, filter, filterMask)
+}
+
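+// RouteListFiltered gets a list of routes in the system filtered with specified rules.
+// The values to match are taken from the fields of filter; filterMask (a
+// combination of RT_FILTER_* bits) selects which of those fields are compared.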
+func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
+ infmsg := nl.NewIfInfomsg(family)
+ req.AddData(infmsg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE)
if err != nil {
return nil, err
}
- index := 0
- if link != nil {
- base := link.Attrs()
- ensureIndex(base)
- index = base.Index
- }
-
var res []Route
for _, m := range msgs {
msg := nl.DeserializeRtMsg(m)
-
if msg.Flags&syscall.RTM_F_CLONED != 0 {
// Ignore cloned routes
continue
}
-
if msg.Table != syscall.RT_TABLE_MAIN {
- // Ignore non-main tables
- continue
+ if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 {
+ // Ignore non-main tables
+ continue
+ }
}
-
route, err := deserializeRoute(m)
if err != nil {
return nil, err
}
-
- if link != nil && route.LinkIndex != index {
- // Ignore routes from other interfaces
- continue
+ if filter != nil {
+ switch {
+ case filterMask&RT_FILTER_TABLE != 0 && route.Table != filter.Table:
+ continue
+ case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol:
+ continue
+ case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope:
+ continue
+ case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type:
+ continue
+ case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos:
+ continue
+ case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex:
+ continue
+ case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex:
+ continue
+ case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw):
+ continue
+ case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src):
+ continue
+ case filterMask&RT_FILTER_DST != 0 && filter.Dst != nil:
+ if route.Dst == nil {
+ continue
+ }
+ aMaskLen, aMaskBits := route.Dst.Mask.Size()
+ bMaskLen, bMaskBits := filter.Dst.Mask.Size()
+ if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
+ continue
+ }
+ }
}
res = append(res, route)
}
-
return res, nil
}
// deserializeRoute decodes a binary netlink message into a Route struct
func deserializeRoute(m []byte) (Route, error) {
- route := Route{}
msg := nl.DeserializeRtMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
- return route, err
+ return Route{}, err
+ }
+ route := Route{
+ Scope: Scope(msg.Scope),
+ Protocol: int(msg.Protocol),
+ Table: int(msg.Table),
+ Type: int(msg.Type),
+ Tos: int(msg.Tos),
+ Flags: int(msg.Flags),
}
- route.Scope = Scope(msg.Scope)
native := nl.NativeEndian()
for _, attr := range attrs {
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
- routeIndex := int(native.Uint32(attr.Value[0:4]))
- route.LinkIndex = routeIndex
+ route.LinkIndex = int(native.Uint32(attr.Value[0:4]))
+ case syscall.RTA_IIF:
+ route.ILinkIndex = int(native.Uint32(attr.Value[0:4]))
+ case syscall.RTA_PRIORITY:
+ route.Priority = int(native.Uint32(attr.Value[0:4]))
+ case syscall.RTA_TABLE:
+ route.Table = int(native.Uint32(attr.Value[0:4]))
+ case syscall.RTA_MULTIPATH:
+ parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) {
+ if len(value) < syscall.SizeofRtNexthop {
+ return nil, nil, fmt.Errorf("Lack of bytes")
+ }
+ nh := nl.DeserializeRtNexthop(value)
+ if len(value) < int(nh.RtNexthop.Len) {
+ return nil, nil, fmt.Errorf("Lack of bytes")
+ }
+ info := &NexthopInfo{
+ LinkIndex: int(nh.RtNexthop.Ifindex),
+ Hops: int(nh.RtNexthop.Hops),
+ }
+ attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)])
+ if err != nil {
+ return nil, nil, err
+ }
+ for _, attr := range attrs {
+ switch attr.Attr.Type {
+ case syscall.RTA_GATEWAY:
+ info.Gw = net.IP(attr.Value)
+ }
+ }
+ return info, value[int(nh.RtNexthop.Len):], nil
+ }
+ rest := attr.Value
+ for len(rest) > 0 {
+ info, buf, err := parseRtNexthop(rest)
+ if err != nil {
+ return route, err
+ }
+ route.MultiPath = append(route.MultiPath, info)
+ rest = buf
+ }
}
}
return route, nil
// RouteGet gets a route to a specific destination from the host system.
// Equivalent to: 'ip route get'.
func RouteGet(destination net.IP) ([]Route, error) {
- req := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST)
+ return pkgHandle.RouteGet(destination)
+}
+
+// RouteGet gets a route to a specific destination from the host system.
+// Equivalent to: 'ip route get'.
+func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST)
family := nl.GetIPFamily(destination)
var destinationData []byte
var bitlen uint8
// RouteSubscribe takes a chan down which notifications will be sent
// when routes are added or deleted. Close the 'done' chan to stop subscription.
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
- s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
+ return routeSubscribeAt(netns.None(), netns.None(), ch, done)
+}
+
+// RouteSubscribeAt works like RouteSubscribe plus it allows the caller
+// to choose the network namespace in which to subscribe (ns).
+func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
+ return routeSubscribeAt(ns, netns.None(), ch, done)
+}
+
+func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
if err != nil {
return err
}
--- /dev/null
+// +build !linux
+
+package netlink
+
+// ListFlags is the stub used when this file's "!linux" build constraint
+// applies. It reports no flags and always returns an empty, non-nil slice.
+func (r *Route) ListFlags() []string {
+ return []string{}
+}
--- /dev/null
+package netlink
+
+import (
+ "fmt"
+ "net"
+)
+
+// Rule represents a netlink rule.
+//
+// For the integer fields that NewRule initialises to -1, a negative value
+// means "unset": ruleHandle only emits the matching attribute when the
+// value is >= 0.
+type Rule struct {
+ Priority int // sent as FRA_PRIORITY when >= 0
+ Table int // < 256 goes in the rtmsg header; >= 256 is sent as FRA_TABLE
+ Mark int // sent as FRA_FWMARK when >= 0
+ Mask int // sent as FRA_FWMASK when >= 0
+ TunID uint // sent as FRA_TUN_ID when > 0
+ Goto int // sent as FRA_GOTO when >= 0
+ Src *net.IPNet // source prefix; nil means no source selector
+ Dst *net.IPNet // destination prefix; nil means no destination selector
+ Flow int // sent as FRA_FLOW when >= 0
+ IifName string // sent as FRA_IIFNAME when non-empty
+ OifName string // sent as FRA_OIFNAME when non-empty
+ SuppressIfgroup int // sent as FRA_SUPPRESS_IFGROUP when >= 0 and a table is set
+ SuppressPrefixlen int // sent as FRA_SUPPRESS_PREFIXLEN when >= 0 and a table is set
+}
+
+// String renders the rule as "ip rule <priority>: from <src> table <table>".
+// Only Priority, Src and Table are included in the output.
+func (r Rule) String() string {
+ return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table)
+}
+
+// NewRule returns a Rule with no selectors or actions configured.
+// SuppressIfgroup, SuppressPrefixlen, Priority, Mark, Mask, Goto and Flow
+// are set to -1 (unset); every other field keeps its zero value.
+func NewRule() *Rule {
+ return &Rule{
+ SuppressIfgroup: -1,
+ SuppressPrefixlen: -1,
+ Priority: -1,
+ Mark: -1,
+ Mask: -1,
+ Goto: -1,
+ Flow: -1,
+ }
+}
--- /dev/null
+package netlink
+
+import (
+ "fmt"
+ "net"
+ "syscall"
+
+ "github.com/vishvananda/netlink/nl"
+)
+
+// RuleAdd adds a rule to the system by delegating to the package-level
+// handle (pkgHandle).
+// Equivalent to: ip rule add
+func RuleAdd(rule *Rule) error {
+ return pkgHandle.RuleAdd(rule)
+}
+
+// RuleAdd adds a rule to the system.
+// Equivalent to: ip rule add
+//
+// It builds an RTM_NEWRULE request with NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK
+// on this handle and lets ruleHandle fill in and execute it.
+func (h *Handle) RuleAdd(rule *Rule) error {
+ req := h.newNetlinkRequest(syscall.RTM_NEWRULE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return ruleHandle(rule, req)
+}
+
+// RuleDel deletes a rule from the system by delegating to the
+// package-level handle (pkgHandle).
+// Equivalent to: ip rule del
+func RuleDel(rule *Rule) error {
+ return pkgHandle.RuleDel(rule)
+}
+
+// RuleDel deletes a rule from the system.
+// Equivalent to: ip rule del
+//
+// The request carries only NLM_F_ACK. NLM_F_CREATE and NLM_F_EXCL are
+// modifiers for NEW requests; on a DELETE request the same bit values are
+// interpreted as delete modifiers (0x200 is NLM_F_BULK), so they must not
+// be set here.
+func (h *Handle) RuleDel(rule *Rule) error {
+	req := h.newNetlinkRequest(syscall.RTM_DELRULE, syscall.NLM_F_ACK)
+	return ruleHandle(rule, req)
+}
+
+// ruleHandle fills req with an rtmsg header plus the attributes that
+// describe rule, then executes it on the NETLINK_ROUTE socket. It is shared
+// by RuleAdd and RuleDel; the caller chooses the message type and flags.
+//
+// The address family defaults to AF_INET and is taken from Dst and/or Src
+// when they are set. An error is returned, before anything is sent, when
+// Src and Dst belong to different families. Integer fields of rule are only
+// encoded when they are >= 0 (TunID when > 0), matching the -1 "unset"
+// values produced by NewRule.
+func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
+	msg := nl.NewRtMsg()
+	msg.Family = syscall.AF_INET
+	var dstFamily uint8
+
+	var rtAttrs []*nl.RtAttr
+	if rule.Dst != nil && rule.Dst.IP != nil {
+		dstLen, _ := rule.Dst.Mask.Size()
+		msg.Dst_len = uint8(dstLen)
+		msg.Family = uint8(nl.GetIPFamily(rule.Dst.IP))
+		dstFamily = msg.Family
+		var dstData []byte
+		if msg.Family == syscall.AF_INET {
+			dstData = rule.Dst.IP.To4()
+		} else {
+			dstData = rule.Dst.IP.To16()
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData))
+	}
+
+	if rule.Src != nil && rule.Src.IP != nil {
+		msg.Family = uint8(nl.GetIPFamily(rule.Src.IP))
+		if dstFamily != 0 && dstFamily != msg.Family {
+			return fmt.Errorf("source and destination ip are not the same IP family")
+		}
+		srcLen, _ := rule.Src.Mask.Size()
+		msg.Src_len = uint8(srcLen)
+		var srcData []byte
+		if msg.Family == syscall.AF_INET {
+			srcData = rule.Src.IP.To4()
+		} else {
+			srcData = rule.Src.IP.To16()
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_SRC, srcData))
+	}
+
+	// Table ids that fit in the one-byte header field go there; larger
+	// ids leave the header at RT_TABLE_UNSPEC and are sent as FRA_TABLE
+	// further down.
+	if rule.Table >= 0 {
+		msg.Table = uint8(rule.Table)
+		if rule.Table >= 256 {
+			msg.Table = syscall.RT_TABLE_UNSPEC
+		}
+	}
+
+	req.AddData(msg)
+	for i := range rtAttrs {
+		req.AddData(rtAttrs[i])
+	}
+
+	native := nl.NativeEndian()
+	// u32 encodes v into a freshly allocated 4-byte buffer. Each attribute
+	// must own its buffer: NewRtAttr is handed the slice itself, and the
+	// request is only serialized by Execute, so a single reused buffer
+	// would make every attribute carry the last value written.
+	// NOTE(review): relies on NewRtAttr retaining rather than copying its
+	// data argument -- confirm in the nl package.
+	u32 := func(v uint32) []byte {
+		b := make([]byte, 4)
+		native.PutUint32(b, v)
+		return b
+	}
+
+	if rule.Priority >= 0 {
+		req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, u32(uint32(rule.Priority))))
+	}
+	if rule.Mark >= 0 {
+		req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, u32(uint32(rule.Mark))))
+	}
+	if rule.Mask >= 0 {
+		req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, u32(uint32(rule.Mask))))
+	}
+	if rule.Flow >= 0 {
+		req.AddData(nl.NewRtAttr(nl.FRA_FLOW, u32(uint32(rule.Flow))))
+	}
+	if rule.TunID > 0 {
+		// NOTE(review): the tunnel id is written as 4 bytes here, as in
+		// the original code; confirm the width the kernel expects for
+		// FRA_TUN_ID.
+		req.AddData(nl.NewRtAttr(nl.FRA_TUN_ID, u32(uint32(rule.TunID))))
+	}
+	if rule.Table >= 256 {
+		req.AddData(nl.NewRtAttr(nl.FRA_TABLE, u32(uint32(rule.Table))))
+	}
+	// The suppress attributes are only sent when the header names a table.
+	if msg.Table > 0 {
+		if rule.SuppressPrefixlen >= 0 {
+			req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_PREFIXLEN, u32(uint32(rule.SuppressPrefixlen))))
+		}
+		if rule.SuppressIfgroup >= 0 {
+			req.AddData(nl.NewRtAttr(nl.FRA_SUPPRESS_IFGROUP, u32(uint32(rule.SuppressIfgroup))))
+		}
+	}
+	if rule.IifName != "" {
+		req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName)))
+	}
+	if rule.OifName != "" {
+		req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName)))
+	}
+	if rule.Goto >= 0 {
+		// msg was already added to req above; this assignment is kept in
+		// the original position and relies on the header being serialized
+		// only when the request is executed.
+		msg.Type = nl.FR_ACT_NOP
+		req.AddData(nl.NewRtAttr(nl.FRA_GOTO, u32(uint32(rule.Goto))))
+	}
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// RuleList lists rules in the system for the given address family by
+// delegating to the package-level handle (pkgHandle).
+// Equivalent to: ip rule list
+func RuleList(family int) ([]Rule, error) {
+ return pkgHandle.RuleList(family)
+}
+
+// RuleList lists rules in the system.
+// Equivalent to: ip rule list
+func (h *Handle) RuleList(family int) ([]Rule, error) {
+ req := h.newNetlinkRequest(syscall.RTM_GETRULE, syscall.NLM_F_DUMP|syscall.NLM_F_REQUEST)
+ msg := nl.NewIfInfomsg(family)
+ req.AddData(msg)
+
+ msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWRULE)
+ if err != nil {
+ return nil, err
+ }
+
+ native := nl.NativeEndian()
+ var res = make([]Rule, 0)
+ for i := range msgs {
+ msg := nl.DeserializeRtMsg(msgs[i])
+ attrs, err := nl.ParseRouteAttr(msgs[i][msg.Len():])
+ if err != nil {
+ return nil, err
+ }
+
+ rule := NewRule()
+
+ for j := range attrs {
+ switch attrs[j].Attr.Type {
+ case syscall.RTA_TABLE:
+ rule.Table = int(native.Uint32(attrs[j].Value[0:4]))
+ case nl.FRA_SRC:
+ rule.Src = &net.IPNet{
+ IP: attrs[j].Value,
+ Mask: net.CIDRMask(int(msg.Src_len), 8*len(attrs[j].Value)),
+ }
+ case nl.FRA_DST:
+ rule.Dst = &net.IPNet{
+ IP: attrs[j].Value,
+ Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)),
+ }
+ case nl.FRA_FWMARK:
+ rule.Mark = int(native.Uint32(attrs[j].Value[0:4]))
+ case nl.FRA_FWMASK:
+ rule.Mask = int(native.Uint32(attrs[j].Value[0:4]))
+ case nl.FRA_TUN_ID:
+ rule.TunID = uint(native.Uint64(attrs[j].Value[0:4]))
+ case nl.FRA_IIFNAME:
+ rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
+ case nl.FRA_OIFNAME:
+ rule.OifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
+ case nl.FRA_SUPPRESS_PREFIXLEN:
+ i := native.Uint32(attrs[j].Value[0:4])
+ if i != 0xffffffff {
+ rule.SuppressPrefixlen = int(i)
+ }
+ case nl.FRA_SUPPRESS_IFGROUP:
+ i := native.Uint32(attrs[j].Value[0:4])
+ if i != 0xffffffff {
+ rule.SuppressIfgroup = int(i)
+ }
+ case nl.FRA_FLOW:
+ rule.Flow = int(native.Uint32(attrs[j].Value[0:4]))
+ case nl.FRA_GOTO:
+ rule.Goto = int(native.Uint32(attrs[j].Value[0:4]))
+ case nl.FRA_PRIORITY:
+ rule.Priority = int(native.Uint32(attrs[j].Value[0:4]))
+ }
+ }
+ res = append(res, *rule)
+ }
+
+ return res, nil
+}
XFRM_PROTO_ESP Proto = syscall.IPPROTO_ESP
XFRM_PROTO_AH Proto = syscall.IPPROTO_AH
XFRM_PROTO_HAO Proto = syscall.IPPROTO_DSTOPTS
- XFRM_PROTO_COMP Proto = syscall.IPPROTO_COMP
+ XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin
XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW
)
}
return fmt.Sprintf("%d", m)
}
+
+// XfrmMark represents the mark associated to the state or policy
+type XfrmMark struct {
+ Value uint32
+ Mask uint32 // writeMark sends a zero Mask as 0xffffffff
+}
+
+// String formats the mark as "(0x<value>,0x<mask>)" in hexadecimal.
+func (m *XfrmMark) String() string {
+ return fmt.Sprintf("(0x%x,0x%x)", m.Value, m.Mask)
+}
Src net.IP
Proto Proto
Mode Mode
+ Spi int
Reqid int
}
+// String renders every field of the template in a single brace-delimited
+// line; Spi and Reqid are printed in hexadecimal.
+func (t XfrmPolicyTmpl) String() string {
+ return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}",
+ t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid)
+}
+
// XfrmPolicy represents an ipsec policy. It represents the overlay network
// and has a list of XfrmPolicyTmpls representing the base addresses of
// the policy.
type XfrmPolicy struct {
Dst *net.IPNet
Src *net.IPNet
+ Proto Proto
+ DstPort int
+ SrcPort int
Dir Dir
Priority int
Index int
+ Mark *XfrmMark
Tmpls []XfrmPolicyTmpl
}
+
+// String renders the policy selector (Dst, Src, Proto, ports), Dir,
+// Priority, Index, Mark and the list of templates in a single
+// brace-delimited line.
+func (p XfrmPolicy) String() string {
+ return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Mark: %s, Tmpls: %s}",
+ p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Mark, p.Tmpls)
+}
)
+// selFromPolicy fills the xfrm selector sel from policy.
+//
+// Family defaults to nl.FAMILY_V4 and is replaced by the family of
+// policy.Dst when Dst is set; policy.Src does not influence Family.
+// Addresses and prefix lengths are copied only for the non-nil side.
+// Ports are stored through nl.Swap16, and a non-zero port gets an
+// all-ones port mask.
func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) {
- sel.Family = uint16(nl.GetIPFamily(policy.Dst.IP))
- sel.Daddr.FromIP(policy.Dst.IP)
- sel.Saddr.FromIP(policy.Src.IP)
- prefixlenD, _ := policy.Dst.Mask.Size()
- sel.PrefixlenD = uint8(prefixlenD)
- prefixlenS, _ := policy.Src.Mask.Size()
- sel.PrefixlenS = uint8(prefixlenS)
+ sel.Family = uint16(nl.FAMILY_V4)
+ if policy.Dst != nil {
+ sel.Family = uint16(nl.GetIPFamily(policy.Dst.IP))
+ sel.Daddr.FromIP(policy.Dst.IP)
+ prefixlenD, _ := policy.Dst.Mask.Size()
+ sel.PrefixlenD = uint8(prefixlenD)
+ }
+ if policy.Src != nil {
+ sel.Saddr.FromIP(policy.Src.IP)
+ prefixlenS, _ := policy.Src.Mask.Size()
+ sel.PrefixlenS = uint8(prefixlenS)
+ }
+ sel.Proto = uint8(policy.Proto)
+ sel.Dport = nl.Swap16(uint16(policy.DstPort))
+ sel.Sport = nl.Swap16(uint16(policy.SrcPort))
+ if sel.Dport != 0 {
+ sel.DportMask = ^uint16(0)
+ }
+ if sel.Sport != 0 {
+ sel.SportMask = ^uint16(0)
+ }
}
// XfrmPolicyAdd will add an xfrm policy to the system.
// Equivalent to: `ip xfrm policy add $policy`
func XfrmPolicyAdd(policy *XfrmPolicy) error {
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWPOLICY, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ return pkgHandle.XfrmPolicyAdd(policy)
+}
+
+// XfrmPolicyAdd will add an xfrm policy to the system.
+// Equivalent to: `ip xfrm policy add $policy`
+func (h *Handle) XfrmPolicyAdd(policy *XfrmPolicy) error {
+ return h.xfrmPolicyAddOrUpdate(policy, nl.XFRM_MSG_NEWPOLICY)
+}
+
+// XfrmPolicyUpdate will update an xfrm policy to the system.
+// Equivalent to: `ip xfrm policy update $policy`
+func XfrmPolicyUpdate(policy *XfrmPolicy) error {
+ return pkgHandle.XfrmPolicyUpdate(policy)
+}
+
+// XfrmPolicyUpdate will update an xfrm policy to the system.
+// Equivalent to: `ip xfrm policy update $policy`
+func (h *Handle) XfrmPolicyUpdate(policy *XfrmPolicy) error {
+ return h.xfrmPolicyAddOrUpdate(policy, nl.XFRM_MSG_UPDPOLICY)
+}
+
+func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
+ req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := &nl.XfrmUserpolicyInfo{}
selFromPolicy(&msg.Sel, policy)
userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst)
userTmpl.Saddr.FromIP(tmpl.Src)
userTmpl.XfrmId.Proto = uint8(tmpl.Proto)
+ userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi))
userTmpl.Mode = uint8(tmpl.Mode)
userTmpl.Reqid = uint32(tmpl.Reqid)
userTmpl.Aalgos = ^uint32(0)
tmpls := nl.NewRtAttr(nl.XFRMA_TMPL, tmplData)
req.AddData(tmpls)
}
+ if policy.Mark != nil {
+ out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(policy.Mark))
+ req.AddData(out)
+ }
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
return err
// the Tmpls are ignored when matching the policy to delete.
// Equivalent to: `ip xfrm policy del $policy`
func XfrmPolicyDel(policy *XfrmPolicy) error {
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELPOLICY, syscall.NLM_F_ACK)
-
- msg := &nl.XfrmUserpolicyId{}
- selFromPolicy(&msg.Sel, policy)
- msg.Index = uint32(policy.Index)
- msg.Dir = uint8(policy.Dir)
- req.AddData(msg)
+ return pkgHandle.XfrmPolicyDel(policy)
+}
- _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+// XfrmPolicyDel will delete an xfrm policy from the system. Note that
+// the Tmpls are ignored when matching the policy to delete.
+// Equivalent to: `ip xfrm policy del $policy`
+func (h *Handle) XfrmPolicyDel(policy *XfrmPolicy) error {
+ _, err := h.xfrmPolicyGetOrDelete(policy, nl.XFRM_MSG_DELPOLICY)
return err
}
// Equivalent to: `ip xfrm policy show`.
// The list can be filtered by ip family.
func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP)
+ return pkgHandle.XfrmPolicyList(family)
+}
+
+// XfrmPolicyList gets a list of xfrm policies in the system.
+// Equivalent to: `ip xfrm policy show`.
+// The list can be filtered by ip family.
+func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) {
+ req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
var res []XfrmPolicy
for _, m := range msgs {
- msg := nl.DeserializeXfrmUserpolicyInfo(m)
-
- if family != FAMILY_ALL && family != int(msg.Sel.Family) {
+ if policy, err := parseXfrmPolicy(m, family); err == nil {
+ res = append(res, *policy)
+ } else if err == familyError {
continue
+ } else {
+ return nil, err
}
+ }
+ return res, nil
+}
- var policy XfrmPolicy
+// XfrmPolicyGet gets the policy described by the index or selector, if found.
+// Equivalent to: `ip xfrm policy get { SELECTOR | index INDEX } dir DIR [ctx CTX ] [ mark MARK [ mask MASK ] ] [ ptype PTYPE ]`.
+func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) {
+ return pkgHandle.XfrmPolicyGet(policy)
+}
- policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD)
- policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS)
- policy.Priority = int(msg.Priority)
- policy.Index = int(msg.Index)
- policy.Dir = Dir(msg.Dir)
+// XfrmPolicyGet gets the policy described by the index or selector, if found.
+// Equivalent to: `ip xfrm policy get { SELECTOR | index INDEX } dir DIR [ctx CTX ] [ mark MARK [ mask MASK ] ] [ ptype PTYPE ]`.
+func (h *Handle) XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) {
+ return h.xfrmPolicyGetOrDelete(policy, nl.XFRM_MSG_GETPOLICY)
+}
- attrs, err := nl.ParseRouteAttr(m[msg.Len():])
- if err != nil {
- return nil, err
- }
+// XfrmPolicyFlush will flush the policies on the system.
+// Equivalent to: `ip xfrm policy flush`
+func XfrmPolicyFlush() error {
+ return pkgHandle.XfrmPolicyFlush()
+}
+
+// XfrmPolicyFlush will flush the policies on the system.
+// Equivalent to: `ip xfrm policy flush`
+func (h *Handle) XfrmPolicyFlush() error {
+ req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, syscall.NLM_F_ACK)
+ _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+ return err
+}
+
+// xfrmPolicyGetOrDelete sends a get (XFRM_MSG_GETPOLICY) or delete
+// (XFRM_MSG_DELPOLICY) request identifying the policy by its selector,
+// Index, Dir and optional Mark.
+//
+// For a delete it returns (nil, nil) on success. For a get it returns the
+// policy parsed from the first XFRM_MSG_NEWPOLICY reply, or syscall.ENOENT
+// when the request succeeded but no such reply was received.
+func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPolicy, error) {
+	req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK)
+
+	msg := &nl.XfrmUserpolicyId{}
+	selFromPolicy(&msg.Sel, policy)
+	msg.Index = uint32(policy.Index)
+	msg.Dir = uint8(policy.Dir)
+	req.AddData(msg)
+
+	if policy.Mark != nil {
+		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(policy.Mark))
+		req.AddData(out)
+	}
+
+	// A get is answered with a NEWPOLICY message; a delete is executed
+	// with response type 0, as in the original code.
+	resType := nl.XFRM_MSG_NEWPOLICY
+	if nlProto == nl.XFRM_MSG_DELPOLICY {
+		resType = 0
+	}
+
+	msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType))
+	if err != nil {
+		return nil, err
+	}
- for _, attr := range attrs {
- switch attr.Attr.Type {
- case nl.XFRMA_TMPL:
- max := len(attr.Value)
- for i := 0; i < max; i += nl.SizeofXfrmUserTmpl {
- var resTmpl XfrmPolicyTmpl
- tmpl := nl.DeserializeXfrmUserTmpl(attr.Value[i : i+nl.SizeofXfrmUserTmpl])
- resTmpl.Dst = tmpl.XfrmId.Daddr.ToIP()
- resTmpl.Src = tmpl.Saddr.ToIP()
- resTmpl.Proto = Proto(tmpl.XfrmId.Proto)
- resTmpl.Mode = Mode(tmpl.Mode)
- resTmpl.Reqid = int(tmpl.Reqid)
- policy.Tmpls = append(policy.Tmpls, resTmpl)
- }
+	if nlProto == nl.XFRM_MSG_DELPOLICY {
+		return nil, nil
+	}
+
+	// Guard the index below: without a reply of the expected type
+	// msgs[0] would panic.
+	if len(msgs) == 0 {
+		return nil, syscall.ENOENT
+	}
+
+	p, err := parseXfrmPolicy(msgs[0], FAMILY_ALL)
+	if err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+// parseXfrmPolicy decodes one netlink policy message into an XfrmPolicy.
+//
+// When family is not FAMILY_ALL and differs from the selector's family it
+// returns (nil, familyError) so that list callers can skip the entry.
+// Selector fields, Priority, Index and Dir come from the fixed header;
+// templates (XFRMA_TMPL) and the mark (XFRMA_MARK) come from the trailing
+// attributes. An attribute parse failure is returned as-is.
+func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
+ msg := nl.DeserializeXfrmUserpolicyInfo(m)
+
+ // This is mainly for the policy dump
+ if family != FAMILY_ALL && family != int(msg.Sel.Family) {
+ return nil, familyError
+ }
+
+ var policy XfrmPolicy
+
+ policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD)
+ policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS)
+ policy.Proto = Proto(msg.Sel.Proto)
+ policy.DstPort = int(nl.Swap16(msg.Sel.Dport))
+ policy.SrcPort = int(nl.Swap16(msg.Sel.Sport))
+ policy.Priority = int(msg.Priority)
+ policy.Index = int(msg.Index)
+ policy.Dir = Dir(msg.Dir)
+
+ attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+ if err != nil {
+ return nil, err
+ }
+
+ for _, attr := range attrs {
+ switch attr.Attr.Type {
+ case nl.XFRMA_TMPL:
+ // NOTE(review): the loop assumes len(attr.Value) is a whole
+ // multiple of nl.SizeofXfrmUserTmpl; a trailing partial entry
+ // would make the slice expression below go out of range.
+ max := len(attr.Value)
+ for i := 0; i < max; i += nl.SizeofXfrmUserTmpl {
+ var resTmpl XfrmPolicyTmpl
+ tmpl := nl.DeserializeXfrmUserTmpl(attr.Value[i : i+nl.SizeofXfrmUserTmpl])
+ resTmpl.Dst = tmpl.XfrmId.Daddr.ToIP()
+ resTmpl.Src = tmpl.Saddr.ToIP()
+ resTmpl.Proto = Proto(tmpl.XfrmId.Proto)
+ resTmpl.Mode = Mode(tmpl.Mode)
+ resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi))
+ resTmpl.Reqid = int(tmpl.Reqid)
+ policy.Tmpls = append(policy.Tmpls, resTmpl)
}
+ case nl.XFRMA_MARK:
+ mark := nl.DeserializeXfrmMark(attr.Value[:])
+ policy.Mark = new(XfrmMark)
+ policy.Mark.Value = mark.Value
+ policy.Mark.Mask = mark.Mask
}
- res = append(res, policy)
}
- return res, nil
+
+ return &policy, nil
}
package netlink
import (
+ "fmt"
"net"
)
Name string
Key []byte
TruncateLen int // Auth only
+ ICVLen int // AEAD only
}
-// EncapType is an enum representing an ipsec template direction.
+// String renders the algorithm as "{Name: ..., Key: 0x...}". The truncate
+// length and the ICV length are appended only when they are non-zero.
+func (a XfrmStateAlgo) String() string {
+ base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key)
+ if a.TruncateLen != 0 {
+ base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen)
+ }
+ if a.ICVLen != 0 {
+ base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen)
+ }
+ return fmt.Sprintf("%s}", base)
+}
+
+// EncapType is an enum representing the optional packet encapsulation.
type EncapType uint8
const (
func (e EncapType) String() string {
switch e {
case XFRM_ENCAP_ESPINUDP_NONIKE:
- return "espinudp-nonike"
+ return "espinudp-non-ike"
case XFRM_ENCAP_ESPINUDP:
return "espinudp"
}
return "unknown"
}
-// XfrmEncap represents the encapsulation to use for the ipsec encryption.
+// XfrmStateEncap represents the encapsulation to use for the ipsec encryption.
type XfrmStateEncap struct {
Type EncapType
SrcPort int
OriginalAddress net.IP
}
+// String renders the encapsulation type, source and destination ports and
+// the original address in a single brace-delimited line.
+func (e XfrmStateEncap) String() string {
+ return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}",
+ e.Type, e.SrcPort, e.DstPort, e.OriginalAddress)
+}
+
+// XfrmStateLimits represents the configured limits for the state.
+// XfrmState.Print shows the byte and packet limits through printLimit,
+// which renders the all-ones value as "(INF)".
+type XfrmStateLimits struct {
+ ByteSoft uint64
+ ByteHard uint64
+ PacketSoft uint64
+ PacketHard uint64
+ TimeSoft uint64
+ TimeHard uint64
+ TimeUseSoft uint64
+ TimeUseHard uint64
+}
+
// XfrmState represents the state of an ipsec policy. It optionally
// contains an XfrmStateAlgo for encryption and one for authentication.
type XfrmState struct {
Spi int
Reqid int
ReplayWindow int
+ Limits XfrmStateLimits
+ Mark *XfrmMark
Auth *XfrmStateAlgo
Crypt *XfrmStateAlgo
+ Aead *XfrmStateAlgo
Encap *XfrmStateEncap
}
+
+// String renders the addresses, protocol, mode, SPI, request id, replay
+// window, mark and algorithm/encapsulation settings of the state on one
+// line. Limits are not included; use Print(true) for those.
+func (sa XfrmState) String() string {
+ return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v,Encap: %v",
+ sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap)
+}
+// Print returns the same text as String when stats is false. When stats is
+// true it appends the eight Limits values; byte and packet limits are
+// formatted by printLimit, time limits as plain decimal numbers.
+func (sa XfrmState) Print(stats bool) string {
+ if !stats {
+ return sa.String()
+ }
+
+ return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d",
+ sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard),
+ sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard)
+}
+
+// printLimit formats a limit value for display: the all-ones uint64 is
+// shown as "(INF)", anything else as its decimal value.
+func printLimit(lmt uint64) string {
+ if lmt == ^uint64(0) {
+ return "(INF)"
+ }
+ return fmt.Sprintf("%d", lmt)
+}
import (
"fmt"
"syscall"
+ "unsafe"
"github.com/vishvananda/netlink/nl"
)
return algo.Serialize()
}
+// writeStateAlgoAead serializes a as an nl.XfrmAlgoAEAD. The key length is
+// expressed in bits (len(a.Key) * 8), the ICV length is taken from
+// a.ICVLen as given, and at most the first 64 bytes of a.Name are copied
+// into AlgName.
+func writeStateAlgoAead(a *XfrmStateAlgo) []byte {
+ algo := nl.XfrmAlgoAEAD{
+ AlgKeyLen: uint32(len(a.Key) * 8),
+ AlgICVLen: uint32(a.ICVLen),
+ AlgKey: a.Key,
+ }
+ end := len(a.Name)
+ if end > 64 {
+ end = 64
+ }
+ copy(algo.AlgName[:end], a.Name)
+ return algo.Serialize()
+}
+
+// writeMark serializes m as an nl.XfrmMark. A zero Mask is replaced with
+// all ones (0xffffffff) in the serialized copy; m itself is not modified.
+func writeMark(m *XfrmMark) []byte {
+ mark := &nl.XfrmMark{
+ Value: m.Value,
+ Mask: m.Mask,
+ }
+ if mark.Mask == 0 {
+ mark.Mask = ^uint32(0)
+ }
+ return mark.Serialize()
+}
+
// XfrmStateAdd will add an xfrm state to the system.
// Equivalent to: `ip xfrm state add $state`
func XfrmStateAdd(state *XfrmState) error {
+ return pkgHandle.XfrmStateAdd(state)
+}
+
+// XfrmStateAdd will add an xfrm state to the system.
+// Equivalent to: `ip xfrm state add $state`
+func (h *Handle) XfrmStateAdd(state *XfrmState) error {
+ return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_NEWSA)
+}
+
+// XfrmStateUpdate will update an xfrm state to the system.
+// Equivalent to: `ip xfrm state update $state`
+func XfrmStateUpdate(state *XfrmState) error {
+ return pkgHandle.XfrmStateUpdate(state)
+}
+
+// XfrmStateUpdate will update an xfrm state to the system.
+// Equivalent to: `ip xfrm state update $state`
+func (h *Handle) XfrmStateUpdate(state *XfrmState) error {
+ return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_UPDSA)
+}
+
+func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
// A state with spi 0 can't be deleted so don't allow it to be set
if state.Spi == 0 {
return fmt.Errorf("Spi must be set when adding xfrm state.")
}
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWSA, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+ req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := &nl.XfrmUsersaInfo{}
msg.Family = uint16(nl.GetIPFamily(state.Dst))
msg.Id.Spi = nl.Swap32(uint32(state.Spi))
msg.Reqid = uint32(state.Reqid)
msg.ReplayWindow = uint8(state.ReplayWindow)
- msg.Lft.SoftByteLimit = nl.XFRM_INF
- msg.Lft.HardByteLimit = nl.XFRM_INF
- msg.Lft.SoftPacketLimit = nl.XFRM_INF
- msg.Lft.HardPacketLimit = nl.XFRM_INF
+ limitsToLft(state.Limits, &msg.Lft)
req.AddData(msg)
if state.Auth != nil {
out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt))
req.AddData(out)
}
+ if state.Aead != nil {
+ out := nl.NewRtAttr(nl.XFRMA_ALG_AEAD, writeStateAlgoAead(state.Aead))
+ req.AddData(out)
+ }
if state.Encap != nil {
encapData := make([]byte, nl.SizeofXfrmEncapTmpl)
encap := nl.DeserializeXfrmEncapTmpl(encapData)
out := nl.NewRtAttr(nl.XFRMA_ENCAP, encapData)
req.AddData(out)
}
+ if state.Mark != nil {
+ out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
+ req.AddData(out)
+ }
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
return err
// the Algos are ignored when matching the state to delete.
// Equivalent to: `ip xfrm state del $state`
func XfrmStateDel(state *XfrmState) error {
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELSA, syscall.NLM_F_ACK)
-
- msg := &nl.XfrmUsersaId{}
- msg.Daddr.FromIP(state.Dst)
- msg.Family = uint16(nl.GetIPFamily(state.Dst))
- msg.Proto = uint8(state.Proto)
- msg.Spi = nl.Swap32(uint32(state.Spi))
- req.AddData(msg)
-
- saddr := nl.XfrmAddress{}
- saddr.FromIP(state.Src)
- srcdata := nl.NewRtAttr(nl.XFRMA_SRCADDR, saddr.Serialize())
-
- req.AddData(srcdata)
+ return pkgHandle.XfrmStateDel(state)
+}
- _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+// XfrmStateDel will delete an xfrm state from the system. Note that
+// the Algos are ignored when matching the state to delete.
+// Equivalent to: `ip xfrm state del $state`
+func (h *Handle) XfrmStateDel(state *XfrmState) error {
+ _, err := h.xfrmStateGetOrDelete(state, nl.XFRM_MSG_DELSA)
return err
}
// XfrmStateList gets a list of xfrm states in the system.
-// Equivalent to: `ip xfrm state show`.
+// Equivalent to: `ip [-4|-6] xfrm state show`.
// The list can be filtered by ip family.
func XfrmStateList(family int) ([]XfrmState, error) {
- req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP)
+ return pkgHandle.XfrmStateList(family)
+}
- msg := nl.NewIfInfomsg(family)
- req.AddData(msg)
+// XfrmStateList gets a list of xfrm states in the system.
+// Equivalent to: `ip xfrm state show`.
+// The list can be filtered by ip family.
+func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) {
+ req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP)
msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
if err != nil {
var res []XfrmState
for _, m := range msgs {
- msg := nl.DeserializeXfrmUsersaInfo(m)
-
- if family != FAMILY_ALL && family != int(msg.Family) {
+ if state, err := parseXfrmState(m, family); err == nil {
+ res = append(res, *state)
+ } else if err == familyError {
continue
+ } else {
+ return nil, err
}
+ }
+ return res, nil
+}
- var state XfrmState
+// XfrmStateGet looks up the xfrm state identified by the given SA ID using
+// the package-level handle and returns it, if found.
+// Equivalent to: `ip xfrm state get ID [ mark MARK [ mask MASK ] ]`.
+// Only the fields which constitute the SA ID must be filled in:
+// ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM-PROTO ] [ spi SPI ]
+// mark is optional
+func XfrmStateGet(state *XfrmState) (*XfrmState, error) {
+	return pkgHandle.XfrmStateGet(state)
+}
- state.Dst = msg.Id.Daddr.ToIP()
- state.Src = msg.Saddr.ToIP()
- state.Proto = Proto(msg.Id.Proto)
- state.Mode = Mode(msg.Mode)
- state.Spi = int(nl.Swap32(msg.Id.Spi))
- state.Reqid = int(msg.Reqid)
- state.ReplayWindow = int(msg.ReplayWindow)
+// XfrmStateGet looks up the xfrm state identified by the given SA ID through
+// this handle and returns it, if found. The lookup is an XFRM_MSG_GETSA
+// request issued by xfrmStateGetOrDelete.
+// Equivalent to: `ip xfrm state get ID [ mark MARK [ mask MASK ] ]`.
+// Only the fields which constitute the SA ID must be filled in:
+// ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM-PROTO ] [ spi SPI ]
+// mark is optional
+func (h *Handle) XfrmStateGet(state *XfrmState) (*XfrmState, error) {
+	return h.xfrmStateGetOrDelete(state, nl.XFRM_MSG_GETSA)
+}
- attrs, err := nl.ParseRouteAttr(m[msg.Len():])
- if err != nil {
- return nil, err
- }
+// xfrmStateGetOrDelete sends an SA request of type nlProto (XFRM_MSG_GETSA or
+// XFRM_MSG_DELSA) for the state identified by state's Dst, Proto and Spi,
+// adding the Mark and Src attributes when they are set.
+// For XFRM_MSG_DELSA it returns (nil, nil) on success. For any other request
+// it parses and returns the first reply message, or an error when the reply
+// contains no message at all.
+func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState, error) {
+	req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK)
- for _, attr := range attrs {
- switch attr.Attr.Type {
- case nl.XFRMA_ALG_AUTH, nl.XFRMA_ALG_CRYPT:
- var resAlgo *XfrmStateAlgo
- if attr.Attr.Type == nl.XFRMA_ALG_AUTH {
- if state.Auth == nil {
- state.Auth = new(XfrmStateAlgo)
- }
- resAlgo = state.Auth
- } else {
- state.Crypt = new(XfrmStateAlgo)
- resAlgo = state.Crypt
- }
- algo := nl.DeserializeXfrmAlgo(attr.Value[:])
- (*resAlgo).Name = nl.BytesToString(algo.AlgName[:])
- (*resAlgo).Key = algo.AlgKey
- case nl.XFRMA_ALG_AUTH_TRUNC:
+	msg := &nl.XfrmUsersaId{}
+	msg.Family = uint16(nl.GetIPFamily(state.Dst))
+	msg.Daddr.FromIP(state.Dst)
+	msg.Proto = uint8(state.Proto)
+	msg.Spi = nl.Swap32(uint32(state.Spi))
+	req.AddData(msg)
+
+	if state.Mark != nil {
+		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
+		req.AddData(out)
+	}
+	if state.Src != nil {
+		out := nl.NewRtAttr(nl.XFRMA_SRCADDR, state.Src.To16())
+		req.AddData(out)
+	}
+
+	// A delete is executed with a response type of 0; every other request
+	// asks Execute for XFRM_MSG_NEWSA replies.
+	resType := nl.XFRM_MSG_NEWSA
+	if nlProto == nl.XFRM_MSG_DELSA {
+		resType = 0
+	}
+
+	msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType))
+	if err != nil {
+		return nil, err
+	}
+
+	if nlProto == nl.XFRM_MSG_DELSA {
+		return nil, nil
+	}
+
+	// Guard the index below: an empty reply must surface as an error
+	// instead of an index-out-of-range panic.
+	if len(msgs) == 0 {
+		return nil, fmt.Errorf("no xfrm state returned by the kernel")
+	}
+
+	s, err := parseXfrmState(msgs[0], FAMILY_ALL)
+	if err != nil {
+		return nil, err
+	}
+
+	return s, nil
+}
+
+// familyError is the sentinel returned by parseXfrmState when the family of
+// the parsed message does not match the requested one; XfrmStateList compares
+// against it to skip such messages.
+var familyError = fmt.Errorf("family error")
+
+// parseXfrmState decodes one netlink message payload m into an XfrmState.
+// It returns familyError when family is not FAMILY_ALL and differs from the
+// family carried in the message, and passes through any error reported by
+// nl.ParseRouteAttr. The fixed-size header supplies Dst, Src, Proto, Mode,
+// Spi, Reqid, ReplayWindow and Limits; the route attributes that follow it
+// fill in Auth, Crypt, Aead, Encap and Mark when present.
+func parseXfrmState(m []byte, family int) (*XfrmState, error) {
+ msg := nl.DeserializeXfrmUsersaInfo(m)
+
+ // This is mainly for the state dump
+ if family != FAMILY_ALL && family != int(msg.Family) {
+ return nil, familyError
+ }
+
+ var state XfrmState
+
+ state.Dst = msg.Id.Daddr.ToIP()
+ state.Src = msg.Saddr.ToIP()
+ state.Proto = Proto(msg.Id.Proto)
+ state.Mode = Mode(msg.Mode)
+ state.Spi = int(nl.Swap32(msg.Id.Spi))
+ state.Reqid = int(msg.Reqid)
+ state.ReplayWindow = int(msg.ReplayWindow)
+ lftToLimits(&msg.Lft, &state.Limits)
+
+ attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:])
+ if err != nil {
+ return nil, err
+ }
+
+ for _, attr := range attrs {
+ switch attr.Attr.Type {
+ case nl.XFRMA_ALG_AUTH, nl.XFRMA_ALG_CRYPT:
+ var resAlgo *XfrmStateAlgo
+ if attr.Attr.Type == nl.XFRMA_ALG_AUTH {
if state.Auth == nil {
state.Auth = new(XfrmStateAlgo)
}
- algo := nl.DeserializeXfrmAlgoAuth(attr.Value[:])
- state.Auth.Name = nl.BytesToString(algo.AlgName[:])
- state.Auth.Key = algo.AlgKey
- state.Auth.TruncateLen = int(algo.AlgTruncLen)
- case nl.XFRMA_ENCAP:
- encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:])
- state.Encap = new(XfrmStateEncap)
- state.Encap.Type = EncapType(encap.EncapType)
- state.Encap.SrcPort = int(nl.Swap16(encap.EncapSport))
- state.Encap.DstPort = int(nl.Swap16(encap.EncapDport))
- state.Encap.OriginalAddress = encap.EncapOa.ToIP()
+ resAlgo = state.Auth
+ } else {
+ state.Crypt = new(XfrmStateAlgo)
+ resAlgo = state.Crypt
}
-
+ algo := nl.DeserializeXfrmAlgo(attr.Value[:])
+ (*resAlgo).Name = nl.BytesToString(algo.AlgName[:])
+ (*resAlgo).Key = algo.AlgKey
+ case nl.XFRMA_ALG_AUTH_TRUNC:
+ if state.Auth == nil {
+ state.Auth = new(XfrmStateAlgo)
+ }
+ algo := nl.DeserializeXfrmAlgoAuth(attr.Value[:])
+ state.Auth.Name = nl.BytesToString(algo.AlgName[:])
+ state.Auth.Key = algo.AlgKey
+ state.Auth.TruncateLen = int(algo.AlgTruncLen)
+ case nl.XFRMA_ALG_AEAD:
+ state.Aead = new(XfrmStateAlgo)
+ algo := nl.DeserializeXfrmAlgoAEAD(attr.Value[:])
+ state.Aead.Name = nl.BytesToString(algo.AlgName[:])
+ state.Aead.Key = algo.AlgKey
+ state.Aead.ICVLen = int(algo.AlgICVLen)
+ case nl.XFRMA_ENCAP:
+ encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:])
+ state.Encap = new(XfrmStateEncap)
+ state.Encap.Type = EncapType(encap.EncapType)
+ state.Encap.SrcPort = int(nl.Swap16(encap.EncapSport))
+ state.Encap.DstPort = int(nl.Swap16(encap.EncapDport))
+ state.Encap.OriginalAddress = encap.EncapOa.ToIP()
+ case nl.XFRMA_MARK:
+ mark := nl.DeserializeXfrmMark(attr.Value[:])
+ state.Mark = new(XfrmMark)
+ state.Mark.Value = mark.Value
+ state.Mark.Mask = mark.Mask
}
- res = append(res, state)
}
- return res, nil
+
+ return &state, nil
+}
+
+// XfrmStateFlush flushes the xfrm states on the system using the
+// package-level handle.
+// A proto of 0 means any transformation protocol.
+// Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]`
+func XfrmStateFlush(proto Proto) error {
+	return pkgHandle.XfrmStateFlush(proto)
+}
+
+// XfrmStateFlush flushes the xfrm states on the system through this handle
+// by sending an XFRM_MSG_FLUSHSA request carrying the given protocol.
+// A proto of 0 means any transformation protocol.
+// Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]`
+func (h *Handle) XfrmStateFlush(proto Proto) error {
+	flushReq := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, syscall.NLM_F_ACK)
+	flushReq.AddData(&nl.XfrmUsersaFlush{Proto: uint8(proto)})
+
+	// Only the error of the exchange matters; the reply payload is dropped.
+	_, err := flushReq.Execute(syscall.NETLINK_XFRM, 0)
+	return err
+}
+
+// limitsToLft fills the netlink lifetime configuration lft from lmts.
+// A byte or packet limit of 0 in lmts is written as nl.XFRM_INF; the four
+// time based limits are copied as they are.
+func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) {
+	// Start with every byte/packet limit at XFRM_INF, then override the
+	// ones that were explicitly configured (non-zero).
+	lft.SoftByteLimit = nl.XFRM_INF
+	lft.HardByteLimit = nl.XFRM_INF
+	lft.SoftPacketLimit = nl.XFRM_INF
+	lft.HardPacketLimit = nl.XFRM_INF
+
+	if lmts.ByteSoft != 0 {
+		lft.SoftByteLimit = lmts.ByteSoft
+	}
+	if lmts.ByteHard != 0 {
+		lft.HardByteLimit = lmts.ByteHard
+	}
+	if lmts.PacketSoft != 0 {
+		lft.SoftPacketLimit = lmts.PacketSoft
+	}
+	if lmts.PacketHard != 0 {
+		lft.HardPacketLimit = lmts.PacketHard
+	}
+
+	lft.SoftAddExpiresSeconds = lmts.TimeSoft
+	lft.HardAddExpiresSeconds = lmts.TimeHard
+	lft.SoftUseExpiresSeconds = lmts.TimeUseSoft
+	lft.HardUseExpiresSeconds = lmts.TimeUseHard
+}
+
+// lftToLimits copies the netlink lifetime configuration lft into lmts by
+// reinterpreting the memory behind lft as an XfrmStateLimits value. Unlike
+// limitsToLft, no value is translated: the limits are copied as they are.
+// NOTE(review): this assumes XfrmStateLimits and nl.XfrmLifetimeCfg share the
+// same memory layout (field order and sizes) - confirm against both types.
+func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) {
+ *lmts = *(*XfrmStateLimits)(unsafe.Pointer(lft))
}
--- /dev/null
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ Copyright 2014 Vishvananda Ishaya.
+ Copyright 2014 Docker, Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
--- /dev/null
+# netns - network namespaces in go #
+
+The netns package provides an ultra-simple interface for handling
+network namespaces in go. Changing namespaces requires elevated
+privileges, so in most cases this code needs to be run as root.
+
+## Local Build and Test ##
+
+You can use the `go get` command:
+
+ go get github.com/vishvananda/netns
+
+Testing (requires root):
+
+ sudo -E go test github.com/vishvananda/netns
+
+## Example ##
+
+```go
+package main
+
+import (
+ "fmt"
+ "net"
+ "runtime"
+ "github.com/vishvananda/netns"
+)
+
+func main() {
+ // Lock the OS Thread so we don't accidentally switch namespaces
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ // Save the current network namespace
+ origns, _ := netns.Get()
+ defer origns.Close()
+
+ // Create a new network namespace
+ newns, _ := netns.New()
+ defer newns.Close()
+
+ // Do something with the network namespace
+ ifaces, _ := net.Interfaces()
+ fmt.Printf("Interfaces: %v\n", ifaces)
+
+ // Switch back to the original namespace
+ netns.Set(origns)
+}
+
+```
--- /dev/null
+// Package netns allows ultra-simple network namespace handling. NsHandles
+// can be retrieved and set. Note that the current namespace is thread
+// local so actions that set and reset namespaces should use LockOSThread
+// to make sure the namespace doesn't change due to a goroutine switch.
+// It is best to close NsHandles when you are done with them. This can be
+// accomplished via a `defer ns.Close()` on the handle. Changing namespaces
+// requires elevated privileges, so in most cases this code needs to be run
+// as root.
+package netns
+
+import (
+ "fmt"
+ "syscall"
+)
+
+// NsHandle is a handle to a network namespace. It can be cast directly
+// to an int and used as a file descriptor. The value -1 stands for an
+// empty or closed handle (see None, IsOpen and Close).
+type NsHandle int
+
+// Equal reports whether two handles refer to the same network namespace.
+// Identical handle values are equal; otherwise both file descriptors are
+// stat'ed and their device and inode numbers are compared. A failing
+// Fstat on either descriptor makes the handles unequal.
+func (ns NsHandle) Equal(other NsHandle) bool {
+	if ns == other {
+		return true
+	}
+	var mine, theirs syscall.Stat_t
+	if syscall.Fstat(int(ns), &mine) != nil || syscall.Fstat(int(other), &theirs) != nil {
+		return false
+	}
+	return mine.Dev == theirs.Dev && mine.Ino == theirs.Ino
+}
+
+// String renders the handle as its file descriptor number followed by the
+// device and inode it points to. The handle -1 is shown as "NS(None)", and
+// a descriptor that cannot be stat'ed is shown as unknown.
+func (ns NsHandle) String() string {
+	if ns == -1 {
+		return "NS(None)"
+	}
+	var st syscall.Stat_t
+	if statErr := syscall.Fstat(int(ns), &st); statErr != nil {
+		return fmt.Sprintf("NS(%d: unknown)", ns)
+	}
+	return fmt.Sprintf("NS(%d: %d, %d)", ns, st.Dev, st.Ino)
+}
+
+// UniqueId returns a string built from the device and inode numbers of the
+// namespace the handle points to, which identifies that namespace. The
+// handle -1 yields "NS(none)" and a descriptor that cannot be stat'ed
+// yields "NS(unknown)".
+func (ns NsHandle) UniqueId() string {
+	if ns == -1 {
+		return "NS(none)"
+	}
+	var st syscall.Stat_t
+	if statErr := syscall.Fstat(int(ns), &st); statErr != nil {
+		return "NS(unknown)"
+	}
+	return fmt.Sprintf("NS(%d:%d)", st.Dev, st.Ino)
+}
+
+// IsOpen reports whether the handle differs from -1, the value that Close
+// stores in a handle once its descriptor has been closed.
+func (ns NsHandle) IsOpen() bool {
+	return int(ns) != -1
+}
+
+// Close closes the file descriptor behind the NsHandle and, on success,
+// resets the handle to -1. When closing fails the error is returned and
+// the handle is left untouched.
+// It is not safe to use an NsHandle after Close() is called.
+func (ns *NsHandle) Close() error {
+	closeErr := syscall.Close(int(*ns))
+	if closeErr == nil {
+		*ns = -1
+	}
+	return closeErr
+}
+
+// None gets an empty (closed) NsHandle.
+func None() NsHandle {
+ return NsHandle(-1)
+}
--- /dev/null
+// +build linux
+
+package netns
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "syscall"
+)
+
+// SYS_SETNS is the number of the setns syscall, which allows changing the
+// namespace of the current process. The number is selected from the table
+// below by runtime.GOARCH; for an architecture that is not listed the map
+// lookup yields 0.
+var SYS_SETNS = map[string]uintptr{
+ "386": 346,
+ "amd64": 308,
+ "arm64": 268,
+ "arm": 375,
+ "ppc64": 350,
+ "ppc64le": 350,
+ "s390x": 339,
+}[runtime.GOARCH]
+
+// Clone flags. Of these, only CLONE_NEWNET is referenced by the functions
+// in this file (Set and New).
+//
+// Deprecated: use syscall pkg instead (go >= 1.5 needed).
+const (
+ CLONE_NEWUTS = 0x04000000 /* New utsname group? */
+ CLONE_NEWIPC = 0x08000000 /* New ipcs */
+ CLONE_NEWUSER = 0x10000000 /* New user namespace */
+ CLONE_NEWPID = 0x20000000 /* New pid namespace */
+ CLONE_NEWNET = 0x40000000 /* New network namespace */
+ CLONE_IO = 0x80000000 /* Get io context */
+)
+
+// Setns invokes the setns syscall (number SYS_SETNS) with the handle as the
+// file descriptor and nstype as the namespace type. A non-zero errno from
+// the syscall is returned as the error. Note that this should be a method
+// in syscall but it has not been added.
+func Setns(ns NsHandle, nstype int) (err error) {
+	if _, _, errno := syscall.Syscall(SYS_SETNS, uintptr(ns), uintptr(nstype), 0); errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// Set switches the current network namespace to the one represented by
+// ns, by calling Setns with the CLONE_NEWNET namespace type.
+func Set(ns NsHandle) (err error) {
+	return Setns(ns, CLONE_NEWNET)
+}
+
+// New unshares the network namespace (CLONE_NEWNET) and returns a handle
+// to the resulting namespace as obtained from Get. If unsharing fails, the
+// handle -1 and the error are returned.
+func New() (ns NsHandle, err error) {
+	if unshareErr := syscall.Unshare(CLONE_NEWNET); unshareErr != nil {
+		return -1, unshareErr
+	}
+	return Get()
+}
+
+// Get returns a handle to the current thread's network namespace, looked
+// up through the process id and the thread id of the caller.
+func Get() (NsHandle, error) {
+	pid, tid := os.Getpid(), syscall.Gettid()
+	return GetFromThread(pid, tid)
+}
+
+// GetFromPath opens path read-only and returns the resulting file
+// descriptor as a handle to the network namespace identified by that path.
+// If the open fails, the handle -1 and the error are returned.
+func GetFromPath(path string) (NsHandle, error) {
+	fd, openErr := syscall.Open(path, syscall.O_RDONLY, 0)
+	if openErr == nil {
+		return NsHandle(fd), nil
+	}
+	return -1, openErr
+}
+
+// GetFromName returns a handle to a named network namespace, such as one
+// created by `ip netns add`, by opening /var/run/netns/<name>.
+func GetFromName(name string) (NsHandle, error) {
+	nsPath := "/var/run/netns/" + name
+	return GetFromPath(nsPath)
+}
+
+// GetFromPid returns a handle to the network namespace of the process with
+// the given pid, by opening /proc/<pid>/ns/net.
+func GetFromPid(pid int) (NsHandle, error) {
+	nsPath := "/proc/" + strconv.Itoa(pid) + "/ns/net"
+	return GetFromPath(nsPath)
+}
+
+// GetFromThread returns a handle to the network namespace of the thread
+// tid of process pid, by opening /proc/<pid>/task/<tid>/ns/net.
+func GetFromThread(pid, tid int) (NsHandle, error) {
+	nsPath := "/proc/" + strconv.Itoa(pid) + "/task/" + strconv.Itoa(tid) + "/ns/net"
+	return GetFromPath(nsPath)
+}
+
+// GetFromDocker returns a handle to the network namespace of a docker
+// container. The id is prefix-matched against the running docker
+// containers, so a short identifier can be used as long as it isn't
+// ambiguous. If the container's pid cannot be determined, the handle -1
+// and the error are returned.
+func GetFromDocker(id string) (NsHandle, error) {
+	containerPid, err := getPidForContainer(id)
+	if err == nil {
+		return GetFromPid(containerPid)
+	}
+	return -1, err
+}
+
+// findCgroupMountpoint scans /proc/mounts and returns the mount point of the
+// first cgroup filesystem whose mount options contain cgroupType. An error
+// is returned when /proc/mounts cannot be read or no such mount exists.
+// borrowed from docker/utils/utils.go
+func findCgroupMountpoint(cgroupType string) (string, error) {
+	mounts, err := ioutil.ReadFile("/proc/mounts")
+	if err != nil {
+		return "", err
+	}
+
+	// /proc/mounts has 6 fields per line, one mount per line, e.g.
+	// cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0
+	for _, entry := range strings.Split(string(mounts), "\n") {
+		fields := strings.Split(entry, " ")
+		if len(fields) != 6 || fields[2] != "cgroup" {
+			continue
+		}
+		for _, option := range strings.Split(fields[3], ",") {
+			if option == cgroupType {
+				return fields[1], nil
+			}
+		}
+	}
+
+	return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType)
+}
+
+// getThisCgroup returns the relative path to the cgroup of type cgroupType
+// that docker is running in. The docker pid is read from the first line of
+// /var/run/docker.pid and the cgroup path from /proc/<pid>/cgroup.
+// An error is returned when either file cannot be read, when the pid is
+// missing or not a number, or when no entry of type cgroupType is listed.
+// borrowed from docker/utils/utils.go
+// modified to get the docker pid instead of using /proc/self
+func getThisCgroup(cgroupType string) (string, error) {
+	dockerpid, err := ioutil.ReadFile("/var/run/docker.pid")
+	if err != nil {
+		return "", err
+	}
+	result := strings.Split(string(dockerpid), "\n")
+	if len(result) == 0 || len(result[0]) == 0 {
+		return "", fmt.Errorf("docker pid not found in /var/run/docker.pid")
+	}
+	pid, err := strconv.Atoi(result[0])
+	if err != nil {
+		// Report a malformed pid here rather than trying to read
+		// /proc/0/cgroup further down.
+		return "", fmt.Errorf("Invalid docker pid '%s': %s", result[0], err)
+	}
+
+	output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+	if err != nil {
+		return "", err
+	}
+	for _, line := range strings.Split(string(output), "\n") {
+		// Entries look like "hierarchy-ID:controller-list:cgroup-path".
+		// Split into at most three fields so that a ':' inside the path is
+		// kept, and skip lines with fewer fields (e.g. the empty string
+		// after the final newline) instead of indexing out of range.
+		parts := strings.SplitN(line, ":", 3)
+		if len(parts) < 3 {
+			continue
+		}
+		// any type used by docker should work
+		if parts[1] == cgroupType {
+			return parts[2], nil
+		}
+	}
+	return "", fmt.Errorf("cgroup '%s' not found in /proc/%d/cgroup", cgroupType, pid)
+}
+
+// getPidForContainer returns the first pid listed in the "tasks" file of
+// the cgroup belonging to the container whose id starts with id. Several
+// known cgroup layouts are globbed in turn; more than one match for a
+// layout is reported as an ambiguous id.
+// borrowed from docker/utils/utils.go
+// modified to only return the first pid
+// modified to glob with id
+// modified to search for newer docker containers
+func getPidForContainer(id string) (int, error) {
+	// memory is chosen randomly, any cgroup used by docker works
+	const cgroupType = "memory"
+
+	root, err := findCgroupMountpoint(cgroupType)
+	if err != nil {
+		return 0, err
+	}
+
+	this, err := getThisCgroup(cgroupType)
+	if err != nil {
+		return 0, err
+	}
+
+	// The id is treated as a prefix, hence the trailing wildcard.
+	pattern := id + "*"
+
+	candidates := []string{
+		filepath.Join(root, this, pattern, "tasks"),
+		// With more recent lxc versions use, cgroup will be in lxc/
+		filepath.Join(root, this, "lxc", pattern, "tasks"),
+		// With more recent docker, cgroup will be in docker/
+		filepath.Join(root, this, "docker", pattern, "tasks"),
+		// Even more recent docker versions under systemd use docker-<id>.scope/
+		filepath.Join(root, "system.slice", "docker-"+pattern+".scope", "tasks"),
+		// Even more recent docker versions under cgroup/systemd/docker/<id>/
+		filepath.Join(root, "..", "systemd", "docker", pattern, "tasks"),
+	}
+
+	tasksFile := ""
+	for _, candidate := range candidates {
+		matches, _ := filepath.Glob(candidate)
+		if len(matches) > 1 {
+			return 0, fmt.Errorf("Ambiguous id supplied: %v", matches)
+		}
+		if len(matches) == 1 {
+			tasksFile = matches[0]
+			break
+		}
+	}
+
+	if tasksFile == "" {
+		return 0, fmt.Errorf("Unable to find container: %v", id)
+	}
+
+	content, err := ioutil.ReadFile(tasksFile)
+	if err != nil {
+		return 0, err
+	}
+
+	lines := strings.Split(string(content), "\n")
+	if len(lines) == 0 || len(lines[0]) == 0 {
+		return 0, fmt.Errorf("No pid found for container")
+	}
+
+	pid, err := strconv.Atoi(lines[0])
+	if err != nil {
+		return pid, fmt.Errorf("Invalid pid '%s': %s", lines[0], err)
+	}
+
+	return pid, nil
+}
--- /dev/null
+// +build !linux
+
+package netns
+
+import (
+ "errors"
+)
+
+var (
+ // ErrNotImplemented is the error returned by every function in this
+ // file, which is built only on non-linux platforms (see the build tag).
+ ErrNotImplemented = errors.New("not implemented")
+)
+
+// Set is the non-linux stub of Set; it always returns ErrNotImplemented.
+func Set(ns NsHandle) (err error) {
+	return ErrNotImplemented
+}
+
+// New is the non-linux stub of New; it always returns the empty handle
+// together with ErrNotImplemented.
+func New() (ns NsHandle, err error) {
+	return None(), ErrNotImplemented
+}
+
+// Get is the non-linux stub of Get; it always returns the empty handle
+// together with ErrNotImplemented.
+func Get() (NsHandle, error) {
+	return None(), ErrNotImplemented
+}
+
+// GetFromName is the non-linux stub of GetFromName; it always returns the
+// empty handle together with ErrNotImplemented.
+func GetFromName(name string) (NsHandle, error) {
+	return None(), ErrNotImplemented
+}
+
+// GetFromPid is the non-linux stub of GetFromPid; it always returns the
+// empty handle together with ErrNotImplemented.
+func GetFromPid(pid int) (NsHandle, error) {
+	return None(), ErrNotImplemented
+}
+
+// GetFromDocker is the non-linux stub of GetFromDocker; it always returns
+// the empty handle together with ErrNotImplemented.
+func GetFromDocker(id string) (NsHandle, error) {
+	return None(), ErrNotImplemented
+}