Add plugin code
author: Eugene Yakubovich <eugene.yakubovich@coreos.com>
Wed, 15 Apr 2015 22:35:02 +0000 (15:35 -0700)
committer: Eugene Yakubovich <eugene.yakubovich@coreos.com>
Mon, 27 Apr 2015 21:14:29 +0000 (14:14 -0700)
This adds basic plugins.
"main" types: veth, bridge, macvlan
"ipam" type: host-local

The code has been ported over from github.com/coreos/rkt project
and adapted to fit the CNI spec.

74 files changed:
Godeps/Godeps.json [new file with mode: 0644]
Godeps/Readme [new file with mode: 0644]
Godeps/_workspace/.gitignore [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/.travis.yml [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/LICENSE [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/Makefile [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/README.md [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/addr_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/link.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/neigh.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/netlink.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_unspecified.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/route.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_test.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go [new file with mode: 0644]
Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_test.go [new file with mode: 0644]
README.md
build [new file with mode: 0755]
pkg/ip/cidr.go [new file with mode: 0644]
pkg/ip/ipmasq.go [new file with mode: 0644]
pkg/ip/link.go [new file with mode: 0644]
pkg/ip/route.go [new file with mode: 0644]
pkg/ns/ns.go [new file with mode: 0644]
pkg/plugin/ipam.go [new file with mode: 0644]
pkg/plugin/types.go [new file with mode: 0644]
pkg/skel/skel.go [new file with mode: 0644]
plugins/ipam/host-local/README.md [new file with mode: 0644]
plugins/ipam/host-local/allocator.go [new file with mode: 0644]
plugins/ipam/host-local/backend/disk/backend.go [new file with mode: 0644]
plugins/ipam/host-local/backend/disk/lock.go [new file with mode: 0644]
plugins/ipam/host-local/backend/store.go [new file with mode: 0644]
plugins/ipam/host-local/config.go [new file with mode: 0644]
plugins/ipam/host-local/main.go [new file with mode: 0644]
plugins/main/bridge/bridge.go [new file with mode: 0644]
plugins/main/macvlan/macvlan.go [new file with mode: 0644]
plugins/main/veth/veth.go [new file with mode: 0644]
scripts/docker-run.sh [new file with mode: 0755]
scripts/exec-plugins.sh [new file with mode: 0755]
scripts/priv-net-run.sh [new file with mode: 0755]

diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json
new file mode 100644 (file)
index 0000000..bb3f775
--- /dev/null
@@ -0,0 +1,22 @@
+{
+       "ImportPath": "github.com/appc/cni",
+       "GoVersion": "go1.4.2",
+       "Packages": [
+               "./..."
+       ],
+       "Deps": [
+               {
+                       "ImportPath": "github.com/coreos/go-iptables/iptables",
+                       "Rev": "83dfad0f13fd7310fb3c1cb8563248d8d604b95b"
+               },
+               {
+                       "ImportPath": "github.com/coreos/rkt/pkg/lock",
+                       "Comment": "v0.5.3-84-gdc2e480",
+                       "Rev": "dc2e4803c86ad429a43d73a9ee0172afd952a760"
+               },
+               {
+                       "ImportPath": "github.com/vishvananda/netlink",
+                       "Rev": "ae3e7dba57271b4e976c4f91637861ee477135e2"
+               }
+       ]
+}
diff --git a/Godeps/Readme b/Godeps/Readme
new file mode 100644 (file)
index 0000000..4cdaa53
--- /dev/null
@@ -0,0 +1,5 @@
+This directory tree is generated automatically by godep.
+
+Please do not edit.
+
+See https://github.com/tools/godep for more information.
diff --git a/Godeps/_workspace/.gitignore b/Godeps/_workspace/.gitignore
new file mode 100644 (file)
index 0000000..f037d68
--- /dev/null
@@ -0,0 +1,2 @@
+/pkg
+/bin
diff --git a/Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables.go b/Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables.go
new file mode 100644 (file)
index 0000000..36a8ec7
--- /dev/null
@@ -0,0 +1,255 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iptables
+
+import (
+       "bytes"
+       "fmt"
+       "log"
+       "os/exec"
+       "regexp"
+       "strconv"
+       "strings"
+       "syscall"
+)
+
+// Error augments exec.ExitError with the text the command wrote to stderr.
+type Error struct {
+       exec.ExitError
+       msg string // captured stderr output, included in the Error() message
+}
+
+func (e *Error) ExitStatus() int {
+       return e.Sys().(syscall.WaitStatus).ExitStatus()
+}
+
+func (e *Error) Error() string {
+       return fmt.Sprintf("exit status %v: %v", e.ExitStatus(), e.msg)
+}
+
+// IPTables runs the iptables executable found at path.
+type IPTables struct {
+       path string // location of the iptables binary, as resolved by New
+}
+
+func New() (*IPTables, error) {
+       path, err := exec.LookPath("iptables")
+       if err != nil {
+               return nil, err
+       }
+
+       return &IPTables{path}, nil
+}
+
+// Exists checks if given rulespec in specified table/chain exists
+func (ipt *IPTables) Exists(table, chain string, rulespec...string) (bool, error) {
+       checkPresent, err := getIptablesHasCheckCommand()
+       if err != nil {
+               log.Printf("Error checking iptables version, assuming version at least 1.4.11: %v", err)
+               checkPresent = true
+       }
+
+       if !checkPresent {
+               cmd := append([]string{"-A", chain}, rulespec...)
+               return existsForOldIpTables(table, strings.Join(cmd, " "))
+       } else {
+               cmd := append([]string{"-t", table, "-C", chain}, rulespec...)
+               err := ipt.run(cmd...)
+
+               switch {
+               case err == nil:
+                       return true, nil
+               case err.(*Error).ExitStatus() == 1:
+                       return false, nil
+               default:
+                       return false, err
+               }
+       }
+}
+
+// Insert inserts rulespec to specified table/chain (in specified pos)
+func (ipt *IPTables) Insert(table, chain string, pos int, rulespec ...string) error {
+       cmd := append([]string{"-t", table, "-I", chain, strconv.Itoa(pos)}, rulespec...)
+       return ipt.run(cmd...)
+}
+
+// Append appends rulespec to specified table/chain
+func (ipt *IPTables) Append(table, chain string, rulespec ...string) error {
+       cmd := append([]string{"-t", table, "-A", chain}, rulespec...)
+       return ipt.run(cmd...)
+}
+
+// AppendUnique acts like Append except that it won't add a duplicate
+func (ipt *IPTables) AppendUnique(table, chain string, rulespec ...string) error {
+       exists, err := ipt.Exists(table, chain, rulespec...)
+       if err != nil {
+               return err
+       }
+
+       if !exists {
+               return ipt.Append(table, chain, rulespec...)
+       }
+
+       return nil
+}
+
+// Delete removes rulespec in specified table/chain
+func (ipt *IPTables) Delete(table, chain string, rulespec ...string) error {
+       cmd := append([]string{"-t", table, "-D", chain}, rulespec...)
+       return ipt.run(cmd...)
+}
+
+// List returns the rules of the specified table/chain, one element per line
+// of "iptables -S" output (a single trailing empty line is dropped).
+//
+// A command that ran but exited non-zero is reported as *Error carrying the
+// stderr text. Any other failure from cmd.Run (for example the binary could
+// not be started) is returned unchanged instead of panicking in an unchecked
+// type assertion.
+func (ipt *IPTables) List(table, chain string) ([]string, error) {
+       var stdout, stderr bytes.Buffer
+       cmd := exec.Cmd{
+               Path:   ipt.path,
+               Args:   []string{ipt.path, "-t", table, "-S", chain},
+               Stdout: &stdout,
+               Stderr: &stderr,
+       }
+
+       if err := cmd.Run(); err != nil {
+               exitErr, ok := err.(*exec.ExitError)
+               if !ok {
+                       return nil, err
+               }
+               return nil, &Error{*exitErr, stderr.String()}
+       }
+
+       rules := strings.Split(stdout.String(), "\n")
+       // The output ends with a newline, which leaves an empty final element.
+       if len(rules) > 0 && rules[len(rules)-1] == "" {
+               rules = rules[:len(rules)-1]
+       }
+
+       return rules, nil
+}
+
+func (ipt *IPTables) NewChain(table, chain string) error {
+       return ipt.run("-t", table, "-N", chain)
+}
+
+// ClearChain flushed (deletes all rules) in the specifed table/chain.
+// If the chain does not exist, new one will be created
+func (ipt *IPTables) ClearChain(table, chain string) error {
+       err := ipt.NewChain(table, chain)
+
+       switch {
+       case err == nil:
+               return nil
+       case err.(*Error).ExitStatus() == 1:
+               // chain already exists. Flush (clear) it.
+               return ipt.run("-t", table, "-F", chain)
+       default:
+               return err
+       }
+}
+
+// DeleteChain deletes the chain in the specified table.
+// The chain must be empty
+func (ipt *IPTables) DeleteChain(table, chain string) error {
+       return ipt.run("-t", table, "-X", chain)
+}
+
+// run executes the iptables binary with args, capturing stderr. Stdout is
+// left unset.
+//
+// A command that ran but exited non-zero is reported as *Error carrying the
+// stderr text. Any other failure from cmd.Run (for example the binary could
+// not be started) is returned unchanged instead of panicking in an unchecked
+// type assertion, so callers that need an *Error must use the checked form
+// of the type assertion.
+func (ipt *IPTables) run(args ...string) error {
+       var stderr bytes.Buffer
+       cmd := exec.Cmd{
+               Path:   ipt.path,
+               Args:   append([]string{ipt.path}, args...),
+               Stderr: &stderr,
+       }
+
+       if err := cmd.Run(); err != nil {
+               exitErr, ok := err.(*exec.ExitError)
+               if !ok {
+                       return err
+               }
+               return &Error{*exitErr, stderr.String()}
+       }
+
+       return nil
+}
+
+// Checks if iptables has the "-C" flag
+func getIptablesHasCheckCommand() (bool, error) {
+       vstring, err := getIptablesVersionString()
+       if err != nil {
+               return false, err
+       }
+
+       v1, v2, v3, err := extractIptablesVersion(vstring)
+       if err != nil {
+               return false, err
+       }
+
+       return iptablesHasCheckCommand(v1, v2, v3), nil
+}
+
+// iptablesVersionRe matches the "vMAJOR.MINOR.PATCH" token of an iptables
+// version string. It is compiled once instead of on every call.
+var iptablesVersionRe = regexp.MustCompile("v([0-9]+)\\.([0-9]+)\\.([0-9]+)")
+
+// extractIptablesVersion returns the first three components of the iptables
+// version found in str.
+// e.g. "iptables v1.3.66" would return (1, 3, 66, nil)
+//
+// An error is returned when str contains no version token or when a
+// component does not fit in an int.
+func extractIptablesVersion(str string) (int, int, int, error) {
+       result := iptablesVersionRe.FindStringSubmatch(str)
+       if result == nil {
+               return 0, 0, 0, fmt.Errorf("no iptables version found in string: %s", str)
+       }
+
+       // result[0] is the whole match; the three components follow it.
+       var parts [3]int
+       for i := range parts {
+               n, err := strconv.Atoi(result[i+1])
+               if err != nil {
+                       return 0, 0, 0, err
+               }
+               parts[i] = n
+       }
+
+       return parts[0], parts[1], parts[2], nil
+}
+
+// getIptablesVersionString runs "iptables --version" and returns whatever the
+// command printed on stdout.
+func getIptablesVersionString() (string, error) {
+       var stdout bytes.Buffer
+       cmd := exec.Command("iptables", "--version")
+       cmd.Stdout = &stdout
+       if err := cmd.Run(); err != nil {
+               return "", err
+       }
+       return stdout.String(), nil
+}
+
+// Checks if an iptables version is after 1.4.11, when --check was added
+func iptablesHasCheckCommand(v1 int, v2 int, v3 int) bool {
+       if v1 > 1 {
+               return true
+       }
+       if v1 == 1 && v2 > 4 {
+               return true
+       }
+       if v1 == 1 && v2 == 4 && v3 >= 11 {
+               return true
+       }
+       return false
+}
+
+// existsForOldIpTables reports whether ruleSpec occurs as a substring of the
+// "iptables -t table -S" listing.
+func existsForOldIpTables(table string, ruleSpec string) (bool, error) {
+       var listing bytes.Buffer
+       cmd := exec.Command("iptables", "-t", table, "-S")
+       cmd.Stdout = &listing
+       if err := cmd.Run(); err != nil {
+               return false, err
+       }
+       found := strings.Contains(listing.String(), ruleSpec)
+       return found, nil
+}
diff --git a/Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables_test.go b/Godeps/_workspace/src/github.com/coreos/go-iptables/iptables/iptables_test.go
new file mode 100644 (file)
index 0000000..e4f3b17
--- /dev/null
@@ -0,0 +1,136 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iptables
+
+import (
+       "crypto/rand"
+       "math/big"
+       "reflect"
+       "testing"
+)
+
+func randChain(t *testing.T) string {
+       n, err := rand.Int(rand.Reader, big.NewInt(1000000))
+       if err != nil {
+               t.Fatalf("Failed to generate random chain name: %v", err)
+       }
+
+       return "TEST-" + n.String()
+}
+
+// TestChain walks a randomly named chain in the filter table through
+// create, flush, append, failed delete, flush and delete.
+func TestChain(t *testing.T) {
+       const table = "filter"
+       chain := randChain(t)
+
+       ipt, err := New()
+       if err != nil {
+               t.Fatalf("New failed: %v", err)
+       }
+
+       // chain shouldn't exist, this will create new
+       if err := ipt.ClearChain(table, chain); err != nil {
+               t.Fatalf("ClearChain (of missing) failed: %v", err)
+       }
+
+       // chain now exists
+       if err := ipt.ClearChain(table, chain); err != nil {
+               t.Fatalf("ClearChain (of empty) failed: %v", err)
+       }
+
+       // put a simple rule in
+       if err := ipt.Append(table, chain, "-s", "0.0.0.0/0", "-j", "ACCEPT"); err != nil {
+               t.Fatalf("Append failed: %v", err)
+       }
+
+       // can't delete non-empty chain
+       if err := ipt.DeleteChain(table, chain); err == nil {
+               t.Fatalf("DeleteChain of non-empty chain did not fail")
+       }
+
+       if err := ipt.ClearChain(table, chain); err != nil {
+               t.Fatalf("ClearChain (of non-empty) failed: %v", err)
+       }
+
+       // chain empty, should be ok
+       if err := ipt.DeleteChain(table, chain); err != nil {
+               t.Fatalf("DeleteChain of empty chain failed: %v", err)
+       }
+}
+
+func TestRules(t *testing.T) {
+       chain := randChain(t)
+
+       ipt, err := New()
+       if err != nil {
+               t.Fatalf("New failed: %v", err)
+       }
+
+       // chain shouldn't exist, this will create new
+       err = ipt.ClearChain("filter", chain)
+       if err != nil {
+               t.Fatalf("ClearChain (of missing) failed: %v", err)
+       }
+
+       err = ipt.Append("filter", chain, "-s", "10.1.0.0/16", "-d", "8.8.8.8/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("Append failed: %v", err)
+       }
+
+       err = ipt.AppendUnique("filter", chain, "-s", "10.1.0.0/16", "-d", "8.8.8.8/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("AppendUnique failed: %v", err)
+       }
+
+       err = ipt.Append("filter", chain, "-s", "10.2.0.0/16", "-d", "8.8.8.8/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("Append failed: %v", err)
+       }
+
+       err = ipt.Insert("filter", chain, 2, "-s", "10.2.0.0/16", "-d", "9.9.9.9/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("Insert failed: %v", err)
+       }
+
+       err = ipt.Insert("filter", chain, 1, "-s", "10.1.0.0/16", "-d", "9.9.9.9/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("Insert failed: %v", err)
+       }
+
+       err = ipt.Delete("filter", chain, "-s", "10.1.0.0/16", "-d", "9.9.9.9/32", "-j", "ACCEPT")
+       if err != nil {
+               t.Fatalf("Insert failed: %v", err)
+       }
+
+       rules, err := ipt.List("filter", chain)
+       if err != nil {
+               t.Fatalf("List failed: %v", err)
+       }
+
+       expected := []string{
+               "-N " + chain,
+               "-A " + chain + " -s 10.1.0.0/16 -d 8.8.8.8/32 -j ACCEPT",
+               "-A " + chain + " -s 10.2.0.0/16 -d 9.9.9.9/32 -j ACCEPT",
+               "-A " + chain + " -s 10.2.0.0/16 -d 8.8.8.8/32 -j ACCEPT",
+       }
+
+       if !reflect.DeepEqual(rules, expected) {
+               t.Fatalf("List mismatch: \ngot  %#v \nneed %#v", rules, expected)
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir.go b/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir.go
new file mode 100644 (file)
index 0000000..265a276
--- /dev/null
@@ -0,0 +1,190 @@
+// Copyright 2014 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lock implements simple locking primitives on a
+// regular file or directory using flock
+package lock
+
+import (
+       "errors"
+       "syscall"
+)
+
+// Errors this package returns in place of the raw errno values.
+var (
+       // ErrLocked replaces EWOULDBLOCK from a non-blocking flock attempt.
+       ErrLocked     = errors.New("file already locked")
+       // ErrNotExist replaces ENOENT when NewLock opens the path.
+       ErrNotExist   = errors.New("file does not exist")
+       // ErrPermission replaces EACCES when NewLock opens the path.
+       ErrPermission = errors.New("permission denied")
+       // ErrNotRegular is returned by NewLock for a RegFile lock on a path
+       // that is not a regular file.
+       ErrNotRegular = errors.New("not a regular file")
+)
+
+// FileLock represents a lock on a regular file or a directory
+type FileLock struct {
+       path string // path the descriptor was opened from
+       fd   int    // descriptor handed to flock; set to -1 by Close
+}
+
+// LockType tells NewLock what kind of filesystem object the path should be.
+type LockType int
+
+const (
+       // Dir makes NewLock open the path with O_DIRECTORY.
+       Dir LockType = iota
+       // RegFile makes NewLock reject anything that is not a regular file.
+       RegFile
+)
+
+// TryExclusiveLock takes an exclusive lock without blocking.
+// This is idempotent when the Lock already represents an exclusive lock,
+// and tries promote a shared lock to exclusive atomically.
+// It will return ErrLocked if any lock is already held.
+func (l *FileLock) TryExclusiveLock() error {
+       err := syscall.Flock(l.fd, syscall.LOCK_EX|syscall.LOCK_NB)
+       if err == syscall.EWOULDBLOCK {
+               err = ErrLocked
+       }
+       return err
+}
+
+// TryExclusiveLock takes an exclusive lock on a file/directory without blocking.
+// It will return ErrLocked if any lock is already held on the file/directory.
+func TryExclusiveLock(path string, lockType LockType) (*FileLock, error) {
+       l, err := NewLock(path, lockType)
+       if err != nil {
+               return nil, err
+       }
+       err = l.TryExclusiveLock()
+       if err != nil {
+               return nil, err
+       }
+       return l, err
+}
+
+// ExclusiveLock performs a blocking exclusive flock (LOCK_EX) on the lock's
+// descriptor and returns flock's result.
+func (l *FileLock) ExclusiveLock() error {
+       const op = syscall.LOCK_EX
+       return syscall.Flock(l.fd, op)
+}
+
+// ExclusiveLock takes an exclusive lock on a file/directory.
+// It will block if an exclusive lock is already held on the file/directory.
+func ExclusiveLock(path string, lockType LockType) (*FileLock, error) {
+       l, err := NewLock(path, lockType)
+       if err == nil {
+               err = l.ExclusiveLock()
+       }
+       if err != nil {
+               return nil, err
+       }
+       return l, nil
+}
+
+// TrySharedLock takes a co-operative (shared) lock without blocking.
+// This is idempotent when the Lock already represents a shared lock,
+// and tries demote an exclusive lock to shared atomically.
+// It will return ErrLocked if an exclusive lock already exists.
+func (l *FileLock) TrySharedLock() error {
+       err := syscall.Flock(l.fd, syscall.LOCK_SH|syscall.LOCK_NB)
+       if err == syscall.EWOULDBLOCK {
+               err = ErrLocked
+       }
+       return err
+}
+
+// TrySharedLock takes a co-operative (shared) lock on a file/directory without blocking.
+// It will return ErrLocked if an exclusive lock already exists on the file/directory.
+func TrySharedLock(path string, lockType LockType) (*FileLock, error) {
+       l, err := NewLock(path, lockType)
+       if err != nil {
+               return nil, err
+       }
+       err = l.TrySharedLock()
+       if err != nil {
+               return nil, err
+       }
+       return l, nil
+}
+
+// SharedLock performs a blocking shared flock (LOCK_SH) on the lock's
+// descriptor and returns flock's result.
+func (l *FileLock) SharedLock() error {
+       const op = syscall.LOCK_SH
+       return syscall.Flock(l.fd, op)
+}
+
+// SharedLock takes a co-operative (shared) lock on a file/directory.
+// It will block if an exclusive lock is already held on the file/directory.
+func SharedLock(path string, lockType LockType) (*FileLock, error) {
+       l, err := NewLock(path, lockType)
+       if err != nil {
+               return nil, err
+       }
+       err = l.SharedLock()
+       if err != nil {
+               return nil, err
+       }
+       return l, nil
+}
+
+// Unlock releases the flock (LOCK_UN) while leaving the descriptor open.
+func (l *FileLock) Unlock() error {
+       const op = syscall.LOCK_UN
+       return syscall.Flock(l.fd, op)
+}
+
+// Fd returns the lock's file descriptor, or an error if the lock is closed
+func (l *FileLock) Fd() (int, error) {
+       var err error
+       if l.fd == -1 {
+               err = errors.New("lock closed")
+       }
+       return l.fd, err
+}
+
+// Close marks the lock as closed (fd becomes -1) and closes the underlying
+// descriptor, which implicitly unlocks it as well.
+func (l *FileLock) Close() error {
+       var fd int
+       fd, l.fd = l.fd, -1
+       return syscall.Close(fd)
+}
+
+// NewLock opens a new lock on a file without acquisition.
+//
+// The path is opened read-only with O_CLOEXEC, plus O_DIRECTORY when
+// lockType is Dir. ENOENT and EACCES from the open are reported as
+// ErrNotExist and ErrPermission. For a RegFile lock the path must be a
+// regular file, otherwise ErrNotRegular is returned.
+//
+// On every failure after the open has succeeded the descriptor is closed
+// before returning, so no descriptor leaks on an error path.
+func NewLock(path string, lockType LockType) (*FileLock, error) {
+       mode := syscall.O_RDONLY | syscall.O_CLOEXEC
+       if lockType == Dir {
+               mode |= syscall.O_DIRECTORY
+       }
+
+       lfd, err := syscall.Open(path, mode, 0)
+       if err != nil {
+               switch err {
+               case syscall.ENOENT:
+                       err = ErrNotExist
+               case syscall.EACCES:
+                       err = ErrPermission
+               }
+               return nil, err
+       }
+
+       var stat syscall.Stat_t
+       if err := syscall.Fstat(lfd, &stat); err != nil {
+               syscall.Close(lfd)
+               return nil, err
+       }
+       // Check if the file is a regular file
+       if lockType == RegFile && stat.Mode&syscall.S_IFMT != syscall.S_IFREG {
+               syscall.Close(lfd)
+               return nil, ErrNotRegular
+       }
+
+       return &FileLock{path: path, fd: lfd}, nil
+}
diff --git a/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir_test.go b/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/dir_test.go
new file mode 100644 (file)
index 0000000..fb86626
--- /dev/null
@@ -0,0 +1,156 @@
+// Copyright 2014 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lock
+
+import (
+       "io/ioutil"
+       "os"
+       "testing"
+)
+
+func TestNewLock(t *testing.T) {
+       f, err := ioutil.TempFile("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpfile: %v", err)
+       }
+       defer os.Remove(f.Name())
+       f.Close()
+
+       l, err := NewLock(f.Name(), RegFile)
+       if err != nil {
+               t.Fatalf("error creating NewFileLock: %v", err)
+       }
+       l.Close()
+
+       d, err := ioutil.TempDir("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpdir: %v", err)
+       }
+       defer os.Remove(d)
+
+       l, err = NewLock(d, Dir)
+       if err != nil {
+               t.Fatalf("error creating NewLock: %v", err)
+       }
+
+       err = l.Close()
+       if err != nil {
+               t.Fatalf("error unlocking lock: %v", err)
+       }
+
+       if err = os.Remove(d); err != nil {
+               t.Fatalf("error removing tmpdir: %v", err)
+       }
+
+       l, err = NewLock(d, Dir)
+       if err == nil {
+               t.Fatalf("expected error creating lock on nonexistent path")
+       }
+}
+
+func TestExclusiveLock(t *testing.T) {
+       dir, err := ioutil.TempDir("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpdir: %v", err)
+       }
+       defer os.Remove(dir)
+
+       // Set up the initial exclusive lock
+       l, err := ExclusiveLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating lock: %v", err)
+       }
+
+       // reacquire the exclusive lock using the receiver interface
+       err = l.TryExclusiveLock()
+       if err != nil {
+               t.Fatalf("error reacquiring exclusive lock: %v", err)
+       }
+
+       // Now try another exclusive lock, should fail
+       _, err = TryExclusiveLock(dir, Dir)
+       if err == nil {
+               t.Fatalf("expected err trying exclusive lock")
+       }
+
+       // Unlock the original lock
+       err = l.Close()
+       if err != nil {
+               t.Fatalf("error closing lock: %v", err)
+       }
+
+       // Now another exclusive lock should succeed
+       _, err = TryExclusiveLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating lock: %v", err)
+       }
+}
+
+func TestSharedLock(t *testing.T) {
+       dir, err := ioutil.TempDir("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpdir: %v", err)
+       }
+       defer os.Remove(dir)
+
+       // Set up the initial shared lock
+       l1, err := SharedLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating new shared lock: %v", err)
+       }
+
+       err = l1.TrySharedLock()
+       if err != nil {
+               t.Fatalf("error reacquiring shared lock: %v", err)
+       }
+
+       // Subsequent shared locks should succeed
+       l2, err := TrySharedLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating shared lock: %v", err)
+       }
+       l3, err := TrySharedLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating shared lock: %v", err)
+       }
+
+       // But an exclusive lock should fail
+       _, err = TryExclusiveLock(dir, Dir)
+       if err == nil {
+               t.Fatal("expected exclusive lock to fail")
+       }
+
+       // Close the locks
+       err = l1.Close()
+       if err != nil {
+               t.Fatalf("error closing lock: %v", err)
+       }
+       err = l2.Close()
+       if err != nil {
+               t.Fatalf("error closing lock: %v", err)
+       }
+
+       // Only unlock one of them
+       err = l3.Unlock()
+       if err != nil {
+               t.Fatalf("error unlocking lock: %v", err)
+       }
+
+       // Now try an exclusive lock, should succeed
+       _, err = TryExclusiveLock(dir, Dir)
+       if err != nil {
+               t.Fatalf("error creating lock: %v", err)
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock.go b/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock.go
new file mode 100644 (file)
index 0000000..768b421
--- /dev/null
@@ -0,0 +1,272 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lock
+
+import (
+       "fmt"
+       "os"
+       "path/filepath"
+       "syscall"
+)
+
+const (
+       // defaultDirPerm is the mode passed to os.MkdirAll when NewKeyLock
+       // creates the lock directory.
+       // NOTE(review): 0660 carries no execute (search) bit, so a directory
+       // created with it may not be usable by an unprivileged owner - confirm
+       // that this is intended.
+       defaultDirPerm     os.FileMode = 0660
+       // defaultFilePerm is the mode used when a key lock file is created.
+       defaultFilePerm    os.FileMode = 0660
+       // defaultLockRetries is the retry count the exported lock functions
+       // pass to KeyLock.lock.
+       defaultLockRetries             = 3
+)
+
+// keyLockMode is a bit set describing how KeyLock.lock acquires the lock.
+type keyLockMode uint
+
+const (
+       // keyLockExclusive requests an exclusive lock.
+       keyLockExclusive keyLockMode = 1 << iota
+       // keyLockShared requests a shared lock. KeyLock.lock never tests this
+       // bit: any mode without keyLockExclusive is treated as shared.
+       keyLockShared
+       // keyLockNonBlocking selects the Try* variant of the underlying lock
+       // call instead of the blocking one.
+       keyLockNonBlocking
+)
+
+// KeyLock is a lock for a specific key. The lock file is created inside a
+// directory using the key name.
+// This is useful when multiple processes want to take a lock but cannot use
+// FileLock as they don't have a well-defined file on the filesystem.
+// The key value must be a valid file name (as the lock file is named after
+// the key value).
+type KeyLock struct {
+       // lockDir is the directory that holds the lock file
+       lockDir string
+       // key is the name of the lock file inside lockDir
+       key     string
+       // The lock on the key
+       keyLock *FileLock
+}
+
+// NewKeyLock builds a KeyLock for key under lockDir; the lock itself is not
+// acquired. Both lockDir and the lock file are created when they are missing.
+// key must be usable as a file name, since the lock file is named after it.
+func NewKeyLock(lockDir string, key string) (*KeyLock, error) {
+       if err := os.MkdirAll(lockDir, defaultDirPerm); err != nil {
+               return nil, err
+       }
+       lockPath := filepath.Join(lockDir, key)
+       // Opening with O_CREATE makes sure the file exists before NewLock is
+       // called on it; the handle itself is not needed and is closed at once.
+       created, err := os.OpenFile(lockPath, os.O_RDONLY|os.O_CREATE, defaultFilePerm)
+       if err != nil {
+               return nil, fmt.Errorf("error creating key lock file: %v", err)
+       }
+       created.Close()
+       fileLock, err := NewLock(lockPath, RegFile)
+       if err != nil {
+               return nil, fmt.Errorf("error opening key lock file: %v", err)
+       }
+       kl := &KeyLock{
+               lockDir: lockDir,
+               key:     key,
+               keyLock: fileLock,
+       }
+       return kl, nil
+}
+
+// Close closes the key lock which implicitly unlocks it as well.
+// Whatever the underlying lock's Close call returns is not propagated.
+func (l *KeyLock) Close() {
+       l.keyLock.Close()
+}
+
+// TryExclusiveKeyLock takes an exclusive lock on a key without blocking.
+// This is idempotent when the KeyLock already represents an exclusive lock,
+// and tries to promote a shared lock to exclusive atomically.
+// It will return ErrLocked if any lock is already held on the key.
+func (l *KeyLock) TryExclusiveKeyLock() error {
+       return l.lock(keyLockExclusive|keyLockNonBlocking, defaultLockRetries)
+}
+
+// TryExclusiveKeyLock takes an exclusive lock on the key without blocking.
+// lockDir is the directory where the lock file will be created.
+// It will return ErrLocked if any lock is already held.
+func TryExclusiveKeyLock(lockDir string, key string) (*KeyLock, error) {
+       return createAndLock(lockDir, key, keyLockExclusive|keyLockNonBlocking)
+}
+
+// ExclusiveKeyLock takes an exclusive lock on a key.
+// This is idempotent when the KeyLock already represents an exclusive lock,
+// and promotes a shared lock to exclusive atomically.
+// It will block if an exclusive lock is already held on the key.
+func (l *KeyLock) ExclusiveKeyLock() error {
+       return l.lock(keyLockExclusive, defaultLockRetries)
+}
+
+// ExclusiveKeyLock takes an exclusive lock on a key.
+// lockDir is the directory where the lock file will be created.
+// It will block if an exclusive lock is already held on the key.
+func ExclusiveKeyLock(lockDir string, key string) (*KeyLock, error) {
+       return createAndLock(lockDir, key, keyLockExclusive)
+}
+
+// TrySharedKeyLock takes a co-operative (shared) lock on the key without blocking.
+// This is idempotent when the KeyLock already represents a shared lock,
+// and tries to demote an exclusive lock to shared atomically.
+// It will return ErrLocked if an exclusive lock already exists on the key.
+func (l *KeyLock) TrySharedKeyLock() error {
+       return l.lock(keyLockShared|keyLockNonBlocking, defaultLockRetries)
+}
+
+// TrySharedKeyLock takes a co-operative (shared) lock on a key without blocking.
+// lockDir is the directory where the lock file will be created.
+// It will return ErrLocked if an exclusive lock already exists on the key.
+func TrySharedKeyLock(lockDir string, key string) (*KeyLock, error) {
+       return createAndLock(lockDir, key, keyLockShared|keyLockNonBlocking)
+}
+
+// SharedKeyLock takes a co-operative (shared) lock on a key.
+// This is idempotent when the KeyLock already represents a shared lock,
+// and demotes an exclusive lock to shared atomically.
+// It will block if an exclusive lock is already held on the key.
+func (l *KeyLock) SharedKeyLock() error {
+       return l.lock(keyLockShared, defaultLockRetries)
+}
+
+// SharedKeyLock takes a co-operative (shared) lock on a key.
+// lockDir is the directory where the lock file will be created.
+// It will block if an exclusive lock is already held on the key.
+func SharedKeyLock(lockDir string, key string) (*KeyLock, error) {
+       return createAndLock(lockDir, key, keyLockShared)
+}
+
+// createAndLock creates a KeyLock for key inside lockDir through NewKeyLock
+// and then acquires it with the given mode, allowing defaultLockRetries
+// retries. On failure no KeyLock is returned: when the acquisition fails, the
+// freshly created KeyLock is closed first so that it is not leaked.
+func createAndLock(lockDir string, key string, mode keyLockMode) (*KeyLock, error) {
+       keyLock, err := NewKeyLock(lockDir, key)
+       if err != nil {
+               return nil, err
+       }
+       if err = keyLock.lock(mode, defaultLockRetries); err != nil {
+               // The caller never receives keyLock on this path and therefore
+               // cannot close it itself.
+               keyLock.Close()
+               return nil, err
+       }
+       return keyLock, nil
+}
+
+// lock is the base function to take a lock and handle changed lock files.
+// As there's the need to remove unused lock files (see CleanKeyLocks) without
+// races, a changed file detection is needed.
+//
+// Without changed file detection this can happen:
+//
+// Process A takes exclusive lock on file01
+// Process B waits for exclusive lock on file01.
+// Process A deletes file01 and then releases the lock.
+// Process B takes the lock on the removed file01 as it has the fd opened
+// Process C comes, creates the file as it doesn't exist, and it also takes an exclusive lock.
+// Now B and C both think they own an exclusive lock.
+//
+// To detect this, after every successful acquisition the device and inode of
+// the locked descriptor are compared with those of the file currently found
+// at the lock path. When they differ, the stale lock is closed, the lock file
+// is reopened through NewKeyLock and the acquisition is repeated, at most
+// maxRetries times; after that an error is returned.
+//
+// mode selects exclusive or shared, blocking or non-blocking acquisition.
+// maxRetries can be passed, useful for testing.
+func (l *KeyLock) lock(mode keyLockMode, maxRetries int) error {
+       isExclusive := mode&keyLockExclusive != 0
+       isNonBlocking := mode&keyLockNonBlocking != 0
+       keyLockFile := filepath.Join(l.lockDir, l.key)
+
+       retries := 0
+       for {
+               var err error
+               switch {
+               case isExclusive && !isNonBlocking:
+                       err = l.keyLock.ExclusiveLock()
+               case isExclusive && isNonBlocking:
+                       err = l.keyLock.TryExclusiveLock()
+               case !isExclusive && !isNonBlocking:
+                       err = l.keyLock.SharedLock()
+               case !isExclusive && isNonBlocking:
+                       err = l.keyLock.TrySharedLock()
+               }
+               if err != nil {
+                       return err
+               }
+
+               // Check that the file referenced by the lock fd is the same as
+               // the current file on the filesystem
+               var lockStat, curStat syscall.Stat_t
+               lfd, err := l.keyLock.Fd()
+               if err != nil {
+                       return err
+               }
+               err = syscall.Fstat(lfd, &lockStat)
+               if err != nil {
+                       return err
+               }
+               fd, err := syscall.Open(keyLockFile, syscall.O_RDONLY, 0)
+               // If there's an error opening the file return an error
+               if err != nil {
+                       return err
+               }
+               err = syscall.Fstat(fd, &curStat)
+               // fd was only opened to stat the current file: close it on
+               // every path so that no descriptor is leaked per call or retry
+               syscall.Close(fd)
+               if err != nil {
+                       return err
+               }
+               if lockStat.Ino == curStat.Ino && lockStat.Dev == curStat.Dev {
+                       return nil
+               }
+               if retries >= maxRetries {
+                       return fmt.Errorf("cannot acquire lock after %d retries", retries)
+               }
+
+               // If the file has changed discard this lock and try to take another lock.
+               l.keyLock.Close()
+               nl, err := NewKeyLock(l.lockDir, l.key)
+               if err != nil {
+                       return err
+               }
+               l.keyLock = nl.keyLock
+
+               retries++
+       }
+}
+
+// Unlock releases the lock held on the key, handing back whatever error the
+// underlying file lock reports.
+func (l *KeyLock) Unlock() error {
+       return l.keyLock.Unlock()
+}
+
+// CleanKeyLocks deletes the lock files found in lockDir.
+// Each entry is locked exclusively (without blocking) before it is removed;
+// entries for which that fails with ErrLocked are left untouched. Any other
+// error aborts the cleanup and is returned.
+func CleanKeyLocks(lockDir string) error {
+       dir, err := os.Open(lockDir)
+       if err != nil {
+               return fmt.Errorf("error opening lockDir: %v", err)
+       }
+       defer dir.Close()
+       entries, err := dir.Readdir(0)
+       if err != nil {
+               return fmt.Errorf("error getting lock files list: %v", err)
+       }
+       for _, entry := range entries {
+               name := entry.Name()
+               kl, err := TryExclusiveKeyLock(lockDir, name)
+               switch {
+               case err == ErrLocked:
+                       // Somebody else holds this key: leave its file alone
+                       continue
+               case err != nil:
+                       return err
+               }
+
+               // Remove the file while the exclusive lock is still held, then
+               // drop the lock whether or not the removal worked
+               rmErr := os.Remove(filepath.Join(lockDir, name))
+               kl.Close()
+               if rmErr != nil {
+                       return fmt.Errorf("error removing lock file: %v", rmErr)
+               }
+       }
+       return nil
+}
diff --git a/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock_test.go b/Godeps/_workspace/src/github.com/coreos/rkt/pkg/lock/keylock_test.go
new file mode 100644 (file)
index 0000000..56cc9f1
--- /dev/null
@@ -0,0 +1,203 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lock
+
+import (
+       "io/ioutil"
+       "os"
+       "path/filepath"
+       "testing"
+)
+
+// TestExclusiveKeyLock verifies that a second, non-blocking exclusive lock on
+// a key fails while a first exclusive lock on the same key is held.
+func TestExclusiveKeyLock(t *testing.T) {
+       tmpDir, err := ioutil.TempDir("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpdir: %v", err)
+       }
+       defer os.RemoveAll(tmpDir)
+
+       const key = "key01"
+       holder, err := ExclusiveKeyLock(tmpDir, key)
+       if err != nil {
+               t.Fatalf("error creating key lock: %v", err)
+       }
+
+       if _, err = TryExclusiveKeyLock(tmpDir, key); err == nil {
+               t.Fatalf("expected err trying exclusive key lock")
+       }
+
+       holder.Close()
+}
+
+// TestCleanKeyLocks checks that CleanKeyLocks leaves the file of a key that
+// is still locked in place and removes the files of keys nobody holds.
+func TestCleanKeyLocks(t *testing.T) {
+       dir, err := ioutil.TempDir("", "")
+       if err != nil {
+               t.Fatalf("error creating tmpdir: %v", err)
+       }
+       defer os.RemoveAll(dir)
+
+       // cleanAndCount runs CleanKeyLocks and reports how many files remain
+       cleanAndCount := func() int {
+               if err := CleanKeyLocks(dir); err != nil {
+                       t.Fatalf("unexpected error: %v", err)
+               }
+               remaining, err := countFiles(dir)
+               if err != nil {
+                       t.Fatalf("unexpected error: %v", err)
+               }
+               return remaining
+       }
+
+       held, err := ExclusiveKeyLock(dir, "key01")
+       if err != nil {
+               t.Fatalf("error creating keyLock: %v", err)
+       }
+
+       // key01 is still locked, so its file must survive the cleanup
+       if n := cleanAndCount(); n != 1 {
+               t.Fatalf("expected 1 file in lock dir. found %d files", n)
+       }
+
+       shared, err := SharedKeyLock(dir, "key02")
+       if err != nil {
+               t.Fatalf("error creating keyLock: %v", err)
+       }
+
+       held.Close()
+       shared.Close()
+
+       // Nothing is locked any more: both files must be gone
+       if n := cleanAndCount(); n != 0 {
+               t.Fatalf("expected empty lock dir. found %d files", n)
+       }
+}
+
+// TestFileChangedLock exercises the changed-file detection of KeyLock.lock.
+// In every case a KeyLock (l2) is opened on "key01" while another holder (l1)
+// has it locked exclusively; l1's owner then removes the lock file and closes
+// its lock, so l2 is left with a descriptor on a file that is gone.
+//
+// Cases:
+//   - the file stays removed: locking l2 must fail;
+//   - a new file is created under the same name and no retry is allowed:
+//     locking l2 must fail;
+//   - same, but with one retry allowed: locking l2 must succeed.
+func TestFileChangedLock(t *testing.T) {
+       cases := []struct {
+               name       string
+               recreate   bool // take and release a fresh lock after the removal
+               maxRetries int
+               wantErr    bool
+       }{
+               {"removed file", false, 0, true},
+               {"recreated file, no retry", true, 0, true},
+               {"recreated file, one retry", true, 1, false},
+       }
+
+       for _, c := range cases {
+               // Each case runs in its own function so that its deferred
+               // cleanups fire before the next case starts
+               func() {
+                       dir, err := ioutil.TempDir("", "")
+                       if err != nil {
+                               t.Fatalf("%s: error creating tmpdir: %v", c.name, err)
+                       }
+                       defer os.RemoveAll(dir)
+
+                       l1, err := ExclusiveKeyLock(dir, "key01")
+                       if err != nil {
+                               t.Fatalf("%s: error creating keyLock: %v", c.name, err)
+                       }
+
+                       l2, err := NewKeyLock(dir, "key01")
+                       if err != nil {
+                               t.Fatalf("%s: error creating keyLock: %v", c.name, err)
+                       }
+                       defer l2.Close()
+
+                       // Simulate that l1 owner removes the actual key01 lock file
+                       err = os.Remove(filepath.Join(dir, "key01"))
+                       if err != nil {
+                               t.Fatalf("%s: error removing lock file: %v", c.name, err)
+                       }
+                       l1.Close()
+
+                       if c.recreate {
+                               // Simulate that another user comes and takes a lock,
+                               // this will create a new lock file as it was removed.
+                               l3, err := ExclusiveKeyLock(dir, "key01")
+                               if err != nil {
+                                       t.Fatalf("%s: error creating keyLock: %v", c.name, err)
+                               }
+                               l3.Close()
+                       }
+
+                       // Now l2 owner takes a lock, using the fd of the old file
+                       err = l2.lock(keyLockShared, c.maxRetries)
+                       if c.wantErr && err == nil {
+                               t.Fatalf("%s: expected error", c.name)
+                       }
+                       if !c.wantErr && err != nil {
+                               t.Fatalf("%s: unexpected error: %v", c.name, err)
+                       }
+               }()
+       }
+}
+
+// countFiles returns the number of entries found directly inside dir, or -1
+// together with the error when dir cannot be opened or listed.
+func countFiles(dir string) (int, error) {
+       handle, openErr := os.Open(dir)
+       if openErr != nil {
+               return -1, openErr
+       }
+       defer handle.Close()
+
+       infos, readErr := handle.Readdir(0)
+       if readErr != nil {
+               return -1, readErr
+       }
+       return len(infos), nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/.travis.yml b/Godeps/_workspace/src/github.com/vishvananda/netlink/.travis.yml
new file mode 100644 (file)
index 0000000..1970069
--- /dev/null
@@ -0,0 +1,3 @@
+language: go
+install:
+      - go get github.com/vishvananda/netns
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/LICENSE b/Godeps/_workspace/src/github.com/vishvananda/netlink/LICENSE
new file mode 100644 (file)
index 0000000..9f64db8
--- /dev/null
@@ -0,0 +1,192 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2014 Vishvananda Ishaya.
+   Copyright 2014 Docker, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/Makefile b/Godeps/_workspace/src/github.com/vishvananda/netlink/Makefile
new file mode 100644 (file)
index 0000000..b325018
--- /dev/null
@@ -0,0 +1,29 @@
+DIRS := \
+       . \
+       nl
+
+DEPS = \
+       github.com/vishvananda/netns
+
+uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1)))
+testdirs = $(call uniq,$(foreach d,$(1),$(dir $(wildcard $(d)/*_test.go))))
+goroot = $(addprefix ../../../,$(1))
+unroot = $(subst ../../../,,$(1))
+fmt = $(addprefix fmt-,$(1))
+
+all: fmt
+
+$(call goroot,$(DEPS)):
+       go get $(call unroot,$@)
+
+.PHONY: $(call testdirs,$(DIRS))
+$(call testdirs,$(DIRS)):
+       sudo -E go test -v github.com/vishvananda/netlink/$@
+
+$(call fmt,$(call testdirs,$(DIRS))):
+       ! gofmt -l $(subst fmt-,,$@)/*.go | grep ''
+
+.PHONY: fmt
+fmt: $(call fmt,$(call testdirs,$(DIRS)))
+
+test: fmt $(call goroot,$(DEPS)) $(call testdirs,$(DIRS))
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md b/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md
new file mode 100644 (file)
index 0000000..555f886
--- /dev/null
@@ -0,0 +1,83 @@
+# netlink - netlink library for go #
+
+[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
+
+The netlink package provides a simple netlink library for go. Netlink
+is the interface a user-space program in linux uses to communicate with
+the kernel. It can be used to add and remove interfaces, set ip addresses
+and routes, and configure ipsec. Netlink communication requires elevated
+privileges, so in most cases this code needs to be run as root. Since
+low-level netlink messages are inscrutable at best, the library attempts
+to provide an api that is loosely modeled on the CLI provided by iproute2.
+Actions like `ip link add` will be accomplished via a similarly named
+function like LinkAdd(). This library began its life as a fork of the
+netlink functionality in
+[docker/libcontainer](https://github.com/docker/libcontainer) but was
+heavily rewritten to improve testability, performance, and to add new
+functionality like ipsec xfrm handling.
+
+## Local Build and Test ##
+
+You can use go get command:
+
+    go get github.com/vishvananda/netlink
+
+Testing dependencies:
+
+    go get github.com/vishvananda/netns
+
+Testing (requires root):
+
+    sudo -E go test github.com/vishvananda/netlink
+
+## Examples ##
+
+Add a new bridge and add eth1 into it:
+
+```go
+package main
+
+import (
+    "net"
+    "github.com/vishvananda/netlink"
+)
+
+func main() {
+    mybridge := &netlink.Bridge{netlink.LinkAttrs{Name: "foo"}}
+    _ = netlink.LinkAdd(mybridge)
+    eth1, _ := netlink.LinkByName("eth1")
+    netlink.LinkSetMaster(eth1, mybridge)
+}
+
+```
+
+Add a new ip address to loopback:
+
+```go
+package main
+
+import (
+    "net"
+    "github.com/vishvananda/netlink"
+)
+
+func main() {
+    lo, _ := netlink.LinkByName("lo")
+    addr, _ := netlink.ParseAddr("169.254.169.254/32")
+    netlink.AddrAdd(lo, addr)
+}
+
+```
+
+## Future Work ##
+
+Many pieces of netlink are not yet fully supported in the high-level
+interface. Aspects of virtually all of the high-level objects don't exist.
+Many of the underlying primitives are there, so it's a matter of putting
+the right fields into the high-level objects and making sure that they
+are serialized and deserialized correctly in the Add and List methods.
+
+There are also a few pieces of low level netlink functionality that still
+need to be implemented. Routing rules and some of the more advanced link
+types are not in place yet. Hopefully there is decent structure and testing
+in place to make these fairly straightforward to add.
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go
new file mode 100644 (file)
index 0000000..5c12f4e
--- /dev/null
@@ -0,0 +1,43 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+       "strings"
+)
+
+// Addr represents an IP address from netlink. Netlink ip addresses
+// include a mask, so it stores the address as a net.IPNet.
+type Addr struct {
+       // Embedded pointer: IP and Mask are promoted onto Addr. It may be
+       // nil, in which case accessing IP or Mask panics.
+       *net.IPNet
+       // Label is optional; String appends it and Equal ignores it.
+       Label string
+}
+
+// String returns $ip/$netmask $label
+func (addr Addr) String() string {
+       return fmt.Sprintf("%s %s", addr.IPNet, addr.Label)
+}
+
+// ParseAddr converts a string of the form "$ip/$netmask $label" into an
+// Addr. The label is optional. The input is split on single spaces and
+// only the first two pieces are used.
+func ParseAddr(s string) (*Addr, error) {
+       var label string
+       if fields := strings.Split(s, " "); len(fields) >= 2 {
+               s, label = fields[0], fields[1]
+       }
+
+       ipnet, err := ParseIPNet(s)
+       if err != nil {
+               return nil, err
+       }
+       return &Addr{IPNet: ipnet, Label: label}, nil
+}
+
+// Equal returns true if both Addrs have the same net.IPNet value.
+func (a Addr) Equal(x Addr) bool {
+       sizea, _ := a.Mask.Size()
+       sizeb, _ := x.Mask.Size()
+       // ignore label for comparison
+       return a.IP.Equal(x.IP) && sizea == sizeb
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go
new file mode 100644 (file)
index 0000000..31e6b5f
--- /dev/null
@@ -0,0 +1,114 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+       "strings"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// AddrAdd assigns an IP address to a link device by sending an
+// RTM_NEWADDR request with the create, exclusive and ack flags.
+// Equivalent to: `ip addr add $addr dev $link`
+func AddrAdd(link Link, addr *Addr) error {
+       const flags = syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK
+       return addrHandle(link, addr, nl.NewNetlinkRequest(syscall.RTM_NEWADDR, flags))
+}
+
+// AddrDel will delete an IP address from a link device.
+// Equivalent to: `ip addr del $addr dev $link`
+func AddrDel(link Link, addr *Addr) error {
+       req := nl.NewNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK)
+       return addrHandle(link, addr, req)
+}
+
+// addrHandle completes req with the interface-address message and the
+// IFA_LOCAL, IFA_ADDRESS and (when a label is set) IFA_LABEL attributes
+// describing addr on link, then executes it on a NETLINK_ROUTE socket.
+//
+// It returns an error if addr carries no IP network, if the label does
+// not begin with the interface name, or if the request itself fails.
+func addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error {
+       // addr.IP and addr.Mask are promoted from the embedded *net.IPNet;
+       // reject a missing one here instead of panicking on a nil pointer.
+       if addr == nil || addr.IPNet == nil {
+               return fmt.Errorf("address must include an IP network")
+       }
+
+       base := link.Attrs()
+       if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) {
+               return fmt.Errorf("label must begin with interface name")
+       }
+       ensureIndex(base)
+
+       family := nl.GetIPFamily(addr.IP)
+
+       msg := nl.NewIfAddrmsg(family)
+       msg.Index = uint32(base.Index)
+       prefixlen, _ := addr.Mask.Size()
+       msg.Prefixlen = uint8(prefixlen)
+       req.AddData(msg)
+
+       // Use the 4-byte form for IPv4 and the 16-byte form otherwise.
+       var addrData []byte
+       if family == FAMILY_V4 {
+               addrData = addr.IP.To4()
+       } else {
+               addrData = addr.IP.To16()
+       }
+
+       // The same bytes are sent as both the local and the interface address.
+       localData := nl.NewRtAttr(syscall.IFA_LOCAL, addrData)
+       req.AddData(localData)
+
+       addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, addrData)
+       req.AddData(addressData)
+
+       if addr.Label != "" {
+               labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
+               req.AddData(labelData)
+       }
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// AddrList returns the IP addresses reported by an RTM_GETADDR dump.
+// Equivalent to: `ip addr show`.
+// The list can be filtered by link and ip family: a non-nil link keeps
+// only the entries whose interface index matches it, and family is placed
+// in the request message.
+func AddrList(link Link, family int) ([]Addr, error) {
+       req := nl.NewNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP)
+       req.AddData(nl.NewIfInfomsg(family))
+
+       msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWADDR)
+       if err != nil {
+               return nil, err
+       }
+
+       // Resolve the interface index to filter on, if a link was given.
+       var wanted uint32
+       if link != nil {
+               linkAttrs := link.Attrs()
+               ensureIndex(linkAttrs)
+               wanted = uint32(linkAttrs.Index)
+       }
+
+       addrs := make([]Addr, 0)
+       for _, raw := range msgs {
+               hdr := nl.DeserializeIfAddrmsg(raw)
+               if link != nil && hdr.Index != wanted {
+                       // Ignore messages from other interfaces
+                       continue
+               }
+
+               rtattrs, err := nl.ParseRouteAttr(raw[hdr.Len():])
+               if err != nil {
+                       return nil, err
+               }
+
+               var entry Addr
+               for _, rta := range rtattrs {
+                       switch rta.Attr.Type {
+                       case syscall.IFA_LABEL:
+                               // drop the last byte of the label value
+                               entry.Label = string(rta.Value[:len(rta.Value)-1])
+                       case syscall.IFA_ADDRESS:
+                               entry.IPNet = &net.IPNet{
+                                       IP:   rta.Value,
+                                       Mask: net.CIDRMask(int(hdr.Prefixlen), 8*len(rta.Value)),
+                               }
+                       }
+               }
+               addrs = append(addrs, entry)
+       }
+
+       return addrs, nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_test.go
new file mode 100644 (file)
index 0000000..45e22c0
--- /dev/null
@@ -0,0 +1,45 @@
+package netlink
+
+import (
+       "testing"
+)
+
+func TestAddrAddDel(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       link, err := LinkByName("lo")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       addr, err := ParseAddr("127.1.1.1/24 local")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if err = AddrAdd(link, addr); err != nil {
+               t.Fatal(err)
+       }
+
+       addrs, err := AddrList(link, FAMILY_ALL)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if len(addrs) != 1 || !addr.Equal(addrs[0]) || addrs[0].Label != addr.Label {
+               t.Fatal("Address not added properly")
+       }
+
+       if err = AddrDel(link, addr); err != nil {
+               t.Fatal(err)
+       }
+       addrs, err = AddrList(link, FAMILY_ALL)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if len(addrs) != 0 {
+               t.Fatal("Address not removed properly")
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go
new file mode 100644 (file)
index 0000000..0cb6fc3
--- /dev/null
@@ -0,0 +1,193 @@
+package netlink
+
+import "net"
+
+// Link represents a link device from netlink. Shared link attributes
+// like name may be retrieved using the Attrs() method. Unique data
+// can be retrieved by casting the object to the proper type.
+type Link interface {
+       // Attrs returns a pointer to the attributes shared by most link types.
+       Attrs() *LinkAttrs
+       // Type returns the link kind as a string, e.g. "bridge" or "veth".
+       Type() string
+}
+
+// NsPid and NsFd are the two concrete types accepted in
+// LinkAttrs.Namespace: LinkAdd sends an NsPid as IFLA_NET_NS_PID and an
+// NsFd as IFLA_NET_NS_FD.
+type (
+       NsPid int
+       NsFd  int
+)
+
+// LinkAttrs represents data shared by most link types
+type LinkAttrs struct {
+       // Index of the link; while it is 0, ensureIndex tries to look it up by Name.
+       Index        int
+       // MTU is sent by LinkAdd only when it is greater than zero.
+       MTU          int
+       TxQLen       uint32 // Transmit Queue Length
+       // Name must be non-empty for LinkAdd.
+       Name         string
+       HardwareAddr net.HardwareAddr
+       Flags        net.Flags
+       ParentIndex  int         // index of the parent link device
+       MasterIndex  int         // must be the index of a bridge
+       Namespace    interface{} // nil | NsPid | NsFd
+}
+
+// Device links cannot be created via netlink. These links
+// are links created by udev like 'lo' and 'eth0'
+type Device struct {
+       LinkAttrs
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (device *Device) Attrs() *LinkAttrs {
+       return &device.LinkAttrs
+}
+
+// Type returns "device".
+func (device *Device) Type() string {
+       return "device"
+}
+
+// Dummy links are dummy ethernet devices
+type Dummy struct {
+       LinkAttrs
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (dummy *Dummy) Attrs() *LinkAttrs {
+       return &dummy.LinkAttrs
+}
+
+// Type returns "dummy".
+func (dummy *Dummy) Type() string {
+       return "dummy"
+}
+
+// Bridge links are simple linux bridges
+type Bridge struct {
+       LinkAttrs
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (bridge *Bridge) Attrs() *LinkAttrs {
+       return &bridge.LinkAttrs
+}
+
+// Type returns "bridge".
+func (bridge *Bridge) Type() string {
+       return "bridge"
+}
+
+// Vlan links have ParentIndex set in their Attrs()
+type Vlan struct {
+       LinkAttrs
+       // VlanId is sent by LinkAdd as IFLA_VLAN_ID after conversion to uint16.
+       VlanId int
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (vlan *Vlan) Attrs() *LinkAttrs {
+       return &vlan.LinkAttrs
+}
+
+// Type returns "vlan".
+func (vlan *Vlan) Type() string {
+       return "vlan"
+}
+
+// MacvlanMode selects the mode of a Macvlan link. LinkAdd uses the value
+// as an index into the macvlanModes table to obtain the number that is
+// sent as IFLA_MACVLAN_MODE, so the order of these constants must match
+// that table.
+type MacvlanMode uint16
+
+const (
+       // MACVLAN_MODE_DEFAULT makes LinkAdd send no mode attribute at all.
+       MACVLAN_MODE_DEFAULT MacvlanMode = iota
+       MACVLAN_MODE_PRIVATE
+       MACVLAN_MODE_VEPA
+       MACVLAN_MODE_BRIDGE
+       MACVLAN_MODE_PASSTHRU
+       MACVLAN_MODE_SOURCE
+)
+
+// Macvlan links have ParentIndex set in their Attrs()
+type Macvlan struct {
+       LinkAttrs
+       // Mode is sent by LinkAdd unless it is MACVLAN_MODE_DEFAULT.
+       Mode MacvlanMode
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (macvlan *Macvlan) Attrs() *LinkAttrs {
+       return &macvlan.LinkAttrs
+}
+
+// Type returns "macvlan".
+func (macvlan *Macvlan) Type() string {
+       return "macvlan"
+}
+
+// Veth devices must specify PeerName on create
+type Veth struct {
+       LinkAttrs
+       // PeerName is sent by LinkAdd as the interface name of the peer end.
+       PeerName string // veth on create only
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (veth *Veth) Attrs() *LinkAttrs {
+       return &veth.LinkAttrs
+}
+
+// Type returns "veth".
+func (veth *Veth) Type() string {
+       return "veth"
+}
+
+// Generic links represent types that are not currently understood
+// by this netlink library.
+type Generic struct {
+       LinkAttrs
+       // LinkType holds the kind string; linkDeserialize fills it in from
+       // IFLA_INFO_KIND for kinds it has no dedicated type for.
+       LinkType string
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (generic *Generic) Attrs() *LinkAttrs {
+       return &generic.LinkAttrs
+}
+
+// Type returns the stored LinkType.
+func (generic *Generic) Type() string {
+       return generic.LinkType
+}
+
+// Vxlan holds the attributes of a link of kind "vxlan". addVxlanAttrs
+// serializes these fields when the link is created with LinkAdd.
+type Vxlan struct {
+       LinkAttrs
+       VxlanId      int
+       // VtepDevIndex is sent as IFLA_VXLAN_LINK only when non-zero.
+       VtepDevIndex int
+       // SrcAddr and Group are sent in their IPv4 or IPv6 attribute
+       // depending on the address form; nil values are skipped.
+       SrcAddr      net.IP
+       Group        net.IP
+       // TTL and TOS are converted to uint8 before being sent.
+       TTL          int
+       TOS          int
+       Learning     bool
+       Proxy        bool
+       RSC          bool
+       L2miss       bool
+       L3miss       bool
+       // NoAge sends an ageing value of 0 and takes precedence over Age.
+       NoAge        bool
+       // Age, Limit and Port are sent only when greater than zero.
+       Age          int
+       Limit        int
+       Port         int
+       // PortLow and PortHigh are sent together as a port range when
+       // either of them is greater than zero.
+       PortLow      int
+       PortHigh     int
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (vxlan *Vxlan) Attrs() *LinkAttrs {
+       return &vxlan.LinkAttrs
+}
+
+// Type returns "vxlan".
+func (vxlan *Vxlan) Type() string {
+       return "vxlan"
+}
+
+// IPVlanMode selects the mode of an IPVlan link. LinkAdd sends the value
+// unchanged (as a uint16) in the IFLA_IPVLAN_MODE attribute.
+type IPVlanMode uint16
+
+const (
+       IPVLAN_MODE_L2 IPVlanMode = iota
+       IPVLAN_MODE_L3
+       IPVLAN_MODE_MAX
+)
+
+// IPVlan holds the attributes of a link of kind "ipvlan". LinkAdd refuses
+// to create one unless ParentIndex is set in its Attrs().
+type IPVlan struct {
+       LinkAttrs
+       Mode IPVlanMode
+}
+
+// Attrs returns the embedded LinkAttrs.
+func (ipvlan *IPVlan) Attrs() *LinkAttrs {
+       return &ipvlan.LinkAttrs
+}
+
+// Type returns "ipvlan".
+func (ipvlan *IPVlan) Type() string {
+       return "ipvlan"
+}
+
+// iproute2 supported devices;
+// vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
+// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
+// gre | gretap | ip6gre | ip6gretap | vti | nlmon |
+// bond_slave | ipvlan
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go
new file mode 100644 (file)
index 0000000..37605dc
--- /dev/null
@@ -0,0 +1,749 @@
+package netlink
+
+import (
+       "bytes"
+       "encoding/binary"
+       "fmt"
+       "net"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// native is the value returned by nl.NativeEndian(); this file uses it to
+// encode and decode integer attribute payloads.
+var native = nl.NativeEndian()
+
+// lookupByDump is set by LinkByName once a lookup by name has failed with
+// EINVAL; from then on names are resolved by dumping all links.
+var lookupByDump = false
+
+// macvlanModes is indexed by MacvlanMode and yields the number LinkAdd
+// sends as IFLA_MACVLAN_MODE. Index 0 (MACVLAN_MODE_DEFAULT) is never sent.
+var macvlanModes = [...]uint32{
+       0,
+       nl.MACVLAN_MODE_PRIVATE,
+       nl.MACVLAN_MODE_VEPA,
+       nl.MACVLAN_MODE_BRIDGE,
+       nl.MACVLAN_MODE_PASSTHRU,
+       nl.MACVLAN_MODE_SOURCE,
+}
+
+func ensureIndex(link *LinkAttrs) {
+       if link != nil && link.Index == 0 {
+               newlink, _ := LinkByName(link.Name)
+               if newlink != nil {
+                       link.Index = newlink.Attrs().Index
+               }
+       }
+}
+
+// LinkSetUp enables the link device.
+// Equivalent to: `ip link set $link up`
+func LinkSetUp(link Link) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Change = syscall.IFF_UP
+       msg.Flags = syscall.IFF_UP
+       msg.Index = int32(base.Index)
+       req.AddData(msg)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetUp disables link device.
+// Equivalent to: `ip link set $link down`
+func LinkSetDown(link Link) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Change = syscall.IFF_UP
+       msg.Flags = 0 & ^syscall.IFF_UP
+       msg.Index = int32(base.Index)
+       req.AddData(msg)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetMTU sets the mtu of the link device.
+// Equivalent to: `ip link set $link mtu $mtu`
+func LinkSetMTU(link Link, mtu int) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       b := make([]byte, 4)
+       native.PutUint32(b, uint32(mtu))
+
+       data := nl.NewRtAttr(syscall.IFLA_MTU, b)
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetName sets the name of the link device.
+// Equivalent to: `ip link set $link name $name`
+func LinkSetName(link Link, name string) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       data := nl.NewRtAttr(syscall.IFLA_IFNAME, []byte(name))
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetHardwareAddr sets the hardware address of the link device.
+// Equivalent to: `ip link set $link address $hwaddr`
+func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       data := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(hwaddr))
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetMaster sets the master of the link device.
+// Equivalent to: `ip link set $link master $master`
+func LinkSetMaster(link Link, master *Bridge) error {
+       index := 0
+       if master != nil {
+               masterBase := master.Attrs()
+               ensureIndex(masterBase)
+               index = masterBase.Index
+       }
+       return LinkSetMasterByIndex(link, index)
+}
+
+// LinkSetMasterByIndex sets the master of the link device.
+// Equivalent to: `ip link set $link master $master`
+func LinkSetMasterByIndex(link Link, masterIndex int) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       b := make([]byte, 4)
+       native.PutUint32(b, uint32(masterIndex))
+
+       data := nl.NewRtAttr(syscall.IFLA_MASTER, b)
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetNsPid puts the device into a new network namespace. The
+// pid must be a pid of a running process.
+// Equivalent to: `ip link set $link netns $pid`
+func LinkSetNsPid(link Link, nspid int) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       b := make([]byte, 4)
+       native.PutUint32(b, uint32(nspid))
+
+       data := nl.NewRtAttr(syscall.IFLA_NET_NS_PID, b)
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// LinkSetNsPid puts the device into a new network namespace. The
+// fd must be an open file descriptor to a network namespace.
+// Similar to: `ip link set $link netns $ns`
+func LinkSetNsFd(link Link, fd int) error {
+       base := link.Attrs()
+       ensureIndex(base)
+       req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Type = syscall.RTM_SETLINK
+       msg.Flags = syscall.NLM_F_REQUEST
+       msg.Index = int32(base.Index)
+       msg.Change = nl.DEFAULT_CHANGE
+       req.AddData(msg)
+
+       b := make([]byte, 4)
+       native.PutUint32(b, uint32(fd))
+
+       data := nl.NewRtAttr(nl.IFLA_NET_NS_FD, b)
+       req.AddData(data)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+func boolAttr(val bool) []byte {
+       var v uint8
+       if val {
+               v = 1
+       }
+       return nl.Uint8Attr(v)
+}
+
+// vxlanPortRange is the payload of the IFLA_VXLAN_PORT_RANGE attribute;
+// addVxlanAttrs writes it with binary.Write in big-endian order, Lo first.
+type vxlanPortRange struct {
+       Lo, Hi uint16
+}
+
+// addVxlanAttrs appends an IFLA_INFO_DATA attribute to linkInfo and fills
+// it with the vxlan settings. The id, TTL, TOS and the boolean options are
+// always written; the remaining attributes are written only when the
+// corresponding field is set.
+func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
+       data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+       add := func(attrType int, payload []byte) {
+               nl.NewRtAttrChild(data, attrType, payload)
+       }
+
+       add(nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId)))
+       if vxlan.VtepDevIndex != 0 {
+               add(nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex)))
+       }
+
+       // Addresses go into the IPv4 attribute when they have a 4-byte
+       // form and into the IPv6 attribute otherwise.
+       if src := vxlan.SrcAddr; src != nil {
+               if v4 := src.To4(); v4 != nil {
+                       add(nl.IFLA_VXLAN_LOCAL, []byte(v4))
+               } else if v6 := src.To16(); v6 != nil {
+                       add(nl.IFLA_VXLAN_LOCAL6, []byte(v6))
+               }
+       }
+       if grp := vxlan.Group; grp != nil {
+               if v4 := grp.To4(); v4 != nil {
+                       add(nl.IFLA_VXLAN_GROUP, []byte(v4))
+               } else if v6 := grp.To16(); v6 != nil {
+                       add(nl.IFLA_VXLAN_GROUP6, []byte(v6))
+               }
+       }
+
+       add(nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL)))
+       add(nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS)))
+       add(nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning))
+       add(nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy))
+       add(nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC))
+       add(nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
+       add(nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
+
+       // NoAge wins over Age and is sent as an ageing value of 0.
+       switch {
+       case vxlan.NoAge:
+               add(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
+       case vxlan.Age > 0:
+               add(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age)))
+       }
+       if vxlan.Limit > 0 {
+               add(nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit)))
+       }
+       if vxlan.Port > 0 {
+               add(nl.IFLA_VXLAN_PORT, nl.Uint16Attr(uint16(vxlan.Port)))
+       }
+       if vxlan.PortLow > 0 || vxlan.PortHigh > 0 {
+               // The range is written in big-endian order, low port first.
+               var buf bytes.Buffer
+               binary.Write(&buf, binary.BigEndian, &vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)})
+               add(nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes())
+       }
+}
+
+// LinkAdd adds a new link device. The type and features of the device
+// are taken from the parameters in the link object.
+// Equivalent to: `ip link add $link`
+//
+// It returns an error when the name is empty, when an ipvlan link has no
+// ParentIndex, when LinkAttrs.Namespace holds something other than nil,
+// NsPid or NsFd, when a macvlan mode lies outside the macvlanModes table,
+// or when the netlink request fails. If MasterIndex is set, the new link
+// is attached to that master after it has been created.
+func LinkAdd(link Link) error {
+       // TODO: set mtu and hardware address
+       // TODO: support extra data for macvlan
+       base := link.Attrs()
+
+       if base.Name == "" {
+               return fmt.Errorf("LinkAttrs.Name cannot be empty!")
+       }
+
+       req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       req.AddData(msg)
+
+       if base.ParentIndex != 0 {
+               b := make([]byte, 4)
+               native.PutUint32(b, uint32(base.ParentIndex))
+               data := nl.NewRtAttr(syscall.IFLA_LINK, b)
+               req.AddData(data)
+       } else if link.Type() == "ipvlan" {
+               return fmt.Errorf("Can't create ipvlan link without ParentIndex")
+       }
+
+       nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(base.Name))
+       req.AddData(nameData)
+
+       if base.MTU > 0 {
+               mtu := nl.NewRtAttr(syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
+               req.AddData(mtu)
+       }
+
+       if base.Namespace != nil {
+               var attr *nl.RtAttr
+               switch ns := base.Namespace.(type) {
+               case NsPid:
+                       attr = nl.NewRtAttr(syscall.IFLA_NET_NS_PID, nl.Uint32Attr(uint32(ns)))
+               case NsFd:
+                       attr = nl.NewRtAttr(nl.IFLA_NET_NS_FD, nl.Uint32Attr(uint32(ns)))
+               default:
+                       // Any other type would leave attr nil, and a nil
+                       // attribute must not be added to the request.
+                       return fmt.Errorf("unsupported LinkAttrs.Namespace type %T", base.Namespace)
+               }
+
+               req.AddData(attr)
+       }
+
+       linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil)
+       nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
+
+       // NOTE(review): IFLA_TXQLEN is nested inside IFLA_LINKINFO here,
+       // whereas IFLA_MTU above is a top-level attribute — confirm that
+       // this placement is intended.
+       nl.NewRtAttrChild(linkInfo, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen))
+
+       // Kind-specific data goes into a nested IFLA_INFO_DATA attribute.
+       switch l := link.(type) {
+       case *Vlan:
+               b := make([]byte, 2)
+               native.PutUint16(b, uint16(l.VlanId))
+               data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+               nl.NewRtAttrChild(data, nl.IFLA_VLAN_ID, b)
+       case *Veth:
+               // The peer is described by its own interface message and
+               // attributes; it gets the same TxQLen and MTU as the link.
+               data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+               peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil)
+               nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC)
+               nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(l.PeerName))
+               nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen))
+               if base.MTU > 0 {
+                       nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
+               }
+       case *Vxlan:
+               addVxlanAttrs(l, linkInfo)
+       case *IPVlan:
+               data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+               nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(l.Mode)))
+       case *Macvlan:
+               if l.Mode != MACVLAN_MODE_DEFAULT {
+                       // Mode is used as an index; reject values beyond the table.
+                       if int(l.Mode) >= len(macvlanModes) {
+                               return fmt.Errorf("unsupported macvlan mode %d", l.Mode)
+                       }
+                       data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+                       nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[l.Mode]))
+               }
+       }
+
+       req.AddData(linkInfo)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       if err != nil {
+               return err
+       }
+
+       ensureIndex(base)
+
+       // can't set master during create, so set it afterwards
+       if base.MasterIndex != 0 {
+               // TODO: verify MasterIndex is actually a bridge?
+               return LinkSetMasterByIndex(link, base.MasterIndex)
+       }
+       return nil
+}
+
+// LinkDel deletes link device. Either Index or Name must be set in
+// the link object for it to be deleted. The other values are ignored.
+// Equivalent to: `ip link del $link`
+func LinkDel(link Link) error {
+       base := link.Attrs()
+
+       ensureIndex(base)
+
+       req := nl.NewNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Index = int32(base.Index)
+       req.AddData(msg)
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+func linkByNameDump(name string) (Link, error) {
+       links, err := LinkList()
+       if err != nil {
+               return nil, err
+       }
+
+       for _, link := range links {
+               if link.Attrs().Name == name {
+                       return link, nil
+               }
+       }
+       return nil, fmt.Errorf("Link %s not found", name)
+}
+
+// LinkByName finds a link by name and returns a pointer to the object.
+func LinkByName(name string) (Link, error) {
+       if lookupByDump {
+               return linkByNameDump(name)
+       }
+
+       req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       req.AddData(msg)
+
+       nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(name))
+       req.AddData(nameData)
+
+       link, err := execGetLink(req)
+       if err == syscall.EINVAL {
+               // older kernels don't support looking up via IFLA_IFNAME
+               // so fall back to dumping all links
+               lookupByDump = true
+               return linkByNameDump(name)
+       }
+
+       return link, err
+}
+
+// LinkByIndex finds a link by index and returns a pointer to the object.
+func LinkByIndex(index int) (Link, error) {
+       req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK)
+
+       msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+       msg.Index = int32(index)
+       req.AddData(msg)
+
+       return execGetLink(req)
+}
+
+func execGetLink(req *nl.NetlinkRequest) (Link, error) {
+       msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       if err != nil {
+               if errno, ok := err.(syscall.Errno); ok {
+                       if errno == syscall.ENODEV {
+                               return nil, fmt.Errorf("Link not found")
+                       }
+               }
+               return nil, err
+       }
+
+       switch {
+       case len(msgs) == 0:
+               return nil, fmt.Errorf("Link not found")
+
+       case len(msgs) == 1:
+               return linkDeserialize(msgs[0])
+
+       default:
+               return nil, fmt.Errorf("More than one link found")
+       }
+}
+
+// linkDeserialize deserializes a raw message received from netlink into
+// a link object.
+//
+// The concrete type is chosen from the IFLA_INFO_KIND nested inside
+// IFLA_LINKINFO: kinds without a dedicated type become *Generic, and
+// messages that carry no kind at all become *Device. Zero-length kind and
+// name attributes are read as empty strings.
+func linkDeserialize(m []byte) (Link, error) {
+       msg := nl.DeserializeIfInfomsg(m)
+
+       attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+       if err != nil {
+               return nil, err
+       }
+
+       base := LinkAttrs{Index: int(msg.Index), Flags: linkFlags(msg.Flags)}
+       var link Link
+       linkType := ""
+       for _, attr := range attrs {
+               switch attr.Attr.Type {
+               case syscall.IFLA_LINKINFO:
+                       infos, err := nl.ParseRouteAttr(attr.Value)
+                       if err != nil {
+                               return nil, err
+                       }
+                       for _, info := range infos {
+                               switch info.Attr.Type {
+                               case nl.IFLA_INFO_KIND:
+                                       // Drop the last byte of the kind, guarding
+                                       // against an empty value.
+                                       linkType = ""
+                                       if n := len(info.Value); n > 0 {
+                                               linkType = string(info.Value[:n-1])
+                                       }
+                                       switch linkType {
+                                       case "dummy":
+                                               link = &Dummy{}
+                                       case "bridge":
+                                               link = &Bridge{}
+                                       case "vlan":
+                                               link = &Vlan{}
+                                       case "veth":
+                                               link = &Veth{}
+                                       case "vxlan":
+                                               link = &Vxlan{}
+                                       case "ipvlan":
+                                               link = &IPVlan{}
+                                       case "macvlan":
+                                               link = &Macvlan{}
+                                       default:
+                                               link = &Generic{LinkType: linkType}
+                                       }
+                               case nl.IFLA_INFO_DATA:
+                                       // Uses the kind seen so far to pick the parser.
+                                       data, err := nl.ParseRouteAttr(info.Value)
+                                       if err != nil {
+                                               return nil, err
+                                       }
+                                       switch linkType {
+                                       case "vlan":
+                                               parseVlanData(link, data)
+                                       case "vxlan":
+                                               parseVxlanData(link, data)
+                                       case "ipvlan":
+                                               parseIPVlanData(link, data)
+                                       case "macvlan":
+                                               parseMacvlanData(link, data)
+                                       }
+                               }
+                       }
+               case syscall.IFLA_ADDRESS:
+                       // An all-zero address is treated as "no hardware address".
+                       var nonzero bool
+                       for _, b := range attr.Value {
+                               if b != 0 {
+                                       nonzero = true
+                                       break
+                               }
+                       }
+                       if nonzero {
+                               base.HardwareAddr = attr.Value[:]
+                       }
+               case syscall.IFLA_IFNAME:
+                       // Drop the last byte of the name, guarding against
+                       // an empty value.
+                       if n := len(attr.Value); n > 0 {
+                               base.Name = string(attr.Value[:n-1])
+                       }
+               case syscall.IFLA_MTU:
+                       base.MTU = int(native.Uint32(attr.Value[0:4]))
+               case syscall.IFLA_LINK:
+                       base.ParentIndex = int(native.Uint32(attr.Value[0:4]))
+               case syscall.IFLA_MASTER:
+                       base.MasterIndex = int(native.Uint32(attr.Value[0:4]))
+               case syscall.IFLA_TXQLEN:
+                       base.TxQLen = native.Uint32(attr.Value[0:4])
+               }
+       }
+       // Links that don't have IFLA_INFO_KIND are hardware devices
+       if link == nil {
+               link = &Device{}
+       }
+       *link.Attrs() = base
+
+       return link, nil
+}
+
+// LinkList dumps the link devices reported for an RTM_GETLINK request and
+// returns them as parsed Link values.
+// Equivalent to: `ip link show`
+func LinkList() ([]Link, error) {
+       // NOTE(vish): This duplicates functionality in net/iface_linux.go, but we need
+       //             to get the message ourselves to parse link type.
+       request := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
+       request.AddData(nl.NewIfInfomsg(syscall.AF_UNSPEC))
+
+       replies, err := request.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK)
+       if err != nil {
+               return nil, err
+       }
+
+       // A single undecodable reply aborts the whole listing.
+       links := []Link{}
+       for i := range replies {
+               parsed, err := linkDeserialize(replies[i])
+               if err != nil {
+                       return nil, err
+               }
+               links = append(links, parsed)
+       }
+
+       return links, nil
+}
+
+// LinkSetHairpin sets the IFLA_BRPORT_MODE bridge-port attribute of link to
+// mode via setProtinfoAttr.
+// NOTE(review): presumably mirrors `bridge link set dev $link hairpin on|off` — confirm.
+func LinkSetHairpin(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
+}
+
+// LinkSetGuard sets the IFLA_BRPORT_GUARD bridge-port attribute of link to
+// mode via setProtinfoAttr.
+func LinkSetGuard(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD)
+}
+
+// LinkSetFastLeave sets the IFLA_BRPORT_FAST_LEAVE bridge-port attribute of
+// link to mode via setProtinfoAttr.
+func LinkSetFastLeave(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_FAST_LEAVE)
+}
+
+// LinkSetLearning sets the IFLA_BRPORT_LEARNING bridge-port attribute of link
+// to mode via setProtinfoAttr.
+func LinkSetLearning(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_LEARNING)
+}
+
+// LinkSetRootBlock sets the IFLA_BRPORT_PROTECT bridge-port attribute of link
+// to mode via setProtinfoAttr.
+func LinkSetRootBlock(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROTECT)
+}
+
+// LinkSetFlood sets the IFLA_BRPORT_UNICAST_FLOOD bridge-port attribute of
+// link to mode via setProtinfoAttr.
+func LinkSetFlood(link Link, mode bool) error {
+       return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD)
+}
+
+// setProtinfoAttr sends an RTM_SETLINK request in the AF_BRIDGE family for
+// link. The request carries one attribute, attr, encoded from mode with
+// boolToByte and nested inside IFLA_PROTINFO. Any error from executing the
+// request is returned unchanged.
+func setProtinfoAttr(link Link, mode bool, attr int) error {
+       attrs := link.Attrs()
+       ensureIndex(attrs)
+       request := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+       ifInfo := nl.NewIfInfomsg(syscall.AF_BRIDGE)
+       ifInfo.Type = syscall.RTM_SETLINK
+       ifInfo.Flags = syscall.NLM_F_REQUEST
+       ifInfo.Index = int32(attrs.Index)
+       ifInfo.Change = nl.DEFAULT_CHANGE
+       request.AddData(ifInfo)
+
+       protinfo := nl.NewRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil)
+       nl.NewRtAttrChild(protinfo, attr, boolToByte(mode))
+       request.AddData(protinfo)
+
+       _, err := request.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// parseVlanData stores the 16-bit value of every IFLA_VLAN_ID attribute in
+// data into link.VlanId. link must be a *Vlan; other attributes are ignored.
+func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) {
+       target := link.(*Vlan)
+       for i := range data {
+               if data[i].Attr.Type == nl.IFLA_VLAN_ID {
+                       target.VlanId = int(native.Uint16(data[i].Value[0:2]))
+               }
+       }
+}
+
+// parseVxlanData fills the VXLAN-specific fields of link from the
+// IFLA_VXLAN_* attributes in data. link must be a *Vxlan; attributes with
+// other types are ignored.
+//
+// Multi-byte integers are decoded with the package-level native byte order,
+// except the port range, which is decoded as two big-endian values through
+// vxlanPortRange.
+func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
+       vxlan := link.(*Vxlan)
+       for _, datum := range data {
+               switch datum.Attr.Type {
+               case nl.IFLA_VXLAN_ID:
+                       vxlan.VxlanId = int(native.Uint32(datum.Value[0:4]))
+               case nl.IFLA_VXLAN_LINK:
+                       vxlan.VtepDevIndex = int(native.Uint32(datum.Value[0:4]))
+               case nl.IFLA_VXLAN_LOCAL:
+                       vxlan.SrcAddr = net.IP(datum.Value[0:4])
+               case nl.IFLA_VXLAN_LOCAL6:
+                       vxlan.SrcAddr = net.IP(datum.Value[0:16])
+               case nl.IFLA_VXLAN_GROUP:
+                       vxlan.Group = net.IP(datum.Value[0:4])
+               case nl.IFLA_VXLAN_GROUP6:
+                       vxlan.Group = net.IP(datum.Value[0:16])
+               case nl.IFLA_VXLAN_TTL:
+                       vxlan.TTL = int(datum.Value[0])
+               case nl.IFLA_VXLAN_TOS:
+                       vxlan.TOS = int(datum.Value[0])
+               case nl.IFLA_VXLAN_LEARNING:
+                       vxlan.Learning = int8(datum.Value[0]) != 0
+               case nl.IFLA_VXLAN_PROXY:
+                       vxlan.Proxy = int8(datum.Value[0]) != 0
+               case nl.IFLA_VXLAN_RSC:
+                       vxlan.RSC = int8(datum.Value[0]) != 0
+               case nl.IFLA_VXLAN_L2MISS:
+                       vxlan.L2miss = int8(datum.Value[0]) != 0
+               case nl.IFLA_VXLAN_L3MISS:
+                       vxlan.L3miss = int8(datum.Value[0]) != 0
+               case nl.IFLA_VXLAN_AGEING:
+                       vxlan.Age = int(native.Uint32(datum.Value[0:4]))
+                       // An ageing value of zero is reported as "no ageing".
+                       vxlan.NoAge = vxlan.Age == 0
+               case nl.IFLA_VXLAN_LIMIT:
+                       vxlan.Limit = int(native.Uint32(datum.Value[0:4]))
+               case nl.IFLA_VXLAN_PORT:
+                       vxlan.Port = int(native.Uint16(datum.Value[0:2]))
+               case nl.IFLA_VXLAN_PORT_RANGE:
+                       buf := bytes.NewBuffer(datum.Value[0:4])
+                       var pr vxlanPortRange
+                       // Only trust pr when decoding succeeded; binary.Read
+                       // returns nil on success.
+                       if err := binary.Read(buf, binary.BigEndian, &pr); err == nil {
+                               vxlan.PortLow = int(pr.Lo)
+                               vxlan.PortHigh = int(pr.Hi)
+                       }
+               }
+       }
+}
+
+// parseIPVlanData sets link.Mode from the first IFLA_IPVLAN_MODE attribute
+// in data and stops there. link must be an *IPVlan.
+func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
+       ipvlan := link.(*IPVlan)
+       for i := range data {
+               if data[i].Attr.Type != nl.IFLA_IPVLAN_MODE {
+                       continue
+               }
+               ipvlan.Mode = IPVlanMode(native.Uint32(data[i].Value[0:4]))
+               return
+       }
+}
+
+// parseMacvlanData translates the first IFLA_MACVLAN_MODE attribute in data
+// into the matching MACVLAN_MODE_* value on link, which must be a *Macvlan.
+// A mode value that matches none of the known nl.MACVLAN_MODE_* constants
+// leaves Mode untouched; scanning stops after that first attribute either way.
+func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
+       macvlan := link.(*Macvlan)
+       for i := range data {
+               if data[i].Attr.Type != nl.IFLA_MACVLAN_MODE {
+                       continue
+               }
+               switch native.Uint32(data[i].Value[0:4]) {
+               case nl.MACVLAN_MODE_PRIVATE:
+                       macvlan.Mode = MACVLAN_MODE_PRIVATE
+               case nl.MACVLAN_MODE_VEPA:
+                       macvlan.Mode = MACVLAN_MODE_VEPA
+               case nl.MACVLAN_MODE_BRIDGE:
+                       macvlan.Mode = MACVLAN_MODE_BRIDGE
+               case nl.MACVLAN_MODE_PASSTHRU:
+                       macvlan.Mode = MACVLAN_MODE_PASSTHRU
+               case nl.MACVLAN_MODE_SOURCE:
+                       macvlan.Mode = MACVLAN_MODE_SOURCE
+               }
+               return
+       }
+}
+
+// linkFlags converts the raw IFF_* interface flag bits into the corresponding
+// net.Flags bits. Bits without a net.Flags counterpart are dropped.
+// copied from pkg/net_linux.go
+func linkFlags(rawFlags uint32) net.Flags {
+       mapping := [...]struct {
+               raw  uint32
+               flag net.Flags
+       }{
+               {syscall.IFF_UP, net.FlagUp},
+               {syscall.IFF_BROADCAST, net.FlagBroadcast},
+               {syscall.IFF_LOOPBACK, net.FlagLoopback},
+               {syscall.IFF_POINTOPOINT, net.FlagPointToPoint},
+               {syscall.IFF_MULTICAST, net.FlagMulticast},
+       }
+
+       var flags net.Flags
+       for _, m := range mapping {
+               if rawFlags&m.raw != 0 {
+                       flags |= m.flag
+               }
+       }
+       return flags
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go
new file mode 100644 (file)
index 0000000..41349ca
--- /dev/null
@@ -0,0 +1,544 @@
+package netlink
+
+import (
+       "bytes"
+       "net"
+       "testing"
+
+       "github.com/vishvananda/netns"
+)
+
+// testTxQLen is the TxQLen value that the veth test sets on the link it
+// creates and that testLinkAddDel expects to find on veth links.
+const testTxQLen uint32 = 100
+
+// testLinkAddDel adds link, looks it up again by name, compares the
+// type-specific attributes of the looked-up result with those of link,
+// deletes link, and finally checks that LinkList reports the same number of
+// links as before the add. Any mismatch or error fails the test immediately.
+func testLinkAddDel(t *testing.T, link Link) {
+       // Remember how many links exist so removal can be verified at the end.
+       links, err := LinkList()
+       if err != nil {
+               t.Fatal(err)
+       }
+       num := len(links)
+
+       if err := LinkAdd(link); err != nil {
+               t.Fatal(err)
+       }
+
+       base := link.Attrs()
+
+       result, err := LinkByName(base.Name)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       rBase := result.Attrs()
+
+       if vlan, ok := link.(*Vlan); ok {
+               other, ok := result.(*Vlan)
+               if !ok {
+                       t.Fatal("Result of create is not a vlan")
+               }
+               if vlan.VlanId != other.VlanId {
+                       t.Fatal("Link.VlanId id doesn't match")
+               }
+       }
+
+       // The parent index must be present on both or absent on both, and equal
+       // when present.
+       if rBase.ParentIndex == 0 && base.ParentIndex != 0 {
+               t.Fatal("Created link doesn't have a Parent but it should")
+       } else if rBase.ParentIndex != 0 && base.ParentIndex == 0 {
+               t.Fatal("Created link has a Parent but it shouldn't")
+       } else if rBase.ParentIndex != 0 && base.ParentIndex != 0 {
+               if rBase.ParentIndex != base.ParentIndex {
+                       t.Fatal("Link.ParentIndex doesn't match")
+               }
+       }
+
+       if veth, ok := link.(*Veth); ok {
+               // NOTE(review): this compares the TxQLen of the link passed in,
+               // not of result — confirm that is intended.
+               if veth.TxQLen != testTxQLen {
+                       t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, testTxQLen)
+               }
+               if rBase.MTU != base.MTU {
+                       t.Fatalf("MTU is %d, should be %d", rBase.MTU, base.MTU)
+               }
+
+               // When a peer name was requested, the peer must exist, be a veth
+               // and carry the same TxQLen.
+               if veth.PeerName != "" {
+                       var peer *Veth
+                       other, err := LinkByName(veth.PeerName)
+                       if err != nil {
+                               t.Fatalf("Peer %s not created", veth.PeerName)
+                       }
+                       if peer, ok = other.(*Veth); !ok {
+                               t.Fatalf("Peer %s is incorrect type", veth.PeerName)
+                       }
+                       if peer.TxQLen != testTxQLen {
+                               t.Fatalf("TxQLen of peer is %d, should be %d", peer.TxQLen, testTxQLen)
+                       }
+               }
+       }
+
+       if vxlan, ok := link.(*Vxlan); ok {
+               other, ok := result.(*Vxlan)
+               if !ok {
+                       t.Fatal("Result of create is not a vxlan")
+               }
+               compareVxlan(t, vxlan, other)
+       }
+
+       if ipv, ok := link.(*IPVlan); ok {
+               other, ok := result.(*IPVlan)
+               if !ok {
+                       t.Fatal("Result of create is not a ipvlan")
+               }
+               if ipv.Mode != other.Mode {
+                       t.Fatalf("Got unexpected mode: %d, expected: %d", other.Mode, ipv.Mode)
+               }
+       }
+
+       if macv, ok := link.(*Macvlan); ok {
+               other, ok := result.(*Macvlan)
+               if !ok {
+                       t.Fatal("Result of create is not a macvlan")
+               }
+               if macv.Mode != other.Mode {
+                       t.Fatalf("Got unexpected mode: %d, expected: %d", other.Mode, macv.Mode)
+               }
+       }
+
+       if err = LinkDel(link); err != nil {
+               t.Fatal(err)
+       }
+
+       // The link count must be back to what it was before the add.
+       links, err = LinkList()
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if len(links) != num {
+               t.Fatal("Link not removed properly")
+       }
+}
+
+// compareVxlan fails the test with the first field of actual that does not
+// agree with expected. Optional fields of expected (nil addresses, -1 TTL/TOS,
+// non-positive Age/Limit/Port, unset port range) are not compared.
+func compareVxlan(t *testing.T, expected, actual *Vxlan) {
+       checkPorts := expected.PortLow > 0 || expected.PortHigh > 0
+
+       // Listed in the order in which the mismatches are reported.
+       mismatches := []struct {
+               failed bool
+               msg    string
+       }{
+               {actual.VxlanId != expected.VxlanId, "Vxlan.VxlanId doesn't match"},
+               {expected.SrcAddr != nil && !actual.SrcAddr.Equal(expected.SrcAddr), "Vxlan.SrcAddr doesn't match"},
+               {expected.Group != nil && !actual.Group.Equal(expected.Group), "Vxlan.Group doesn't match"},
+               {expected.TTL != -1 && actual.TTL != expected.TTL, "Vxlan.TTL doesn't match"},
+               {expected.TOS != -1 && actual.TOS != expected.TOS, "Vxlan.TOS doesn't match"},
+               {actual.Learning != expected.Learning, "Vxlan.Learning doesn't match"},
+               {actual.Proxy != expected.Proxy, "Vxlan.Proxy doesn't match"},
+               {actual.RSC != expected.RSC, "Vxlan.RSC doesn't match"},
+               {actual.L2miss != expected.L2miss, "Vxlan.L2miss doesn't match"},
+               {actual.L3miss != expected.L3miss, "Vxlan.L3miss doesn't match"},
+               {expected.NoAge && !actual.NoAge, "Vxlan.NoAge doesn't match"},
+               {!expected.NoAge && expected.Age > 0 && actual.Age != expected.Age, "Vxlan.Age doesn't match"},
+               {expected.Limit > 0 && actual.Limit != expected.Limit, "Vxlan.Limit doesn't match"},
+               {expected.Port > 0 && actual.Port != expected.Port, "Vxlan.Port doesn't match"},
+               {checkPorts && actual.PortLow != expected.PortLow, "Vxlan.PortLow doesn't match"},
+               {checkPorts && actual.PortHigh != expected.PortHigh, "Vxlan.PortHigh doesn't match"},
+       }
+
+       for _, m := range mismatches {
+               if m.failed {
+                       t.Fatal(m.msg)
+               }
+       }
+}
+
+// TestLinkAddDelDummy runs the add/delete round trip for a dummy link.
+func TestLinkAddDelDummy(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       dummy := &Dummy{LinkAttrs{Name: "foo"}}
+       testLinkAddDel(t, dummy)
+}
+
+// TestLinkAddDelBridge runs the add/delete round trip for a bridge with an
+// MTU of 1400.
+func TestLinkAddDelBridge(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       bridge := &Bridge{LinkAttrs{Name: "foo", MTU: 1400}}
+       testLinkAddDel(t, bridge)
+}
+
+// TestLinkAddDelVlan runs the add/delete round trip for a Vlan link whose
+// parent is a freshly created dummy link, then removes the parent.
+func TestLinkAddDelVlan(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       dummyParent := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+
+       // The parent index is only known after the parent has been added.
+       child := &Vlan{LinkAttrs{Name: "bar", ParentIndex: dummyParent.Attrs().Index}, 900}
+       testLinkAddDel(t, child)
+
+       if err := LinkDel(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+}
+
+// TestLinkAddDelMacvlan runs the add/delete round trip for a private-mode
+// macvlan whose parent is a freshly created dummy link, then removes the
+// parent.
+func TestLinkAddDelMacvlan(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       dummyParent := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+
+       // The parent index is only known after the parent has been added.
+       child := &Macvlan{
+               LinkAttrs: LinkAttrs{Name: "bar", ParentIndex: dummyParent.Attrs().Index},
+               Mode:      MACVLAN_MODE_PRIVATE,
+       }
+       testLinkAddDel(t, child)
+
+       if err := LinkDel(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+}
+
+// TestLinkAddDelVeth runs the add/delete round trip for a veth pair
+// "foo"/"bar" created with testTxQLen and an MTU of 1400.
+func TestLinkAddDelVeth(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       pair := &Veth{LinkAttrs{Name: "foo", TxQLen: testTxQLen, MTU: 1400}, "bar"}
+       testLinkAddDel(t, pair)
+}
+
+// TestLinkAddDelBridgeMaster runs the add/delete round trip for a dummy link
+// whose MasterIndex points at a freshly created bridge, then removes the
+// bridge.
+func TestLinkAddDelBridgeMaster(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       bridge := &Bridge{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(bridge); err != nil {
+               t.Fatal(err)
+       }
+
+       // The master index is only known after the bridge has been added.
+       enslaved := &Dummy{LinkAttrs{Name: "bar", MasterIndex: bridge.Attrs().Index}}
+       testLinkAddDel(t, enslaved)
+
+       if err := LinkDel(bridge); err != nil {
+               t.Fatal(err)
+       }
+}
+
+// TestLinkSetUnsetResetMaster attaches a dummy link to one bridge, moves it to
+// a second bridge and finally detaches it (nil master), re-reading the link by
+// name after each step to check its MasterIndex.
+func TestLinkSetUnsetResetMaster(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       master := &Bridge{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(master); err != nil {
+               t.Fatal(err)
+       }
+
+       newmaster := &Bridge{LinkAttrs{Name: "bar"}}
+       if err := LinkAdd(newmaster); err != nil {
+               t.Fatal(err)
+       }
+
+       slave := &Dummy{LinkAttrs{Name: "baz"}}
+       if err := LinkAdd(slave); err != nil {
+               t.Fatal(err)
+       }
+
+       // Step 1: set the first master.
+       if err := LinkSetMaster(slave, master); err != nil {
+               t.Fatal(err)
+       }
+
+       link, err := LinkByName("baz")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if link.Attrs().MasterIndex != master.Attrs().Index {
+               t.Fatal("Master not set properly")
+       }
+
+       // Step 2: switch to the second master.
+       if err := LinkSetMaster(slave, newmaster); err != nil {
+               t.Fatal(err)
+       }
+
+       link, err = LinkByName("baz")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if link.Attrs().MasterIndex != newmaster.Attrs().Index {
+               t.Fatal("Master not reset properly")
+       }
+
+       // Step 3: a nil master must clear MasterIndex.
+       if err := LinkSetMaster(slave, nil); err != nil {
+               t.Fatal(err)
+       }
+
+       link, err = LinkByName("baz")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if link.Attrs().MasterIndex != 0 {
+               t.Fatal("Master not unset properly")
+       }
+       if err := LinkDel(slave); err != nil {
+               t.Fatal(err)
+       }
+
+       if err := LinkDel(newmaster); err != nil {
+               t.Fatal(err)
+       }
+
+       if err := LinkDel(master); err != nil {
+               t.Fatal(err)
+       }
+}
+
+// TestLinkSetNs creates a veth pair "foo"/"bar", moves "bar" into basens with
+// LinkSetNsFd, and checks that "bar" is no longer visible in the current
+// namespace but is visible after switching to basens. It then deletes "bar"
+// there and checks, back in newns, that "foo" is gone too.
+func TestLinkSetNs(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       basens, err := netns.Get()
+       if err != nil {
+               t.Fatal("Failed to get basens")
+       }
+       defer basens.Close()
+
+       newns, err := netns.New()
+       if err != nil {
+               t.Fatal("Failed to create newns")
+       }
+       defer newns.Close()
+
+       link := &Veth{LinkAttrs{Name: "foo"}, "bar"}
+       if err := LinkAdd(link); err != nil {
+               t.Fatal(err)
+       }
+
+       peer, err := LinkByName("bar")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       // The error returned by LinkSetNsFd must be checked itself; the err left
+       // over from LinkByName above says nothing about whether the move worked.
+       if err := LinkSetNsFd(peer, int(basens)); err != nil {
+               t.Fatalf("Failed to set basens for link: %v", err)
+       }
+
+       _, err = LinkByName("bar")
+       if err == nil {
+               t.Fatal("Link bar is still in newns")
+       }
+
+       err = netns.Set(basens)
+       if err != nil {
+               t.Fatal("Failed to set basens")
+       }
+
+       peer, err = LinkByName("bar")
+       if err != nil {
+               t.Fatal("Link is not in basens")
+       }
+
+       if err := LinkDel(peer); err != nil {
+               t.Fatal(err)
+       }
+
+       err = netns.Set(newns)
+       if err != nil {
+               t.Fatal("Failed to set newns")
+       }
+
+       // Deleting one end of the veth pair must have removed the other end.
+       _, err = LinkByName("foo")
+       if err == nil {
+               t.Fatal("Other half of veth pair not deleted")
+       }
+
+}
+
+// TestLinkAddDelVxlan runs the add/delete round trip for a vxlan link (id 10,
+// learning, L2miss and L3miss enabled) whose VTEP device is a freshly created
+// dummy link, then removes the dummy.
+func TestLinkAddDelVxlan(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       vtepDev := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(vtepDev); err != nil {
+               t.Fatal(err)
+       }
+
+       // The VTEP index is only known after the dummy has been added.
+       tunnel := &Vxlan{
+               LinkAttrs:    LinkAttrs{Name: "bar"},
+               VxlanId:      10,
+               VtepDevIndex: vtepDev.Index,
+               Learning:     true,
+               L2miss:       true,
+               L3miss:       true,
+       }
+       testLinkAddDel(t, tunnel)
+
+       if err := LinkDel(vtepDev); err != nil {
+               t.Fatal(err)
+       }
+}
+
+// TestLinkAddDelIPVlanL2 runs the add/delete round trip for an ipvlan link in
+// IPVLAN_MODE_L2 on top of a freshly created dummy parent.
+func TestLinkAddDelIPVlanL2(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       dummyParent := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+
+       // The parent index is only known after the parent has been added.
+       child := &IPVlan{
+               LinkAttrs: LinkAttrs{Name: "bar", ParentIndex: dummyParent.Index},
+               Mode:      IPVLAN_MODE_L2,
+       }
+       testLinkAddDel(t, child)
+}
+
+// TestLinkAddDelIPVlanL3 runs the add/delete round trip for an ipvlan link in
+// IPVLAN_MODE_L3 on top of a freshly created dummy parent.
+func TestLinkAddDelIPVlanL3(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       dummyParent := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(dummyParent); err != nil {
+               t.Fatal(err)
+       }
+
+       // The parent index is only known after the parent has been added.
+       child := &IPVlan{
+               LinkAttrs: LinkAttrs{Name: "bar", ParentIndex: dummyParent.Index},
+               Mode:      IPVLAN_MODE_L3,
+       }
+       testLinkAddDel(t, child)
+}
+
+// TestLinkAddDelIPVlanNoParent checks that adding an ipvlan link without a
+// ParentIndex is rejected with the dedicated error message.
+func TestLinkAddDelIPVlanNoParent(t *testing.T) {
+       defer setUpNetlinkTest(t)()
+
+       orphan := &IPVlan{
+               LinkAttrs: LinkAttrs{Name: "bar"},
+               Mode:      IPVLAN_MODE_L3,
+       }
+
+       const want = "Can't create ipvlan link without ParentIndex"
+       switch err := LinkAdd(orphan); {
+       case err == nil:
+               t.Fatal("Add should fail if ipvlan creating without ParentIndex")
+       case err.Error() != want:
+               t.Fatalf("Error should be about missing ParentIndex, got %q", err)
+       }
+}
+
+// TestLinkByIndex checks that LinkByIndex finds a freshly added dummy link by
+// its index and returns an error once that link has been deleted.
+func TestLinkByIndex(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       dummy := &Dummy{LinkAttrs{Name: "dummy"}}
+       if err := LinkAdd(dummy); err != nil {
+               t.Fatal(err)
+       }
+
+       found, err := LinkByIndex(dummy.Index)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if found.Attrs().Index != dummy.Attrs().Index {
+               t.Fatalf("Indices don't match: %v != %v", found.Attrs().Index, dummy.Attrs().Index)
+       }
+
+       // A failed delete would make the not-found check below meaningless, so
+       // it must abort the test.
+       if err := LinkDel(dummy); err != nil {
+               t.Fatal(err)
+       }
+
+       // test not found
+       _, err = LinkByIndex(dummy.Attrs().Index)
+       if err == nil {
+               // err is nil on this path; report the index that was looked up.
+               t.Fatalf("LinkByIndex(%v) found deleted link", dummy.Attrs().Index)
+       }
+}
+
+// TestLinkSet renames a dummy link, changes its MTU and its hardware address,
+// re-reading the link by name after each change to verify that it took
+// effect.
+func TestLinkSet(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       iface := &Dummy{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(iface); err != nil {
+               t.Fatal(err)
+       }
+
+       link, err := LinkByName("foo")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       // Rename "foo" to "bar"; the link must then be found under the new name.
+       err = LinkSetName(link, "bar")
+       if err != nil {
+               t.Fatalf("Could not change interface name: %v", err)
+       }
+
+       link, err = LinkByName("bar")
+       if err != nil {
+               t.Fatalf("Interface name not changed: %v", err)
+       }
+
+       err = LinkSetMTU(link, 1400)
+       if err != nil {
+               t.Fatalf("Could not set MTU: %v", err)
+       }
+
+       link, err = LinkByName("bar")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if link.Attrs().MTU != 1400 {
+               t.Fatal("MTU not changed!")
+       }
+
+       addr, err := net.ParseMAC("00:12:34:56:78:AB")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       err = LinkSetHardwareAddr(link, addr)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       link, err = LinkByName("bar")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if !bytes.Equal(link.Attrs().HardwareAddr, addr) {
+               t.Fatalf("hardware address not changed!")
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh.go
new file mode 100644 (file)
index 0000000..0e5eb90
--- /dev/null
@@ -0,0 +1,22 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+)
+
+// Neigh represents a link layer neighbor from netlink.
+//
+// LinkIndex, Family, State, Type and Flags are copied to and from the
+// matching fields of the Ndmsg header; IP and HardwareAddr travel as the
+// NDA_DST and NDA_LLADDR attributes.
+type Neigh struct {
+       LinkIndex    int
+       Family       int // when not positive, the family is derived from IP on add/delete
+       State        int // NOTE(review): presumably a combination of NUD_* values — confirm
+       Type         int
+       Flags        int // NOTE(review): presumably a combination of NTF_* values — confirm
+       IP           net.IP
+       HardwareAddr net.HardwareAddr
+}
+
+// String returns "$ip $hwaddr": the neighbor's IP and hardware address
+// separated by a single space.
+func (neigh *Neigh) String() string {
+       return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go
new file mode 100644 (file)
index 0000000..ca97a96
--- /dev/null
@@ -0,0 +1,189 @@
+package netlink
+
+import (
+       "net"
+       "syscall"
+       "unsafe"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// Neighbor attribute types, numbered consecutively from zero.
+// Only NDA_DST and NDA_LLADDR are produced and parsed by this file.
+const (
+       NDA_UNSPEC = iota
+       NDA_DST
+       NDA_LLADDR
+       NDA_CACHEINFO
+       NDA_PROBES
+       NDA_VLAN
+       NDA_PORT
+       NDA_VNI
+       NDA_IFINDEX
+       NDA_MAX = NDA_IFINDEX
+)
+
+// Neighbor Cache Entry States.
+// Each non-zero state is a distinct single bit, so a state value can be
+// tested with a bitwise AND.
+const (
+       NUD_NONE       = 0x00
+       NUD_INCOMPLETE = 0x01
+       NUD_REACHABLE  = 0x02
+       NUD_STALE      = 0x04
+       NUD_DELAY      = 0x08
+       NUD_PROBE      = 0x10
+       NUD_FAILED     = 0x20
+       NUD_NOARP      = 0x40
+       NUD_PERMANENT  = 0x80
+)
+
+// Neighbor Flags
+// Each flag is a distinct single bit and fits in the uint8 Flags field of
+// Ndmsg.
+const (
+       NTF_USE    = 0x01
+       NTF_SELF   = 0x02
+       NTF_MASTER = 0x04
+       NTF_PROXY  = 0x08
+       NTF_ROUTER = 0x80
+)
+
+// Ndmsg is the fixed-size header at the start of every neighbor message.
+//
+// The struct is reinterpreted directly as raw bytes (and back) by Serialize
+// and deserializeNdmsg, so the order and sizes of its fields are part of the
+// wire format and must not be changed.
+type Ndmsg struct {
+       Family uint8
+       Index  uint32
+       State  uint16
+       Flags  uint8
+       Type   uint8
+}
+
+// deserializeNdmsg reinterprets the leading bytes of b as an Ndmsg without
+// copying; the result aliases b. It panics if b cannot be sliced to the size
+// of an Ndmsg.
+func deserializeNdmsg(b []byte) *Ndmsg {
+       const size = unsafe.Sizeof(Ndmsg{})
+       header := b[0:size]
+       return (*Ndmsg)(unsafe.Pointer(&header[0]))
+}
+
+// Serialize returns the in-memory bytes of msg as a slice. The slice aliases
+// msg rather than copying it.
+func (msg *Ndmsg) Serialize() []byte {
+       raw := (*[unsafe.Sizeof(*msg)]byte)(unsafe.Pointer(msg))
+       return raw[:]
+}
+
+// Len returns the in-memory size of an Ndmsg in bytes.
+func (msg *Ndmsg) Len() int {
+       const size = unsafe.Sizeof(Ndmsg{})
+       return int(size)
+}
+
+// NeighAdd will add an IP to MAC mapping to the ARP table
+// Equivalent to: `ip neigh add ....`
+//
+// The request is sent with NLM_F_CREATE|NLM_F_EXCL.
+func NeighAdd(neigh *Neigh) error {
+       return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
+}
+
+// NeighSet will add or replace an IP to MAC mapping to the ARP table
+// Equivalent to: `ip neigh replace....`
+//
+// NLM_F_REPLACE is sent together with NLM_F_CREATE so that an entry that
+// already exists is overwritten, while a missing entry is still created.
+func NeighSet(neigh *Neigh) error {
+       return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE)
+}
+
+// NeighAppend will append an entry to FDB
+// Equivalent to: `bridge fdb append...`
+//
+// The request is sent with NLM_F_CREATE|NLM_F_APPEND.
+func NeighAppend(neigh *Neigh) error {
+       return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND)
+}
+
+// neighAdd sends neigh in an RTM_NEWNEIGH request whose flags are mode plus
+// NLM_F_ACK.
+func neighAdd(neigh *Neigh, mode int) error {
+       flags := mode | syscall.NLM_F_ACK
+       return neighHandle(neigh, nl.NewNetlinkRequest(syscall.RTM_NEWNEIGH, flags))
+}
+
+// NeighDel will delete a neighbor entry (IP to MAC mapping) by sending an
+// RTM_DELNEIGH request for neigh.
+// Equivalent to: `ip neigh del ....`
+func NeighDel(neigh *Neigh) error {
+       req := nl.NewNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK)
+       return neighHandle(neigh, req)
+}
+
+// neighHandle fills req with the Ndmsg header and the NDA_DST / NDA_LLADDR
+// attributes describing neigh, executes it and returns the resulting error.
+func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
+       // An explicit family wins; otherwise it is derived from the IP.
+       family := neigh.Family
+       if family <= 0 {
+               family = nl.GetIPFamily(neigh.IP)
+       }
+
+       req.AddData(&Ndmsg{
+               Family: uint8(family),
+               Index:  uint32(neigh.LinkIndex),
+               State:  uint16(neigh.State),
+               Type:   uint8(neigh.Type),
+               Flags:  uint8(neigh.Flags),
+       })
+
+       // Prefer the 4-byte form of the address, falling back to 16 bytes.
+       dst := neigh.IP.To4()
+       if dst == nil {
+               dst = neigh.IP.To16()
+       }
+       req.AddData(nl.NewRtAttr(NDA_DST, dst))
+       req.AddData(nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)))
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// NeighList gets a list of IP-MAC mappings in the system (ARP table).
+// Equivalent to: `ip neighbor show`.
+// A non-zero linkIndex keeps only entries of that link; family is passed to
+// the kernel in the request header.
+func NeighList(linkIndex, family int) ([]Neigh, error) {
+       request := nl.NewNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
+       request.AddData(&Ndmsg{Family: uint8(family)})
+
+       replies, err := request.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWNEIGH)
+       if err != nil {
+               return nil, err
+       }
+
+       neighbors := []Neigh{}
+       for _, raw := range replies {
+               header := deserializeNdmsg(raw)
+               // Ignore messages from other interfaces
+               if linkIndex != 0 && int(header.Index) != linkIndex {
+                       continue
+               }
+
+               // Entries that cannot be parsed are skipped, not reported.
+               if parsed, err := NeighDeserialize(raw); err == nil {
+                       neighbors = append(neighbors, *parsed)
+               }
+       }
+
+       return neighbors, nil
+}
+
+// NeighDeserialize decodes one raw neighbor message: the leading Ndmsg header
+// supplies the scalar fields, and the NDA_DST / NDA_LLADDR attributes that
+// follow supply IP and HardwareAddr. Other attributes are ignored.
+func NeighDeserialize(m []byte) (*Neigh, error) {
+       header := deserializeNdmsg(m)
+
+       attrs, err := nl.ParseRouteAttr(m[header.Len():])
+       if err != nil {
+               return nil, err
+       }
+
+       result := &Neigh{
+               LinkIndex: int(header.Index),
+               Family:    int(header.Family),
+               State:     int(header.State),
+               Type:      int(header.Type),
+               Flags:     int(header.Flags),
+       }
+
+       for i := range attrs {
+               switch value := attrs[i].Value; attrs[i].Attr.Type {
+               case NDA_DST:
+                       result.IP = net.IP(value)
+               case NDA_LLADDR:
+                       result.HardwareAddr = net.HardwareAddr(value)
+               }
+       }
+
+       return result, nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_test.go
new file mode 100644 (file)
index 0000000..50da59c
--- /dev/null
@@ -0,0 +1,104 @@
+package netlink
+
+import (
+       "net"
+       "testing"
+)
+
+// arpEntry pairs an IP address with the hardware address the test maps it to.
+type arpEntry struct {
+       ip  net.IP
+       mac net.HardwareAddr
+}
+
+// parseMAC is net.ParseMAC for test fixtures: it panics instead of returning
+// an error when s is not a valid hardware address.
+func parseMAC(s string) net.HardwareAddr {
+       hw, err := net.ParseMAC(s)
+       if err == nil {
+               return hw
+       }
+       panic(err)
+}
+
+// dumpContains reports whether dump holds an entry for e.ip whose state does
+// not have the NUD_INCOMPLETE bit set. Only the IP and state are compared;
+// e.mac is not checked.
+func dumpContains(dump []Neigh, e arpEntry) bool {
+       for i := range dump {
+               if !dump[i].IP.Equal(e.ip) {
+                       continue
+               }
+               if dump[i].State&NUD_INCOMPLETE == 0 {
+                       return true
+               }
+       }
+       return false
+}
+
+// TestNeighAddDel adds five neighbor entries to a dummy link, checks that a
+// NeighList dump of that link contains all of them, then deletes them again.
+// Verifying that the entries are gone after deletion is currently disabled
+// (see the TODO below).
+func TestNeighAddDel(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       dummy := Dummy{LinkAttrs{Name: "neigh0"}}
+       if err := LinkAdd(&dummy); err != nil {
+               t.Fatal(err)
+       }
+
+       // dummy.Index is used below for every neighbor operation.
+       ensureIndex(dummy.Attrs())
+
+       arpTable := []arpEntry{
+               {net.ParseIP("10.99.0.1"), parseMAC("aa:bb:cc:dd:00:01")},
+               {net.ParseIP("10.99.0.2"), parseMAC("aa:bb:cc:dd:00:02")},
+               {net.ParseIP("10.99.0.3"), parseMAC("aa:bb:cc:dd:00:03")},
+               {net.ParseIP("10.99.0.4"), parseMAC("aa:bb:cc:dd:00:04")},
+               {net.ParseIP("10.99.0.5"), parseMAC("aa:bb:cc:dd:00:05")},
+       }
+
+       // Add the arpTable
+       for _, entry := range arpTable {
+               err := NeighAdd(&Neigh{
+                       LinkIndex:    dummy.Index,
+                       State:        NUD_REACHABLE,
+                       IP:           entry.ip,
+                       HardwareAddr: entry.mac,
+               })
+
+               if err != nil {
+                       t.Errorf("Failed to NeighAdd: %v", err)
+               }
+       }
+
+       // Dump and see that all added entries are there
+       dump, err := NeighList(dummy.Index, 0)
+       if err != nil {
+               t.Errorf("Failed to NeighList: %v", err)
+       }
+
+       for _, entry := range arpTable {
+               if !dumpContains(dump, entry) {
+                       t.Errorf("Dump does not contain: %v", entry)
+               }
+       }
+
+       // Delete the arpTable
+       for _, entry := range arpTable {
+               err := NeighDel(&Neigh{
+                       LinkIndex:    dummy.Index,
+                       IP:           entry.ip,
+                       HardwareAddr: entry.mac,
+               })
+
+               if err != nil {
+                       t.Errorf("Failed to NeighDel: %v", err)
+               }
+       }
+
+       // TODO: seems not working because of cache
+       //// Dump and see that none of deleted entries are there
+       //dump, err = NeighList(dummy.Index, 0)
+       //if err != nil {
+       //t.Errorf("Failed to NeighList: %v", err)
+       //}
+
+       //for _, entry := range arpTable {
+       //if dumpContains(dump, entry) {
+       //t.Errorf("Dump contains: %v", entry)
+       //}
+       //}
+
+       if err := LinkDel(&dummy); err != nil {
+               t.Fatal(err)
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink.go
new file mode 100644 (file)
index 0000000..3130a4b
--- /dev/null
@@ -0,0 +1,39 @@
+// Package netlink provides a simple library for netlink. Netlink is
+// the interface a user-space program in linux uses to communicate with
+// the kernel. It can be used to add and remove interfaces, set up ip
+// addresses and routes, and configure ipsec. Netlink communication
+// requires elevated privileges, so in most cases this code needs to
+// be run as root. The low level primitives for netlink are contained
+// in the nl subpackage. This package attempts to provide a high-level
+// interface that is loosely modeled on the iproute2 cli.
+package netlink
+
+import (
+       "net"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+const (
+       // Family type definitions. These are aliases of the constants with
+       // the same names in the nl subpackage.
+       FAMILY_ALL = nl.FAMILY_ALL
+       FAMILY_V4  = nl.FAMILY_V4
+       FAMILY_V6  = nl.FAMILY_V6
+)
+
+// ParseIPNet parses a string in ip/net format and returns a net.IPNet.
+// This is valuable because addresses in netlink are often IPNets and
+// ParseCIDR returns an IPNet with the IP part set to the base IP of the
+// range.
+func ParseIPNet(s string) (*net.IPNet, error) {
+       ip, ipNet, err := net.ParseCIDR(s)
+       if err != nil {
+               return nil, err
+       }
+       return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil
+}
+
+// NewIPNet generates an IPNet from an ip address using a netmask of 32.
+func NewIPNet(ip net.IP) *net.IPNet {
+       return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)}
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_test.go
new file mode 100644 (file)
index 0000000..3292b75
--- /dev/null
@@ -0,0 +1,34 @@
+package netlink
+
+import (
+       "log"
+       "os"
+       "runtime"
+       "testing"
+
+       "github.com/vishvananda/netns"
+)
+
+// tearDownNetlinkTest is the cleanup function returned by setUpNetlinkTest.
+type tearDownNetlinkTest func()
+
+func setUpNetlinkTest(t *testing.T) tearDownNetlinkTest {
+       if os.Getuid() != 0 {
+               msg := "Skipped test because it requires root privileges."
+               log.Printf(msg)
+               t.Skip(msg)
+       }
+
+       // new temporary namespace so we don't pollute the host
+       // lock thread since the namespace is thread local
+       runtime.LockOSThread()
+       var err error
+       ns, err := netns.New()
+       if err != nil {
+               t.Fatal("Failed to create newns", ns)
+       }
+
+       return func() {
+               ns.Close()
+               runtime.UnlockOSThread()
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_unspecified.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/netlink_unspecified.go
new file mode 100644 (file)
index 0000000..10c49c1
--- /dev/null
@@ -0,0 +1,143 @@
+// +build !linux
+
+package netlink
+
+import (
+       "errors"
+)
+
+var (
+       // ErrNotImplemented is the error returned by every function in this
+       // file, which is only built on non-linux platforms (see the build tag).
+       ErrNotImplemented = errors.New("not implemented")
+)
+
+// Link stubs. Each one ignores its arguments and returns ErrNotImplemented.
+
+func LinkSetUp(link *Link) error {
+       return ErrNotImplemented
+}
+
+func LinkSetDown(link *Link) error {
+       return ErrNotImplemented
+}
+
+func LinkSetMTU(link *Link, mtu int) error {
+       return ErrNotImplemented
+}
+
+func LinkSetMaster(link *Link, master *Link) error {
+       return ErrNotImplemented
+}
+
+func LinkSetNsPid(link *Link, nspid int) error {
+       return ErrNotImplemented
+}
+
+func LinkSetNsFd(link *Link, fd int) error {
+       return ErrNotImplemented
+}
+
+func LinkAdd(link *Link) error {
+       return ErrNotImplemented
+}
+
+func LinkDel(link *Link) error {
+       return ErrNotImplemented
+}
+
+// Per-link flag setter stubs. Note that these take Link by value while the
+// Link* functions above take *Link.
+
+func SetHairpin(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func SetGuard(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func SetFastLeave(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func SetLearning(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func SetRootBlock(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func SetFlood(link Link, mode bool) error {
+       return ErrNotImplemented
+}
+
+func LinkList() ([]Link, error) {
+       return nil, ErrNotImplemented
+}
+
+// Address stubs.
+
+func AddrAdd(link *Link, addr *Addr) error {
+       return ErrNotImplemented
+}
+
+func AddrDel(link *Link, addr *Addr) error {
+       return ErrNotImplemented
+}
+
+func AddrList(link *Link, family int) ([]Addr, error) {
+       return nil, ErrNotImplemented
+}
+
+// Route stubs.
+
+func RouteAdd(route *Route) error {
+       return ErrNotImplemented
+}
+
+func RouteDel(route *Route) error {
+       return ErrNotImplemented
+}
+
+func RouteList(link *Link, family int) ([]Route, error) {
+       return nil, ErrNotImplemented
+}
+
+// Xfrm policy and state stubs.
+
+func XfrmPolicyAdd(policy *XfrmPolicy) error {
+       return ErrNotImplemented
+}
+
+func XfrmPolicyDel(policy *XfrmPolicy) error {
+       return ErrNotImplemented
+}
+
+func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
+       return nil, ErrNotImplemented
+}
+
+func XfrmStateAdd(policy *XfrmState) error {
+       return ErrNotImplemented
+}
+
+func XfrmStateDel(policy *XfrmState) error {
+       return ErrNotImplemented
+}
+
+func XfrmStateList(family int) ([]XfrmState, error) {
+       return nil, ErrNotImplemented
+}
+
+// Neighbour table stubs.
+
+func NeighAdd(neigh *Neigh) error {
+       return ErrNotImplemented
+}
+
+func NeighSet(neigh *Neigh) error {
+       return ErrNotImplemented
+}
+
+func NeighAppend(neigh *Neigh) error {
+       return ErrNotImplemented
+}
+
+func NeighDel(neigh *Neigh) error {
+       return ErrNotImplemented
+}
+
+func NeighList(linkIndex, family int) ([]Neigh, error) {
+       return nil, ErrNotImplemented
+}
+
+func NeighDeserialize(m []byte) (*Ndmsg, *Neigh, error) {
+       return nil, nil, ErrNotImplemented
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux.go
new file mode 100644 (file)
index 0000000..17088fa
--- /dev/null
@@ -0,0 +1,47 @@
+package nl
+
+import (
+       "syscall"
+       "unsafe"
+)
+
+// IfAddrmsg embeds syscall.IfAddrmsg and adds the Len and Serialize
+// methods.
+type IfAddrmsg struct {
+       syscall.IfAddrmsg
+}
+
+// NewIfAddrmsg returns an IfAddrmsg whose Family is family converted to
+// uint8; every other field is left at its zero value.
+func NewIfAddrmsg(family int) *IfAddrmsg {
+       msg := new(IfAddrmsg)
+       msg.Family = uint8(family)
+       return msg
+}
+
+// struct ifaddrmsg {
+//   __u8    ifa_family;
+//   __u8    ifa_prefixlen;  /* The prefix length    */
+//   __u8    ifa_flags;  /* Flags      */
+//   __u8    ifa_scope;  /* Address scope    */
+//   __u32   ifa_index;  /* Link index     */
+// };
+
+// type IfAddrmsg struct {
+//     Family    uint8
+//     Prefixlen uint8
+//     Flags     uint8
+//     Scope     uint8
+//     Index     uint32
+// }
+// SizeofIfAddrmsg     = 0x8
+
+// DeserializeIfAddrmsg reinterprets the first syscall.SizeofIfAddrmsg
+// bytes of b as an IfAddrmsg. Nothing is copied: the result points into
+// b's backing array. The slice expression panics when b's capacity is
+// smaller than that size.
+func DeserializeIfAddrmsg(b []byte) *IfAddrmsg {
+       head := b[:syscall.SizeofIfAddrmsg]
+       return (*IfAddrmsg)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of msg as a byte slice.
+// The slice aliases msg instead of copying it.
+func (msg *IfAddrmsg) Serialize() []byte {
+       raw := (*[syscall.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg))
+       return raw[:]
+}
+
+// Len reports the serialized size of the message in bytes.
+func (msg *IfAddrmsg) Len() int {
+       return syscall.SizeofIfAddrmsg
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/addr_linux_test.go
new file mode 100644 (file)
index 0000000..98c3b21
--- /dev/null
@@ -0,0 +1,39 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "syscall"
+       "testing"
+)
+
+func (msg *IfAddrmsg) write(b []byte) {
+       native := NativeEndian()
+       b[0] = msg.Family
+       b[1] = msg.Prefixlen
+       b[2] = msg.Flags
+       b[3] = msg.Scope
+       native.PutUint32(b[4:8], msg.Index)
+}
+
+func (msg *IfAddrmsg) serializeSafe() []byte {
+       len := syscall.SizeofIfAddrmsg
+       b := make([]byte, len)
+       msg.write(b)
+       return b
+}
+
+func deserializeIfAddrmsgSafe(b []byte) *IfAddrmsg {
+       var msg = IfAddrmsg{}
+       binary.Read(bytes.NewReader(b[0:syscall.SizeofIfAddrmsg]), NativeEndian(), &msg)
+       return &msg
+}
+
+func TestIfAddrmsgDeserializeSerialize(t *testing.T) {
+       var orig = make([]byte, syscall.SizeofIfAddrmsg)
+       rand.Read(orig)
+       safemsg := deserializeIfAddrmsgSafe(orig)
+       msg := DeserializeIfAddrmsg(orig)
+       testDeserializeSerialize(t, orig, safemsg, msg)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go
new file mode 100644 (file)
index 0000000..64ef5fd
--- /dev/null
@@ -0,0 +1,96 @@
+package nl
+
+const (
+       // DEFAULT_CHANGE is an all-ones 32-bit mask.
+       DEFAULT_CHANGE = 0xFFFFFFFF
+)
+
+// IFLA_INFO_* identifiers, numbered sequentially from 0 via iota.
+// IFLA_INFO_MAX aliases the last one.
+const (
+       IFLA_INFO_UNSPEC = iota
+       IFLA_INFO_KIND
+       IFLA_INFO_DATA
+       IFLA_INFO_XSTATS
+       IFLA_INFO_MAX = IFLA_INFO_XSTATS
+)
+
+// IFLA_VLAN_* identifiers, numbered sequentially from 0 via iota.
+const (
+       IFLA_VLAN_UNSPEC = iota
+       IFLA_VLAN_ID
+       IFLA_VLAN_FLAGS
+       IFLA_VLAN_EGRESS_QOS
+       IFLA_VLAN_INGRESS_QOS
+       IFLA_VLAN_PROTOCOL
+       IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL
+)
+
+// VETH_INFO_* identifiers, numbered sequentially from 0 via iota.
+const (
+       VETH_INFO_UNSPEC = iota
+       VETH_INFO_PEER
+       VETH_INFO_MAX = VETH_INFO_PEER
+)
+
+// IFLA_VXLAN_* identifiers, numbered sequentially from 0 via iota. The
+// order of the entries determines their values, so it must not be changed.
+const (
+       IFLA_VXLAN_UNSPEC = iota
+       IFLA_VXLAN_ID
+       IFLA_VXLAN_GROUP
+       IFLA_VXLAN_LINK
+       IFLA_VXLAN_LOCAL
+       IFLA_VXLAN_TTL
+       IFLA_VXLAN_TOS
+       IFLA_VXLAN_LEARNING
+       IFLA_VXLAN_AGEING
+       IFLA_VXLAN_LIMIT
+       IFLA_VXLAN_PORT_RANGE
+       IFLA_VXLAN_PROXY
+       IFLA_VXLAN_RSC
+       IFLA_VXLAN_L2MISS
+       IFLA_VXLAN_L3MISS
+       IFLA_VXLAN_PORT
+       IFLA_VXLAN_GROUP6
+       IFLA_VXLAN_LOCAL6
+       IFLA_VXLAN_MAX = IFLA_VXLAN_LOCAL6
+)
+
+// BRIDGE_MODE_* values, numbered sequentially from 0 via iota.
+const (
+       BRIDGE_MODE_UNSPEC = iota
+       BRIDGE_MODE_HAIRPIN
+)
+
+// IFLA_BRPORT_* identifiers, numbered sequentially from 0 via iota.
+const (
+       IFLA_BRPORT_UNSPEC = iota
+       IFLA_BRPORT_STATE
+       IFLA_BRPORT_PRIORITY
+       IFLA_BRPORT_COST
+       IFLA_BRPORT_MODE
+       IFLA_BRPORT_GUARD
+       IFLA_BRPORT_PROTECT
+       IFLA_BRPORT_FAST_LEAVE
+       IFLA_BRPORT_LEARNING
+       IFLA_BRPORT_UNICAST_FLOOD
+       IFLA_BRPORT_MAX = IFLA_BRPORT_UNICAST_FLOOD
+)
+
+// IFLA_IPVLAN_* identifiers, numbered sequentially from 0 via iota.
+const (
+       IFLA_IPVLAN_UNSPEC = iota
+       IFLA_IPVLAN_MODE
+       IFLA_IPVLAN_MAX = IFLA_IPVLAN_MODE
+)
+
+const (
+       // not defined in syscall
+       IFLA_NET_NS_FD = 28
+)
+
+// IFLA_MACVLAN_* identifiers, numbered sequentially from 0 via iota.
+const (
+       IFLA_MACVLAN_UNSPEC = iota
+       IFLA_MACVLAN_MODE
+       IFLA_MACVLAN_FLAGS
+       IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS
+)
+
+// MACVLAN_MODE_* values. Each mode is a distinct power of two.
+const (
+       MACVLAN_MODE_PRIVATE  = 1
+       MACVLAN_MODE_VEPA     = 2
+       MACVLAN_MODE_BRIDGE   = 4
+       MACVLAN_MODE_PASSTHRU = 8
+       MACVLAN_MODE_SOURCE   = 16
+)
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go
new file mode 100644 (file)
index 0000000..72f2813
--- /dev/null
@@ -0,0 +1,417 @@
+// Package nl has low level primitives for making Netlink calls.
+package nl
+
+import (
+       "bytes"
+       "encoding/binary"
+       "fmt"
+       "net"
+       "sync/atomic"
+       "syscall"
+       "unsafe"
+)
+
+const (
+       // Family type definitions, expressed as the syscall address-family
+       // constants.
+       FAMILY_ALL = syscall.AF_UNSPEC
+       FAMILY_V4  = syscall.AF_INET
+       FAMILY_V6  = syscall.AF_INET6
+)
+
+// nextSeqNr is the counter that NewNetlinkRequest increments atomically to
+// obtain the sequence number of each new request.
+var nextSeqNr uint32
+
+// GetIPFamily returns the family type of a net.IP.
+func GetIPFamily(ip net.IP) int {
+       if len(ip) <= net.IPv4len {
+               return FAMILY_V4
+       }
+       if ip.To4() != nil {
+               return FAMILY_V4
+       }
+       return FAMILY_V6
+}
+
+// nativeEndian caches the byte order detected by NativeEndian. It is nil
+// until NativeEndian has been called once.
+var nativeEndian binary.ByteOrder
+
+// NativeEndian returns the byte order of the machine the program runs on.
+// The order is detected on the first call, by looking at which byte of a
+// known 32-bit value is stored first in memory, and is cached afterwards.
+func NativeEndian() binary.ByteOrder {
+       if nativeEndian == nil {
+               var x uint32 = 0x01020304
+               if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
+                       nativeEndian = binary.BigEndian
+               } else {
+                       // Without this else branch the big-endian result would be
+                       // overwritten unconditionally.
+                       nativeEndian = binary.LittleEndian
+               }
+       }
+       return nativeEndian
+}
+
+// Swap16 byte-swaps a 16 bit value if the machine is not big endian; on a
+// big endian machine i is returned unchanged.
+func Swap16(i uint16) uint16 {
+       if NativeEndian() == binary.BigEndian {
+               return i
+       }
+       return (i&0xff00)>>8 | (i&0xff)<<8
+}
+
+// Swap32 byte-swaps a 32 bit value if the machine is not big endian; on a
+// big endian machine i is returned unchanged.
+func Swap32(i uint32) uint32 {
+       if NativeEndian() == binary.BigEndian {
+               return i
+       }
+       return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
+}
+
+// NetlinkRequestData is a piece of payload that can be attached to a
+// NetlinkRequest (see AddData) or nested inside an RtAttr.
+type NetlinkRequestData interface {
+       // Len returns the length in bytes of the serialized data.
+       Len() int
+       // Serialize returns the data in its wire format.
+       Serialize() []byte
+}
+
+// IfInfomsg is related to links, but it is used for list requests as well.
+// It embeds syscall.IfInfomsg and adds the Len and Serialize methods.
+type IfInfomsg struct {
+       syscall.IfInfomsg
+}
+
+// NewIfInfomsg creates an IfInfomsg whose Family is family converted to
+// uint8; every other field is left at its zero value.
+func NewIfInfomsg(family int) *IfInfomsg {
+       msg := new(IfInfomsg)
+       msg.Family = uint8(family)
+       return msg
+}
+
+// DeserializeIfInfomsg reinterprets the first syscall.SizeofIfInfomsg bytes
+// of b as an IfInfomsg. Nothing is copied: the result points into b's
+// backing array. The slice expression panics when b's capacity is smaller
+// than that size.
+func DeserializeIfInfomsg(b []byte) *IfInfomsg {
+       head := b[:syscall.SizeofIfInfomsg]
+       return (*IfInfomsg)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of msg as a byte slice.
+// The slice aliases msg instead of copying it.
+func (msg *IfInfomsg) Serialize() []byte {
+       raw := (*[syscall.SizeofIfInfomsg]byte)(unsafe.Pointer(msg))
+       return raw[:]
+}
+
+// Len reports the serialized size of the message in bytes.
+func (msg *IfInfomsg) Len() int {
+       return syscall.SizeofIfInfomsg
+}
+
+// rtaAlignOf rounds attrlen up to the next multiple of syscall.RTA_ALIGNTO.
+func rtaAlignOf(attrlen int) int {
+       const mask = syscall.RTA_ALIGNTO - 1
+       return (attrlen + mask) &^ mask
+}
+
+func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
+       msg := NewIfInfomsg(family)
+       parent.children = append(parent.children, msg)
+       return msg
+}
+
+// RtAttr extends syscall.RtAttr so that an attribute can carry raw data
+// and nested child elements.
+type RtAttr struct {
+       syscall.RtAttr
+       Data     []byte
+       children []NetlinkRequestData
+}
+
+// NewRtAttr creates a new extended RtAttr of type attrType (converted to
+// uint16) carrying data and no children.
+func NewRtAttr(attrType int, data []byte) *RtAttr {
+       return &RtAttr{
+               RtAttr: syscall.RtAttr{
+                       Type: uint16(attrType),
+               },
+               children: []NetlinkRequestData{},
+               Data:     data,
+       }
+}
+
+// NewRtAttrChild creates a new RtAttr and adds it as a child of an
+// existing one. The new attribute is returned.
+func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
+       attr := NewRtAttr(attrType, data)
+       parent.children = append(parent.children, attr)
+       return attr
+}
+
+// Len returns the length of the attribute: header plus data when there are
+// no children (not rounded up), otherwise the aligned total of header,
+// data and every child rounded up individually.
+func (a *RtAttr) Len() int {
+       if len(a.children) == 0 {
+               return (syscall.SizeofRtAttr + len(a.Data))
+       }
+
+       l := 0
+       for _, child := range a.children {
+               l += rtaAlignOf(child.Len())
+       }
+       l += syscall.SizeofRtAttr
+       return rtaAlignOf(l + len(a.Data))
+}
+
+// Serialize the RtAttr into a byte array.
+// This can't just use an unsafe cast because it must iterate through
+// children. The layout is: the 4-byte header (length, type), then Data
+// padded to the attribute alignment, then each serialized child padded the
+// same way. Data and children are both written, matching what Len counts.
+func (a *RtAttr) Serialize() []byte {
+       native := NativeEndian()
+
+       length := a.Len()
+       buf := make([]byte, rtaAlignOf(length))
+
+       next := 4
+       if a.Data != nil {
+               copy(buf[next:], a.Data)
+               next += rtaAlignOf(len(a.Data))
+       }
+       // Children follow the data. Previously they were skipped whenever
+       // Data was non-nil, even though the length field still covered them.
+       for _, child := range a.children {
+               childBuf := child.Serialize()
+               copy(buf[next:], childBuf)
+               next += rtaAlignOf(len(childBuf))
+       }
+
+       if l := uint16(length); l != 0 {
+               native.PutUint16(buf[0:2], l)
+       }
+       native.PutUint16(buf[2:4], a.Type)
+       return buf
+}
+
+// NetlinkRequest is a netlink message header together with the list of
+// data elements that make up the message body.
+type NetlinkRequest struct {
+       syscall.NlMsghdr
+       Data []NetlinkRequestData
+}
+
+// Serialize the Netlink Request into a byte array: the header followed by
+// every data element in order. As a side effect msg.Len is updated to the
+// total length before the header is copied.
+func (msg *NetlinkRequest) Serialize() []byte {
+       total := syscall.SizeofNlMsghdr
+       payloads := make([][]byte, len(msg.Data))
+       for i, d := range msg.Data {
+               payloads[i] = d.Serialize()
+               total += len(payloads[i])
+       }
+       msg.Len = uint32(total)
+
+       out := make([]byte, total)
+       header := (*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(msg))
+       offset := copy(out, header[:])
+       for _, p := range payloads {
+               offset += copy(out[offset:], p)
+       }
+       return out
+}
+
+// AddData appends data to the body of the request; a nil data is ignored.
+func (msg *NetlinkRequest) AddData(data NetlinkRequestData) {
+       if data == nil {
+               return
+       }
+       msg.Data = append(msg.Data, data)
+}
+
+// Execute the request against a the given sockType.
+// Returns a list of netlink messages in seriaized format, optionally filtered
+// by resType.
+func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
+       s, err := getNetlinkSocket(sockType)
+       if err != nil {
+               return nil, err
+       }
+       defer s.Close()
+
+       if err := s.Send(req); err != nil {
+               return nil, err
+       }
+
+       pid, err := s.GetPid()
+       if err != nil {
+               return nil, err
+       }
+
+       res := make([][]byte, 0)
+
+done:
+       for {
+               msgs, err := s.Recieve()
+               if err != nil {
+                       return nil, err
+               }
+               for _, m := range msgs {
+                       if m.Header.Seq != req.Seq {
+                               return nil, fmt.Errorf("Wrong Seq nr %d, expected 1", m.Header.Seq)
+                       }
+                       if m.Header.Pid != pid {
+                               return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
+                       }
+                       if m.Header.Type == syscall.NLMSG_DONE {
+                               break done
+                       }
+                       if m.Header.Type == syscall.NLMSG_ERROR {
+                               native := NativeEndian()
+                               error := int32(native.Uint32(m.Data[0:4]))
+                               if error == 0 {
+                                       break done
+                               }
+                               return nil, syscall.Errno(-error)
+                       }
+                       if resType != 0 && m.Header.Type != resType {
+                               continue
+                       }
+                       res = append(res, m.Data)
+                       if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
+                               break done
+                       }
+               }
+       }
+       return res, nil
+}
+
+// Create a new netlink request from proto and flags
+// Note the Len value will be inaccurate once data is added until
+// the message is serialized
+func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
+       return &NetlinkRequest{
+               NlMsghdr: syscall.NlMsghdr{
+                       Len:   uint32(syscall.SizeofNlMsghdr),
+                       Type:  uint16(proto),
+                       Flags: syscall.NLM_F_REQUEST | uint16(flags),
+                       Seq:   atomic.AddUint32(&nextSeqNr, 1),
+               },
+       }
+}
+
+// NetlinkSocket pairs the file descriptor of an AF_NETLINK socket with
+// the address it was bound to.
+type NetlinkSocket struct {
+       fd  int
+       lsa syscall.SockaddrNetlink
+}
+
+// getNetlinkSocket opens and binds a raw netlink socket for protocol
+// without joining any multicast group.
+func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
+       return Subscribe(protocol)
+}
+
+// Subscribe creates a netlink socket with a given protocol (e.g.
+// NETLINK_ROUTE) and subscribes it to the multicast groups passed in the
+// variable argument list. Group g sets bit g-1 of the bind address.
+// Returns the netlink socket on which the Recieve() method can be called
+// to retrieve the messages from the kernel. If binding fails the socket
+// is closed before the error is returned.
+func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
+       fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol)
+       if err != nil {
+               return nil, err
+       }
+
+       sock := &NetlinkSocket{fd: fd}
+       sock.lsa.Family = syscall.AF_NETLINK
+       for _, group := range groups {
+               sock.lsa.Groups |= (1 << (group - 1))
+       }
+
+       if bindErr := syscall.Bind(fd, &sock.lsa); bindErr != nil {
+               syscall.Close(fd)
+               return nil, bindErr
+       }
+       return sock, nil
+}
+
+// Close closes the underlying file descriptor; the error from
+// syscall.Close is discarded.
+func (s *NetlinkSocket) Close() {
+       syscall.Close(s.fd)
+}
+
+// Send serializes request and sends it on the socket, addressed to s.lsa.
+func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
+       return syscall.Sendto(s.fd, request.Serialize(), 0, &s.lsa)
+}
+
+// Recieve performs a single read of at most one page from the socket and
+// parses the bytes read into netlink messages. A read shorter than a
+// netlink header is reported as an error.
+func (s *NetlinkSocket) Recieve() ([]syscall.NetlinkMessage, error) {
+       buf := make([]byte, syscall.Getpagesize())
+       n, _, err := syscall.Recvfrom(s.fd, buf, 0)
+       switch {
+       case err != nil:
+               return nil, err
+       case n < syscall.NLMSG_HDRLEN:
+               return nil, fmt.Errorf("Got short response from netlink")
+       }
+       return syscall.ParseNetlinkMessage(buf[:n])
+}
+
+// GetPid returns the Pid field of the socket's own netlink address, as
+// reported by getsockname.
+func (s *NetlinkSocket) GetPid() (uint32, error) {
+       addr, err := syscall.Getsockname(s.fd)
+       if err != nil {
+               return 0, err
+       }
+       if nladdr, ok := addr.(*syscall.SockaddrNetlink); ok {
+               return nladdr.Pid, nil
+       }
+       return 0, fmt.Errorf("Wrong socket type")
+}
+
+func ZeroTerminated(s string) []byte {
+       bytes := make([]byte, len(s)+1)
+       for i := 0; i < len(s); i++ {
+               bytes[i] = s[i]
+       }
+       bytes[len(s)] = 0
+       return bytes
+}
+
+func NonZeroTerminated(s string) []byte {
+       bytes := make([]byte, len(s))
+       for i := 0; i < len(s); i++ {
+               bytes[i] = s[i]
+       }
+       return bytes
+}
+
+func BytesToString(b []byte) string {
+       n := bytes.Index(b, []byte{0})
+       return string(b[:n])
+}
+
+func Uint8Attr(v uint8) []byte {
+       return []byte{byte(v)}
+}
+
+func Uint16Attr(v uint16) []byte {
+       native := NativeEndian()
+       bytes := make([]byte, 2)
+       native.PutUint16(bytes, v)
+       return bytes
+}
+
+func Uint32Attr(v uint32) []byte {
+       native := NativeEndian()
+       bytes := make([]byte, 4)
+       native.PutUint32(bytes, v)
+       return bytes
+}
+
+// ParseRouteAttr splits b into consecutive route attributes. Each returned
+// attribute holds a copy of its header and a Value slice that aliases b.
+// Parsing stops once fewer than syscall.SizeofRtAttr bytes remain; an
+// attribute whose length field is smaller than the header or larger than
+// the remaining buffer yields syscall.EINVAL.
+func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
+       var attrs []syscall.NetlinkRouteAttr
+       for len(b) >= syscall.SizeofRtAttr {
+               a, vbuf, alen, err := netlinkRouteAttrAndValue(b)
+               if err != nil {
+                       return nil, err
+               }
+               ra := syscall.NetlinkRouteAttr{Attr: *a, Value: vbuf[:int(a.Len)-syscall.SizeofRtAttr]}
+               attrs = append(attrs, ra)
+               // alen is a.Len rounded up to the attribute alignment. The last
+               // attribute may arrive without its trailing padding, in which
+               // case alen exceeds len(b) and slicing would panic.
+               if alen > len(b) {
+                       alen = len(b)
+               }
+               b = b[alen:]
+       }
+       return attrs, nil
+}
+
+// netlinkRouteAttrAndValue interprets the start of b as an attribute
+// header. It returns the header (pointing into b), the bytes following the
+// header, and the attribute length rounded up to the alignment. The
+// caller must ensure b holds at least syscall.SizeofRtAttr bytes.
+func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) {
+       a := (*syscall.RtAttr)(unsafe.Pointer(&b[0]))
+       if int(a.Len) < syscall.SizeofRtAttr || int(a.Len) > len(b) {
+               return nil, nil, 0, syscall.EINVAL
+       }
+       return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux_test.go
new file mode 100644 (file)
index 0000000..4672684
--- /dev/null
@@ -0,0 +1,60 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "reflect"
+       "syscall"
+       "testing"
+)
+
+// testSerializer is satisfied by message types that provide both the
+// unsafe-cast Serialize and the field-by-field serializeSafe.
+type testSerializer interface {
+       serializeSafe() []byte
+       Serialize() []byte
+}
+
+// testDeserializeSerialize checks three things in order and aborts the
+// test at the first failure: that safemsg and msg are deeply equal, that
+// msg.serializeSafe reproduces orig, and that msg.Serialize matches the
+// safe serialization.
+func testDeserializeSerialize(t *testing.T, orig []byte, safemsg testSerializer, msg testSerializer) {
+       if !reflect.DeepEqual(safemsg, msg) {
+               t.Fatal("Deserialization failed.\n", safemsg, "\n", msg)
+       }
+
+       viaSafe := msg.serializeSafe()
+       if !bytes.Equal(viaSafe, orig) {
+               t.Fatal("Safe serialization failed.\n", viaSafe, "\n", orig)
+       }
+
+       viaCast := msg.Serialize()
+       if !bytes.Equal(viaCast, viaSafe) {
+               t.Fatal("Serialization failed.\n", viaCast, "\n", viaSafe)
+       }
+}
+
+// write stores msg into b field by field, using the native byte order for
+// the multi-byte fields. It writes bytes 0 through 15 of b.
+func (msg *IfInfomsg) write(b []byte) {
+       order := NativeEndian()
+       b[0] = msg.Family
+       b[1] = msg.X__ifi_pad
+       order.PutUint16(b[2:4], msg.Type)
+       order.PutUint32(b[4:8], uint32(msg.Index))
+       order.PutUint32(b[8:12], msg.Flags)
+       order.PutUint32(b[12:16], msg.Change)
+}
+
+// serializeSafe builds the wire form of msg in a freshly allocated buffer
+// without any unsafe casts.
+func (msg *IfInfomsg) serializeSafe() []byte {
+       buf := make([]byte, syscall.SizeofIfInfomsg)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeIfInfomsgSafe decodes the first syscall.SizeofIfInfomsg bytes
+// of b with encoding/binary. The error from binary.Read is not checked.
+func deserializeIfInfomsgSafe(b []byte) *IfInfomsg {
+       decoded := new(IfInfomsg)
+       src := bytes.NewReader(b[0:syscall.SizeofIfInfomsg])
+       binary.Read(src, NativeEndian(), decoded)
+       return decoded
+}
+
+// TestIfInfomsgDeserializeSerialize feeds random bytes through both the
+// safe and the unsafe code paths and expects them to agree.
+func TestIfInfomsgDeserializeSerialize(t *testing.T) {
+       orig := make([]byte, syscall.SizeofIfInfomsg)
+       rand.Read(orig)
+       viaSafe := deserializeIfInfomsgSafe(orig)
+       viaCast := DeserializeIfInfomsg(orig)
+       testDeserializeSerialize(t, orig, viaSafe, viaCast)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go
new file mode 100644 (file)
index 0000000..5dde998
--- /dev/null
@@ -0,0 +1,33 @@
+package nl
+
+import (
+       "syscall"
+       "unsafe"
+)
+
+// RtMsg embeds syscall.RtMsg and adds the Len and Serialize methods.
+type RtMsg struct {
+       syscall.RtMsg
+}
+
+// NewRtMsg returns an RtMsg preset to the main table, universe scope,
+// boot protocol and unicast type; all other fields are zero.
+func NewRtMsg() *RtMsg {
+       msg := new(RtMsg)
+       msg.Table = syscall.RT_TABLE_MAIN
+       msg.Scope = syscall.RT_SCOPE_UNIVERSE
+       msg.Protocol = syscall.RTPROT_BOOT
+       msg.Type = syscall.RTN_UNICAST
+       return msg
+}
+
+// Len reports the serialized size of the message in bytes.
+func (msg *RtMsg) Len() int {
+       return syscall.SizeofRtMsg
+}
+
+// DeserializeRtMsg reinterprets the first syscall.SizeofRtMsg bytes of b
+// as an RtMsg. Nothing is copied: the result points into b's backing
+// array. The slice expression panics when b's capacity is smaller than
+// that size.
+func DeserializeRtMsg(b []byte) *RtMsg {
+       head := b[:syscall.SizeofRtMsg]
+       return (*RtMsg)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of msg as a byte slice.
+// The slice aliases msg instead of copying it.
+func (msg *RtMsg) Serialize() []byte {
+       raw := (*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg))
+       return raw[:]
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux_test.go
new file mode 100644 (file)
index 0000000..ba9c410
--- /dev/null
@@ -0,0 +1,43 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "syscall"
+       "testing"
+)
+
+func (msg *RtMsg) write(b []byte) {
+       native := NativeEndian()
+       b[0] = msg.Family
+       b[1] = msg.Dst_len
+       b[2] = msg.Src_len
+       b[3] = msg.Tos
+       b[4] = msg.Table
+       b[5] = msg.Protocol
+       b[6] = msg.Scope
+       b[7] = msg.Type
+       native.PutUint32(b[8:12], msg.Flags)
+}
+
+func (msg *RtMsg) serializeSafe() []byte {
+       len := syscall.SizeofRtMsg
+       b := make([]byte, len)
+       msg.write(b)
+       return b
+}
+
+func deserializeRtMsgSafe(b []byte) *RtMsg {
+       var msg = RtMsg{}
+       binary.Read(bytes.NewReader(b[0:syscall.SizeofRtMsg]), NativeEndian(), &msg)
+       return &msg
+}
+
+func TestRtMsgDeserializeSerialize(t *testing.T) {
+       var orig = make([]byte, syscall.SizeofRtMsg)
+       rand.Read(orig)
+       safemsg := deserializeRtMsgSafe(orig)
+       msg := DeserializeRtMsg(orig)
+       testDeserializeSerialize(t, orig, safemsg, msg)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go
new file mode 100644 (file)
index 0000000..d953130
--- /dev/null
@@ -0,0 +1,259 @@
+package nl
+
+import (
+       "bytes"
+       "net"
+       "unsafe"
+)
+
+// XFRM_INF is the all-ones uint64 value, used as "infinity" for packet and
+// byte counts.
+const (
+       XFRM_INF = ^uint64(0)
+)
+
+// Message Types
+//
+// The values run consecutively from XFRM_MSG_BASE (0x10) to XFRM_MSG_MAX
+// (0x26). XFRM_NR_MSGTYPES is the number of values in that range, i.e.
+// XFRM_MSG_MAX + 1 - XFRM_MSG_BASE.
+const (
+       XFRM_MSG_BASE        = 0x10
+       XFRM_MSG_NEWSA       = 0x10
+       XFRM_MSG_DELSA       = 0x11
+       XFRM_MSG_GETSA       = 0x12
+       XFRM_MSG_NEWPOLICY   = 0x13
+       XFRM_MSG_DELPOLICY   = 0x14
+       XFRM_MSG_GETPOLICY   = 0x15
+       XFRM_MSG_ALLOCSPI    = 0x16
+       XFRM_MSG_ACQUIRE     = 0x17
+       XFRM_MSG_EXPIRE      = 0x18
+       XFRM_MSG_UPDPOLICY   = 0x19
+       XFRM_MSG_UPDSA       = 0x1a
+       XFRM_MSG_POLEXPIRE   = 0x1b
+       XFRM_MSG_FLUSHSA     = 0x1c
+       XFRM_MSG_FLUSHPOLICY = 0x1d
+       XFRM_MSG_NEWAE       = 0x1e
+       XFRM_MSG_GETAE       = 0x1f
+       XFRM_MSG_REPORT      = 0x20
+       XFRM_MSG_MIGRATE     = 0x21
+       XFRM_MSG_NEWSADINFO  = 0x22
+       XFRM_MSG_GETSADINFO  = 0x23
+       XFRM_MSG_NEWSPDINFO  = 0x24
+       XFRM_MSG_GETSPDINFO  = 0x25
+       XFRM_MSG_MAPPING     = 0x26
+       XFRM_MSG_MAX         = 0x26
+       XFRM_NR_MSGTYPES     = 0x17
+)
+
+// Attribute types
+//
+// The trailing comments name the C payload carried by each attribute.
+// XFRMA_MAX equals the highest attribute number defined here
+// (XFRMA_SA_EXTRA_FLAGS).
+const (
+       /* Netlink message attributes.  */
+       XFRMA_UNSPEC         = 0x00
+       XFRMA_ALG_AUTH       = 0x01 /* struct xfrm_algo */
+       XFRMA_ALG_CRYPT      = 0x02 /* struct xfrm_algo */
+       XFRMA_ALG_COMP       = 0x03 /* struct xfrm_algo */
+       XFRMA_ENCAP          = 0x04 /* struct xfrm_algo + struct xfrm_encap_tmpl */
+       XFRMA_TMPL           = 0x05 /* 1 or more struct xfrm_user_tmpl */
+       XFRMA_SA             = 0x06 /* struct xfrm_usersa_info  */
+       XFRMA_POLICY         = 0x07 /* struct xfrm_userpolicy_info */
+       XFRMA_SEC_CTX        = 0x08 /* struct xfrm_sec_ctx */
+       XFRMA_LTIME_VAL      = 0x09
+       XFRMA_REPLAY_VAL     = 0x0a
+       XFRMA_REPLAY_THRESH  = 0x0b
+       XFRMA_ETIMER_THRESH  = 0x0c
+       XFRMA_SRCADDR        = 0x0d /* xfrm_address_t */
+       XFRMA_COADDR         = 0x0e /* xfrm_address_t */
+       XFRMA_LASTUSED       = 0x0f /* unsigned long  */
+       XFRMA_POLICY_TYPE    = 0x10 /* struct xfrm_userpolicy_type */
+       XFRMA_MIGRATE        = 0x11
+       XFRMA_ALG_AEAD       = 0x12 /* struct xfrm_algo_aead */
+       XFRMA_KMADDRESS      = 0x13 /* struct xfrm_user_kmaddress */
+       XFRMA_ALG_AUTH_TRUNC = 0x14 /* struct xfrm_algo_auth */
+       XFRMA_MARK           = 0x15 /* struct xfrm_mark */
+       XFRMA_TFCPAD         = 0x16 /* __u32 */
+       XFRMA_REPLAY_ESN_VAL = 0x17 /* struct xfrm_replay_esn */
+       XFRMA_SA_EXTRA_FLAGS = 0x18 /* __u32 */
+       XFRMA_MAX            = 0x18
+)
+
+// Sizes, in bytes, of the fixed-layout types declared in this file. Each
+// Deserialize*/Serialize pair uses the matching constant as the length of
+// its unsafe cast, so the value must equal the size of the Go type.
+const (
+       SizeofXfrmAddress     = 0x10
+       SizeofXfrmSelector    = 0x38
+       SizeofXfrmLifetimeCfg = 0x40
+       SizeofXfrmLifetimeCur = 0x20
+       SizeofXfrmId          = 0x18
+)
+
+// typedef union {
+//   __be32    a4;
+//   __be32    a6[4];
+// } xfrm_address_t;
+
+// XfrmAddress is the raw 16-byte form of the C union above. FromIP stores an
+// IPv4 address in the first four bytes with the remaining twelve zeroed, and
+// an IPv6 address across all sixteen bytes; ToIP reverses that.
+type XfrmAddress [SizeofXfrmAddress]byte
+
+// ToIP converts the address to a 16-byte net.IP. When bytes 4 through 15 are
+// all zero the value is treated as IPv4 and returned in IPv4-mapped form
+// (::ffff:a.b.c.d) built from the first four bytes; otherwise all sixteen
+// bytes are copied through unchanged.
+func (x *XfrmAddress) ToIP() net.IP {
+       var zeroTail [12]byte
+       ip := make(net.IP, net.IPv6len)
+       if !bytes.Equal(x[4:16], zeroTail[:]) {
+               copy(ip, x[:])
+               return ip
+       }
+       ip[10], ip[11] = 0xff, 0xff
+       copy(ip[12:16], x[0:4])
+       return ip
+}
+
+// ToIPNet pairs the address with a CIDR mask of prefixlen leading one bits.
+// The mask is 32 bits wide when GetIPFamily reports FAMILY_V4 for the
+// address and 128 bits wide otherwise.
+func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet {
+       ip := x.ToIP()
+       bits := 128
+       if GetIPFamily(ip) == FAMILY_V4 {
+               bits = 32
+       }
+       return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), bits)}
+}
+
+// FromIP stores ip in the address.
+//
+//   - An ip shorter than net.IPv4len (including nil) clears the whole
+//     address.
+//   - An ip for which GetIPFamily reports FAMILY_V4 is written to the first
+//     four bytes and the remaining twelve are zeroed.
+//   - Anything else is written as a 16-byte IPv6 address.
+func (x *XfrmAddress) FromIP(ip net.IP) {
+       var zero XfrmAddress
+       switch {
+       case len(ip) < net.IPv4len:
+               // Clear all sixteen bytes, not only x[4:16], so that no stale
+               // bytes from a previous value survive in x[0:4].
+               *x = zero
+       case GetIPFamily(ip) == FAMILY_V4:
+               copy(x[0:4], ip.To4()[0:4])
+               copy(x[4:16], zero[4:16])
+       default:
+               copy(x[0:16], ip.To16()[0:16])
+       }
+}
+
+// DeserializeXfrmAddress reinterprets the first SizeofXfrmAddress bytes of b
+// as an XfrmAddress without copying, so the result shares memory with b. The
+// slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmAddress(b []byte) *XfrmAddress {
+       raw := b[0:SizeofXfrmAddress]
+       return (*XfrmAddress)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns the address bytes as a SizeofXfrmAddress-byte slice that
+// views the receiver's own memory rather than a copy.
+func (x *XfrmAddress) Serialize() []byte {
+       view := (*[SizeofXfrmAddress]byte)(unsafe.Pointer(x))
+       return view[:]
+}
+
+// struct xfrm_selector {
+//   xfrm_address_t  daddr;
+//   xfrm_address_t  saddr;
+//   __be16  dport;
+//   __be16  dport_mask;
+//   __be16  sport;
+//   __be16  sport_mask;
+//   __u16 family;
+//   __u8  prefixlen_d;
+//   __u8  prefixlen_s;
+//   __u8  proto;
+//   int ifindex;
+//   __kernel_uid32_t  user;
+// };
+
+// XfrmSelector is the Go layout of struct xfrm_selector. Pad occupies the
+// three bytes between Proto and Ifindex, which have no named field in the C
+// declaration above.
+type XfrmSelector struct {
+       Daddr      XfrmAddress
+       Saddr      XfrmAddress
+       Dport      uint16 // big endian
+       DportMask  uint16 // big endian
+       Sport      uint16 // big endian
+       SportMask  uint16 // big endian
+       Family     uint16
+       PrefixlenD uint8
+       PrefixlenS uint8
+       Proto      uint8
+       Pad        [3]byte
+       Ifindex    int32
+       User       uint32
+}
+
+// Len returns SizeofXfrmSelector, the fixed encoded size in bytes.
+func (msg *XfrmSelector) Len() int {
+       return SizeofXfrmSelector
+}
+
+// DeserializeXfrmSelector reinterprets the first SizeofXfrmSelector bytes of
+// b as an XfrmSelector without copying, so the result shares memory with b.
+// The slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmSelector(b []byte) *XfrmSelector {
+       raw := b[0:SizeofXfrmSelector]
+       return (*XfrmSelector)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmSelector-byte slice.
+// The slice is a view of msg, not a copy.
+func (msg *XfrmSelector) Serialize() []byte {
+       view := (*[SizeofXfrmSelector]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_lifetime_cfg {
+//   __u64 soft_byte_limit;
+//   __u64 hard_byte_limit;
+//   __u64 soft_packet_limit;
+//   __u64 hard_packet_limit;
+//   __u64 soft_add_expires_seconds;
+//   __u64 hard_add_expires_seconds;
+//   __u64 soft_use_expires_seconds;
+//   __u64 hard_use_expires_seconds;
+// };
+//
+
+// XfrmLifetimeCfg is the Go layout of struct xfrm_lifetime_cfg: eight
+// consecutive uint64 fields.
+type XfrmLifetimeCfg struct {
+       SoftByteLimit         uint64
+       HardByteLimit         uint64
+       SoftPacketLimit       uint64
+       HardPacketLimit       uint64
+       SoftAddExpiresSeconds uint64
+       HardAddExpiresSeconds uint64
+       SoftUseExpiresSeconds uint64
+       HardUseExpiresSeconds uint64
+}
+
+// Len returns SizeofXfrmLifetimeCfg, the fixed encoded size in bytes.
+func (msg *XfrmLifetimeCfg) Len() int {
+       return SizeofXfrmLifetimeCfg
+}
+
+// DeserializeXfrmLifetimeCfg reinterprets the first SizeofXfrmLifetimeCfg
+// bytes of b as an XfrmLifetimeCfg without copying, so the result shares
+// memory with b. The slice expression panics when b cannot supply that many
+// bytes.
+func DeserializeXfrmLifetimeCfg(b []byte) *XfrmLifetimeCfg {
+       raw := b[0:SizeofXfrmLifetimeCfg]
+       return (*XfrmLifetimeCfg)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmLifetimeCfg-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmLifetimeCfg) Serialize() []byte {
+       view := (*[SizeofXfrmLifetimeCfg]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_lifetime_cur {
+//   __u64 bytes;
+//   __u64 packets;
+//   __u64 add_time;
+//   __u64 use_time;
+// };
+
+// XfrmLifetimeCur is the Go layout of struct xfrm_lifetime_cur: four
+// consecutive uint64 fields.
+type XfrmLifetimeCur struct {
+       Bytes   uint64
+       Packets uint64
+       AddTime uint64
+       UseTime uint64
+}
+
+// Len returns SizeofXfrmLifetimeCur, the fixed encoded size in bytes.
+func (msg *XfrmLifetimeCur) Len() int {
+       return SizeofXfrmLifetimeCur
+}
+
+// DeserializeXfrmLifetimeCur reinterprets the first SizeofXfrmLifetimeCur
+// bytes of b as an XfrmLifetimeCur without copying, so the result shares
+// memory with b. The slice expression panics when b cannot supply that many
+// bytes.
+func DeserializeXfrmLifetimeCur(b []byte) *XfrmLifetimeCur {
+       raw := b[0:SizeofXfrmLifetimeCur]
+       return (*XfrmLifetimeCur)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmLifetimeCur-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmLifetimeCur) Serialize() []byte {
+       view := (*[SizeofXfrmLifetimeCur]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_id {
+//   xfrm_address_t  daddr;
+//   __be32    spi;
+//   __u8    proto;
+// };
+
+// XfrmId is the Go layout of struct xfrm_id. Pad holds the three bytes after
+// Proto, which have no named field in the C declaration above.
+type XfrmId struct {
+       Daddr XfrmAddress
+       Spi   uint32 // big endian
+       Proto uint8
+       Pad   [3]byte
+}
+
+// Len returns SizeofXfrmId, the fixed encoded size in bytes.
+func (msg *XfrmId) Len() int {
+       return SizeofXfrmId
+}
+
+// DeserializeXfrmId reinterprets the first SizeofXfrmId bytes of b as an
+// XfrmId without copying, so the result shares memory with b. The slice
+// expression panics when b cannot supply that many bytes.
+func DeserializeXfrmId(b []byte) *XfrmId {
+       raw := b[0:SizeofXfrmId]
+       return (*XfrmId)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmId-byte slice. The
+// slice is a view of msg, not a copy.
+func (msg *XfrmId) Serialize() []byte {
+       view := (*[SizeofXfrmId]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux_test.go
new file mode 100644 (file)
index 0000000..04404d7
--- /dev/null
@@ -0,0 +1,161 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "testing"
+)
+
+// write copies the address bytes into the first SizeofXfrmAddress bytes of
+// b. b is re-sliced to that length first, so a shorter b is accepted as long
+// as its capacity is large enough.
+func (msg *XfrmAddress) write(b []byte) {
+       dst := b[0:SizeofXfrmAddress]
+       copy(dst, msg[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmAddress-byte slice.
+func (msg *XfrmAddress) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmAddress)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmAddressSafe decodes the first SizeofXfrmAddress bytes of b
+// with encoding/binary in native byte order. The result is a copy that does
+// not alias b. The error from binary.Read is ignored.
+func deserializeXfrmAddressSafe(b []byte) *XfrmAddress {
+       msg := new(XfrmAddress)
+       src := bytes.NewReader(b[0:SizeofXfrmAddress])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmAddressDeserializeSerialize decodes the same random bytes through
+// the encoding/binary helper and through the unsafe cast, then hands both
+// results to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmAddressDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmAddress)
+       rand.Read(raw)
+       want := deserializeXfrmAddressSafe(raw)
+       got := DeserializeXfrmAddress(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b field by field at the offsets of the Go struct
+// layout. Every multi-byte integer is written in native byte order.
+func (msg *XfrmSelector) write(b []byte) {
+       const off = 2 * SizeofXfrmAddress // first byte after the two addresses
+       order := NativeEndian()
+       msg.Daddr.write(b[0:SizeofXfrmAddress])
+       msg.Saddr.write(b[SizeofXfrmAddress:off])
+       words := [...]uint16{msg.Dport, msg.DportMask, msg.Sport, msg.SportMask, msg.Family}
+       for i, v := range words {
+               order.PutUint16(b[off+2*i:off+2*i+2], v)
+       }
+       b[off+10] = msg.PrefixlenD
+       b[off+11] = msg.PrefixlenS
+       b[off+12] = msg.Proto
+       copy(b[off+13:off+16], msg.Pad[:])
+       order.PutUint32(b[off+16:off+20], uint32(msg.Ifindex))
+       order.PutUint32(b[off+20:off+24], msg.User)
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmSelector-byte slice.
+func (msg *XfrmSelector) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmSelector)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmSelectorSafe decodes the first SizeofXfrmSelector bytes of b
+// with encoding/binary in native byte order. The result is a copy that does
+// not alias b. The error from binary.Read is ignored.
+func deserializeXfrmSelectorSafe(b []byte) *XfrmSelector {
+       msg := new(XfrmSelector)
+       src := bytes.NewReader(b[0:SizeofXfrmSelector])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmSelectorDeserializeSerialize decodes the same random bytes through
+// the encoding/binary helper and through the unsafe cast, then hands both
+// results to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmSelectorDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmSelector)
+       rand.Read(raw)
+       want := deserializeXfrmSelectorSafe(raw)
+       got := DeserializeXfrmSelector(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes the eight uint64 fields of msg into b[0:64] in declaration
+// order, each in native byte order.
+func (msg *XfrmLifetimeCfg) write(b []byte) {
+       order := NativeEndian()
+       fields := [...]uint64{
+               msg.SoftByteLimit, msg.HardByteLimit,
+               msg.SoftPacketLimit, msg.HardPacketLimit,
+               msg.SoftAddExpiresSeconds, msg.HardAddExpiresSeconds,
+               msg.SoftUseExpiresSeconds, msg.HardUseExpiresSeconds,
+       }
+       for i, v := range fields {
+               order.PutUint64(b[8*i:8*i+8], v)
+       }
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmLifetimeCfg-byte slice.
+func (msg *XfrmLifetimeCfg) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmLifetimeCfg)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmLifetimeCfgSafe decodes the first SizeofXfrmLifetimeCfg
+// bytes of b with encoding/binary in native byte order. The result is a copy
+// that does not alias b. The error from binary.Read is ignored.
+func deserializeXfrmLifetimeCfgSafe(b []byte) *XfrmLifetimeCfg {
+       msg := new(XfrmLifetimeCfg)
+       src := bytes.NewReader(b[0:SizeofXfrmLifetimeCfg])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmLifetimeCfgDeserializeSerialize decodes the same random bytes
+// through the encoding/binary helper and through the unsafe cast, then hands
+// both results to testDeserializeSerialize (defined elsewhere in the
+// package).
+func TestXfrmLifetimeCfgDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmLifetimeCfg)
+       rand.Read(raw)
+       want := deserializeXfrmLifetimeCfgSafe(raw)
+       got := DeserializeXfrmLifetimeCfg(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes the four uint64 fields of msg into b[0:32] in declaration
+// order, each in native byte order.
+func (msg *XfrmLifetimeCur) write(b []byte) {
+       order := NativeEndian()
+       fields := [...]uint64{msg.Bytes, msg.Packets, msg.AddTime, msg.UseTime}
+       for i, v := range fields {
+               order.PutUint64(b[8*i:8*i+8], v)
+       }
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmLifetimeCur-byte slice.
+func (msg *XfrmLifetimeCur) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmLifetimeCur)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmLifetimeCurSafe decodes the first SizeofXfrmLifetimeCur
+// bytes of b with encoding/binary in native byte order. The result is a copy
+// that does not alias b. The error from binary.Read is ignored.
+func deserializeXfrmLifetimeCurSafe(b []byte) *XfrmLifetimeCur {
+       msg := new(XfrmLifetimeCur)
+       src := bytes.NewReader(b[0:SizeofXfrmLifetimeCur])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmLifetimeCurDeserializeSerialize decodes the same random bytes
+// through the encoding/binary helper and through the unsafe cast, then hands
+// both results to testDeserializeSerialize (defined elsewhere in the
+// package).
+func TestXfrmLifetimeCurDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmLifetimeCur)
+       rand.Read(raw)
+       want := deserializeXfrmLifetimeCurSafe(raw)
+       got := DeserializeXfrmLifetimeCur(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b: the destination address, then Spi in native byte
+// order, Proto, and the three pad bytes.
+func (msg *XfrmId) write(b []byte) {
+       const spiOff = SizeofXfrmAddress // Spi follows the destination address
+       order := NativeEndian()
+       msg.Daddr.write(b[0:spiOff])
+       order.PutUint32(b[spiOff:spiOff+4], msg.Spi)
+       b[spiOff+4] = msg.Proto
+       copy(b[spiOff+5:spiOff+8], msg.Pad[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmId-byte slice.
+func (msg *XfrmId) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmId)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmIdSafe decodes the first SizeofXfrmId bytes of b with
+// encoding/binary in native byte order. The result is a copy that does not
+// alias b. The error from binary.Read is ignored.
+func deserializeXfrmIdSafe(b []byte) *XfrmId {
+       msg := new(XfrmId)
+       src := bytes.NewReader(b[0:SizeofXfrmId])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmIdDeserializeSerialize decodes the same random bytes through the
+// encoding/binary helper and through the unsafe cast, then hands both results
+// to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmIdDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmId)
+       rand.Read(raw)
+       want := deserializeXfrmIdSafe(raw)
+       got := DeserializeXfrmId(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go
new file mode 100644 (file)
index 0000000..66f7e03
--- /dev/null
@@ -0,0 +1,119 @@
+package nl
+
+import (
+       "unsafe"
+)
+
+// Sizes, in bytes, of the fixed-layout policy types declared in this file.
+// Each Deserialize*/Serialize pair uses the matching constant as the length
+// of its unsafe cast, so the value must equal the size of the Go type.
+const (
+       SizeofXfrmUserpolicyId   = 0x40
+       SizeofXfrmUserpolicyInfo = 0xa8
+       SizeofXfrmUserTmpl       = 0x40
+)
+
+// struct xfrm_userpolicy_id {
+//   struct xfrm_selector    sel;
+//   __u32       index;
+//   __u8        dir;
+// };
+//
+
+// XfrmUserpolicyId is the Go layout of struct xfrm_userpolicy_id. Pad holds
+// the three bytes after Dir, which have no named field in the C declaration
+// above.
+type XfrmUserpolicyId struct {
+       Sel   XfrmSelector
+       Index uint32
+       Dir   uint8
+       Pad   [3]byte
+}
+
+// Len returns SizeofXfrmUserpolicyId, the fixed encoded size in bytes.
+func (msg *XfrmUserpolicyId) Len() int {
+       return SizeofXfrmUserpolicyId
+}
+
+// DeserializeXfrmUserpolicyId reinterprets the first SizeofXfrmUserpolicyId
+// bytes of b as an XfrmUserpolicyId without copying, so the result shares
+// memory with b. The slice expression panics when b cannot supply that many
+// bytes.
+func DeserializeXfrmUserpolicyId(b []byte) *XfrmUserpolicyId {
+       raw := b[0:SizeofXfrmUserpolicyId]
+       return (*XfrmUserpolicyId)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmUserpolicyId-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmUserpolicyId) Serialize() []byte {
+       view := (*[SizeofXfrmUserpolicyId]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_userpolicy_info {
+//   struct xfrm_selector    sel;
+//   struct xfrm_lifetime_cfg  lft;
+//   struct xfrm_lifetime_cur  curlft;
+//   __u32       priority;
+//   __u32       index;
+//   __u8        dir;
+//   __u8        action;
+// #define XFRM_POLICY_ALLOW 0
+// #define XFRM_POLICY_BLOCK 1
+//   __u8        flags;
+// #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */
+//   /* Automatically expand selector to include matching ICMP payloads. */
+// #define XFRM_POLICY_ICMP  2
+//   __u8        share;
+// };
+
+// XfrmUserpolicyInfo is the Go layout of struct xfrm_userpolicy_info. Pad
+// holds the four bytes after Share, which have no named field in the C
+// declaration above.
+type XfrmUserpolicyInfo struct {
+       Sel      XfrmSelector
+       Lft      XfrmLifetimeCfg
+       Curlft   XfrmLifetimeCur
+       Priority uint32
+       Index    uint32
+       Dir      uint8
+       Action   uint8
+       Flags    uint8
+       Share    uint8
+       Pad      [4]byte
+}
+
+// Len returns SizeofXfrmUserpolicyInfo, the fixed encoded size in bytes.
+func (msg *XfrmUserpolicyInfo) Len() int {
+       return SizeofXfrmUserpolicyInfo
+}
+
+// DeserializeXfrmUserpolicyInfo reinterprets the first
+// SizeofXfrmUserpolicyInfo bytes of b as an XfrmUserpolicyInfo without
+// copying, so the result shares memory with b. The slice expression panics
+// when b cannot supply that many bytes.
+func DeserializeXfrmUserpolicyInfo(b []byte) *XfrmUserpolicyInfo {
+       raw := b[0:SizeofXfrmUserpolicyInfo]
+       return (*XfrmUserpolicyInfo)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmUserpolicyInfo-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmUserpolicyInfo) Serialize() []byte {
+       view := (*[SizeofXfrmUserpolicyInfo]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_user_tmpl {
+//   struct xfrm_id    id;
+//   __u16     family;
+//   xfrm_address_t    saddr;
+//   __u32     reqid;
+//   __u8      mode;
+//   __u8      share;
+//   __u8      optional;
+//   __u32     aalgos;
+//   __u32     ealgos;
+//   __u32     calgos;
+// }
+
+// XfrmUserTmpl is the Go layout of struct xfrm_user_tmpl. Pad1 (two bytes
+// after Family) and Pad2 (one byte after Optional) have no named field in
+// the C declaration above.
+type XfrmUserTmpl struct {
+       XfrmId   XfrmId
+       Family   uint16
+       Pad1     [2]byte
+       Saddr    XfrmAddress
+       Reqid    uint32
+       Mode     uint8
+       Share    uint8
+       Optional uint8
+       Pad2     byte
+       Aalgos   uint32
+       Ealgos   uint32
+       Calgos   uint32
+}
+
+// Len returns SizeofXfrmUserTmpl, the fixed encoded size in bytes.
+func (msg *XfrmUserTmpl) Len() int {
+       return SizeofXfrmUserTmpl
+}
+
+// DeserializeXfrmUserTmpl reinterprets the first SizeofXfrmUserTmpl bytes of
+// b as an XfrmUserTmpl without copying, so the result shares memory with b.
+// The slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmUserTmpl(b []byte) *XfrmUserTmpl {
+       raw := b[0:SizeofXfrmUserTmpl]
+       return (*XfrmUserTmpl)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmUserTmpl-byte slice.
+// The slice is a view of msg, not a copy.
+func (msg *XfrmUserTmpl) Serialize() []byte {
+       view := (*[SizeofXfrmUserTmpl]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_policy_linux_test.go
new file mode 100644 (file)
index 0000000..08a604b
--- /dev/null
@@ -0,0 +1,109 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "testing"
+)
+
+// write encodes msg into b: the selector, then Index in native byte order,
+// Dir, and the three pad bytes.
+func (msg *XfrmUserpolicyId) write(b []byte) {
+       const idxOff = SizeofXfrmSelector // Index follows the selector
+       order := NativeEndian()
+       msg.Sel.write(b[0:idxOff])
+       order.PutUint32(b[idxOff:idxOff+4], msg.Index)
+       b[idxOff+4] = msg.Dir
+       copy(b[idxOff+5:idxOff+8], msg.Pad[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmUserpolicyId-byte slice.
+func (msg *XfrmUserpolicyId) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmUserpolicyId)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmUserpolicyIdSafe decodes the first SizeofXfrmUserpolicyId
+// bytes of b with encoding/binary in native byte order. The result is a copy
+// that does not alias b. The error from binary.Read is ignored.
+func deserializeXfrmUserpolicyIdSafe(b []byte) *XfrmUserpolicyId {
+       msg := new(XfrmUserpolicyId)
+       src := bytes.NewReader(b[0:SizeofXfrmUserpolicyId])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmUserpolicyIdDeserializeSerialize decodes the same random bytes
+// through the encoding/binary helper and through the unsafe cast, then hands
+// both results to testDeserializeSerialize (defined elsewhere in the
+// package).
+func TestXfrmUserpolicyIdDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmUserpolicyId)
+       rand.Read(raw)
+       want := deserializeXfrmUserpolicyIdSafe(raw)
+       got := DeserializeXfrmUserpolicyId(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b: the selector, both lifetime structures, Priority
+// and Index in native byte order, the four single-byte fields, and the four
+// pad bytes.
+func (msg *XfrmUserpolicyInfo) write(b []byte) {
+       const (
+               lftOff  = SizeofXfrmSelector
+               curOff  = lftOff + SizeofXfrmLifetimeCfg
+               tailOff = curOff + SizeofXfrmLifetimeCur
+       )
+       order := NativeEndian()
+       msg.Sel.write(b[0:lftOff])
+       msg.Lft.write(b[lftOff:curOff])
+       msg.Curlft.write(b[curOff:tailOff])
+       order.PutUint32(b[tailOff:tailOff+4], msg.Priority)
+       order.PutUint32(b[tailOff+4:tailOff+8], msg.Index)
+       for i, v := range [...]byte{msg.Dir, msg.Action, msg.Flags, msg.Share} {
+               b[tailOff+8+i] = v
+       }
+       copy(b[tailOff+12:tailOff+16], msg.Pad[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmUserpolicyInfo-byte slice.
+func (msg *XfrmUserpolicyInfo) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmUserpolicyInfo)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmUserpolicyInfoSafe decodes the first
+// SizeofXfrmUserpolicyInfo bytes of b with encoding/binary in native byte
+// order. The result is a copy that does not alias b. The error from
+// binary.Read is ignored.
+func deserializeXfrmUserpolicyInfoSafe(b []byte) *XfrmUserpolicyInfo {
+       msg := new(XfrmUserpolicyInfo)
+       src := bytes.NewReader(b[0:SizeofXfrmUserpolicyInfo])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmUserpolicyInfoDeserializeSerialize decodes the same random bytes
+// through the encoding/binary helper and through the unsafe cast, then hands
+// both results to testDeserializeSerialize (defined elsewhere in the
+// package).
+func TestXfrmUserpolicyInfoDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmUserpolicyInfo)
+       rand.Read(raw)
+       want := deserializeXfrmUserpolicyInfoSafe(raw)
+       got := DeserializeXfrmUserpolicyInfo(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b field by field at the offsets of the Go struct
+// layout. Every multi-byte integer is written in native byte order.
+func (msg *XfrmUserTmpl) write(b []byte) {
+       const (
+               famOff   = SizeofXfrmId
+               saddrOff = famOff + 4 // Family plus Pad1
+               reqOff   = saddrOff + SizeofXfrmAddress
+       )
+       order := NativeEndian()
+       msg.XfrmId.write(b[0:famOff])
+       order.PutUint16(b[famOff:famOff+2], msg.Family)
+       copy(b[famOff+2:famOff+4], msg.Pad1[:])
+       msg.Saddr.write(b[saddrOff:reqOff])
+       order.PutUint32(b[reqOff:reqOff+4], msg.Reqid)
+       b[reqOff+4] = msg.Mode
+       b[reqOff+5] = msg.Share
+       b[reqOff+6] = msg.Optional
+       b[reqOff+7] = msg.Pad2
+       for i, v := range [...]uint32{msg.Aalgos, msg.Ealgos, msg.Calgos} {
+               order.PutUint32(b[reqOff+8+4*i:reqOff+12+4*i], v)
+       }
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmUserTmpl-byte slice.
+func (msg *XfrmUserTmpl) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmUserTmpl)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmUserTmplSafe decodes the first SizeofXfrmUserTmpl bytes of b
+// with encoding/binary in native byte order. The result is a copy that does
+// not alias b. The error from binary.Read is ignored.
+func deserializeXfrmUserTmplSafe(b []byte) *XfrmUserTmpl {
+       msg := new(XfrmUserTmpl)
+       src := bytes.NewReader(b[0:SizeofXfrmUserTmpl])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmUserTmplDeserializeSerialize decodes the same random bytes through
+// the encoding/binary helper and through the unsafe cast, then hands both
+// results to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmUserTmplDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmUserTmpl)
+       rand.Read(raw)
+       want := deserializeXfrmUserTmplSafe(raw)
+       got := DeserializeXfrmUserTmpl(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux.go
new file mode 100644 (file)
index 0000000..4876ce4
--- /dev/null
@@ -0,0 +1,221 @@
+package nl
+
+import (
+       "unsafe"
+)
+
+// Sizes, in bytes, used by the state types in this file. For XfrmAlgo and
+// XfrmAlgoAuth the constant covers only the fixed header (name plus length
+// fields); their Len methods add the key length on top.
+const (
+       SizeofXfrmUsersaId   = 0x18
+       SizeofXfrmStats      = 0x0c
+       SizeofXfrmUsersaInfo = 0xe0
+       SizeofXfrmAlgo       = 0x44
+       SizeofXfrmAlgoAuth   = 0x48
+       SizeofXfrmEncapTmpl  = 0x18
+)
+
+// struct xfrm_usersa_id {
+//   xfrm_address_t      daddr;
+//   __be32        spi;
+//   __u16       family;
+//   __u8        proto;
+// };
+
+// XfrmUsersaId is the Go layout of struct xfrm_usersa_id. Pad holds the one
+// byte after Proto, which has no named field in the C declaration above.
+type XfrmUsersaId struct {
+       Daddr  XfrmAddress
+       Spi    uint32 // big endian
+       Family uint16
+       Proto  uint8
+       Pad    byte
+}
+
+// Len returns SizeofXfrmUsersaId, the fixed encoded size in bytes.
+func (msg *XfrmUsersaId) Len() int {
+       return SizeofXfrmUsersaId
+}
+
+// DeserializeXfrmUsersaId reinterprets the first SizeofXfrmUsersaId bytes of
+// b as an XfrmUsersaId without copying, so the result shares memory with b.
+// The slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmUsersaId(b []byte) *XfrmUsersaId {
+       raw := b[0:SizeofXfrmUsersaId]
+       return (*XfrmUsersaId)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmUsersaId-byte slice.
+// The slice is a view of msg, not a copy.
+func (msg *XfrmUsersaId) Serialize() []byte {
+       view := (*[SizeofXfrmUsersaId]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_stats {
+//   __u32 replay_window;
+//   __u32 replay;
+//   __u32 integrity_failed;
+// };
+
+// XfrmStats is the Go layout of struct xfrm_stats: three consecutive uint32
+// fields.
+type XfrmStats struct {
+       ReplayWindow    uint32
+       Replay          uint32
+       IntegrityFailed uint32
+}
+
+// Len returns SizeofXfrmStats, the fixed encoded size in bytes.
+func (msg *XfrmStats) Len() int {
+       return SizeofXfrmStats
+}
+
+// DeserializeXfrmStats reinterprets the first SizeofXfrmStats bytes of b as
+// an XfrmStats without copying, so the result shares memory with b. The slice
+// expression panics when b cannot supply that many bytes.
+func DeserializeXfrmStats(b []byte) *XfrmStats {
+       raw := b[0:SizeofXfrmStats]
+       return (*XfrmStats)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmStats-byte slice.
+// The slice is a view of msg, not a copy.
+func (msg *XfrmStats) Serialize() []byte {
+       view := (*[SizeofXfrmStats]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_usersa_info {
+//   struct xfrm_selector    sel;
+//   struct xfrm_id      id;
+//   xfrm_address_t      saddr;
+//   struct xfrm_lifetime_cfg  lft;
+//   struct xfrm_lifetime_cur  curlft;
+//   struct xfrm_stats   stats;
+//   __u32       seq;
+//   __u32       reqid;
+//   __u16       family;
+//   __u8        mode;   /* XFRM_MODE_xxx */
+//   __u8        replay_window;
+//   __u8        flags;
+// #define XFRM_STATE_NOECN  1
+// #define XFRM_STATE_DECAP_DSCP 2
+// #define XFRM_STATE_NOPMTUDISC 4
+// #define XFRM_STATE_WILDRECV 8
+// #define XFRM_STATE_ICMP   16
+// #define XFRM_STATE_AF_UNSPEC  32
+// #define XFRM_STATE_ALIGN4 64
+// #define XFRM_STATE_ESN    128
+// };
+//
+// #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1
+//
+
+// XfrmUsersaInfo is the Go layout of struct xfrm_usersa_info. Pad holds the
+// seven bytes after Flags, which have no named field in the C declaration
+// above.
+type XfrmUsersaInfo struct {
+       Sel          XfrmSelector
+       Id           XfrmId
+       Saddr        XfrmAddress
+       Lft          XfrmLifetimeCfg
+       Curlft       XfrmLifetimeCur
+       Stats        XfrmStats
+       Seq          uint32
+       Reqid        uint32
+       Family       uint16
+       Mode         uint8
+       ReplayWindow uint8
+       Flags        uint8
+       Pad          [7]byte
+}
+
+// Len returns SizeofXfrmUsersaInfo, the fixed encoded size in bytes.
+func (msg *XfrmUsersaInfo) Len() int {
+       return SizeofXfrmUsersaInfo
+}
+
+// DeserializeXfrmUsersaInfo reinterprets the first SizeofXfrmUsersaInfo bytes
+// of b as an XfrmUsersaInfo without copying, so the result shares memory with
+// b. The slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmUsersaInfo(b []byte) *XfrmUsersaInfo {
+       raw := b[0:SizeofXfrmUsersaInfo]
+       return (*XfrmUsersaInfo)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmUsersaInfo-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmUsersaInfo) Serialize() []byte {
+       view := (*[SizeofXfrmUsersaInfo]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
+
+// struct xfrm_algo {
+//   char    alg_name[64];
+//   unsigned int  alg_key_len;    /* in bits */
+//   char    alg_key[0];
+// };
+
+// XfrmAlgo is the Go form of struct xfrm_algo. The key is held in a separate
+// slice, so unlike the fixed-size types in this package the value is encoded
+// and decoded field by field instead of being cast to or from raw bytes.
+type XfrmAlgo struct {
+       AlgName   [64]byte
+       AlgKeyLen uint32
+       AlgKey    []byte
+}
+
+// Len returns the encoded size in bytes: the SizeofXfrmAlgo header plus
+// AlgKeyLen/8 key bytes. AlgKeyLen is in bits and the division truncates.
+func (msg *XfrmAlgo) Len() int {
+       return SizeofXfrmAlgo + int(msg.AlgKeyLen/8)
+}
+
+// DeserializeXfrmAlgo decodes an XfrmAlgo from b. AlgName is copied, while
+// AlgKey is a sub-slice of b and therefore aliases it. AlgKeyLen is read
+// directly from the four bytes at b[64] in host byte order. No length check
+// is made up front: the indexing and slice expressions panic when b is too
+// short for the header or for the key length it announces.
+func DeserializeXfrmAlgo(b []byte) *XfrmAlgo {
+       ret := XfrmAlgo{}
+       copy(ret.AlgName[:], b[0:64])
+       ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64]))
+       ret.AlgKey = b[68:ret.Len()]
+       return &ret
+}
+
+// Serialize encodes msg into a newly allocated slice of msg.Len() bytes:
+// name, key length in host byte order, then the key. copy stops at the
+// shorter operand, so an AlgKey shorter than AlgKeyLen/8 bytes leaves the
+// remaining bytes zero and a longer one is cut off.
+func (msg *XfrmAlgo) Serialize() []byte {
+       b := make([]byte, msg.Len())
+       copy(b[0:64], msg.AlgName[:])
+       copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:])
+       copy(b[68:msg.Len()], msg.AlgKey[:])
+       return b
+}
+
+// struct xfrm_algo_auth {
+//   char    alg_name[64];
+//   unsigned int  alg_key_len;    /* in bits */
+//   unsigned int  alg_trunc_len;  /* in bits */
+//   char    alg_key[0];
+// };
+
+// XfrmAlgoAuth is the Go form of struct xfrm_algo_auth. The key is held in a
+// separate slice, so the value is encoded and decoded field by field instead
+// of being cast to or from raw bytes.
+type XfrmAlgoAuth struct {
+       AlgName     [64]byte
+       AlgKeyLen   uint32
+       AlgTruncLen uint32
+       AlgKey      []byte
+}
+
+// Len returns the encoded size in bytes: the SizeofXfrmAlgoAuth header plus
+// AlgKeyLen/8 key bytes. AlgKeyLen is in bits and the division truncates.
+func (msg *XfrmAlgoAuth) Len() int {
+       return SizeofXfrmAlgoAuth + int(msg.AlgKeyLen/8)
+}
+
+// DeserializeXfrmAlgoAuth decodes an XfrmAlgoAuth from b. AlgName is copied,
+// while AlgKey is a sub-slice of b and therefore aliases it. AlgKeyLen and
+// AlgTruncLen are read directly from b[64] and b[68] in host byte order. No
+// length check is made up front: the indexing and slice expressions panic
+// when b is too short for the header or for the key length it announces.
+func DeserializeXfrmAlgoAuth(b []byte) *XfrmAlgoAuth {
+       ret := XfrmAlgoAuth{}
+       copy(ret.AlgName[:], b[0:64])
+       ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64]))
+       ret.AlgTruncLen = *(*uint32)(unsafe.Pointer(&b[68]))
+       ret.AlgKey = b[72:ret.Len()]
+       return &ret
+}
+
+// Serialize encodes msg into a newly allocated slice of msg.Len() bytes:
+// name, key length and truncation length in host byte order, then the key.
+// copy stops at the shorter operand, so an AlgKey shorter than AlgKeyLen/8
+// bytes leaves the remaining bytes zero and a longer one is cut off.
+func (msg *XfrmAlgoAuth) Serialize() []byte {
+       b := make([]byte, msg.Len())
+       copy(b[0:64], msg.AlgName[:])
+       copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:])
+       copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgTruncLen)))[:])
+       copy(b[72:msg.Len()], msg.AlgKey[:])
+       return b
+}
+
+// struct xfrm_algo_aead {
+//   char    alg_name[64];
+//   unsigned int  alg_key_len;  /* in bits */
+//   unsigned int  alg_icv_len;  /* in bits */
+//   char    alg_key[0];
+// }
+
+// struct xfrm_encap_tmpl {
+//   __u16   encap_type;
+//   __be16    encap_sport;
+//   __be16    encap_dport;
+//   xfrm_address_t  encap_oa;
+// };
+
+// XfrmEncapTmpl is the Go layout of struct xfrm_encap_tmpl. Pad holds the two
+// bytes between EncapDport and EncapOa, which have no named field in the C
+// declaration above.
+type XfrmEncapTmpl struct {
+       EncapType  uint16
+       EncapSport uint16 // big endian
+       EncapDport uint16 // big endian
+       Pad        [2]byte
+       EncapOa    XfrmAddress
+}
+
+// Len returns SizeofXfrmEncapTmpl, the fixed encoded size in bytes.
+func (msg *XfrmEncapTmpl) Len() int {
+       return SizeofXfrmEncapTmpl
+}
+
+// DeserializeXfrmEncapTmpl reinterprets the first SizeofXfrmEncapTmpl bytes
+// of b as an XfrmEncapTmpl without copying, so the result shares memory with
+// b. The slice expression panics when b cannot supply that many bytes.
+func DeserializeXfrmEncapTmpl(b []byte) *XfrmEncapTmpl {
+       raw := b[0:SizeofXfrmEncapTmpl]
+       return (*XfrmEncapTmpl)(unsafe.Pointer(&raw[0]))
+}
+
+// Serialize returns msg's in-memory bytes as a SizeofXfrmEncapTmpl-byte
+// slice. The slice is a view of msg, not a copy.
+func (msg *XfrmEncapTmpl) Serialize() []byte {
+       view := (*[SizeofXfrmEncapTmpl]byte)(unsafe.Pointer(msg))
+       return view[:]
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_state_linux_test.go
new file mode 100644 (file)
index 0000000..d5281e9
--- /dev/null
@@ -0,0 +1,207 @@
+package nl
+
+import (
+       "bytes"
+       "crypto/rand"
+       "encoding/binary"
+       "testing"
+)
+
+// write encodes msg into b: the destination address, then Spi and Family in
+// native byte order, Proto, and the pad byte.
+func (msg *XfrmUsersaId) write(b []byte) {
+       const spiOff = SizeofXfrmAddress // Spi follows the destination address
+       order := NativeEndian()
+       msg.Daddr.write(b[0:spiOff])
+       order.PutUint32(b[spiOff:spiOff+4], msg.Spi)
+       order.PutUint16(b[spiOff+4:spiOff+6], msg.Family)
+       b[spiOff+6] = msg.Proto
+       b[spiOff+7] = msg.Pad
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmUsersaId-byte slice.
+func (msg *XfrmUsersaId) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmUsersaId)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmUsersaIdSafe decodes the first SizeofXfrmUsersaId bytes of b
+// with encoding/binary in native byte order. The result is a copy that does
+// not alias b. The error from binary.Read is ignored.
+func deserializeXfrmUsersaIdSafe(b []byte) *XfrmUsersaId {
+       msg := new(XfrmUsersaId)
+       src := bytes.NewReader(b[0:SizeofXfrmUsersaId])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmUsersaIdDeserializeSerialize decodes the same random bytes through
+// the encoding/binary helper and through the unsafe cast, then hands both
+// results to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmUsersaIdDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmUsersaId)
+       rand.Read(raw)
+       want := deserializeXfrmUsersaIdSafe(raw)
+       got := DeserializeXfrmUsersaId(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes the three uint32 fields of msg into b[0:12] in declaration
+// order, each in native byte order.
+func (msg *XfrmStats) write(b []byte) {
+       order := NativeEndian()
+       fields := [...]uint32{msg.ReplayWindow, msg.Replay, msg.IntegrityFailed}
+       for i, v := range fields {
+               order.PutUint32(b[4*i:4*i+4], v)
+       }
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmStats-byte slice.
+func (msg *XfrmStats) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmStats)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmStatsSafe decodes the first SizeofXfrmStats bytes of b with
+// encoding/binary in native byte order. The result is a copy that does not
+// alias b. The error from binary.Read is ignored.
+func deserializeXfrmStatsSafe(b []byte) *XfrmStats {
+       msg := new(XfrmStats)
+       src := bytes.NewReader(b[0:SizeofXfrmStats])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmStatsDeserializeSerialize decodes the same random bytes through the
+// encoding/binary helper and through the unsafe cast, then hands both results
+// to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmStatsDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmStats)
+       rand.Read(raw)
+       want := deserializeXfrmStatsSafe(raw)
+       got := DeserializeXfrmStats(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b: the six nested structures back to back, then
+// Seq, Reqid and Family in native byte order, the three single-byte fields,
+// and the seven pad bytes.
+func (msg *XfrmUsersaInfo) write(b []byte) {
+       const (
+               idOff    = SizeofXfrmSelector
+               saddrOff = idOff + SizeofXfrmId
+               lftOff   = saddrOff + SizeofXfrmAddress
+               curOff   = lftOff + SizeofXfrmLifetimeCfg
+               statsOff = curOff + SizeofXfrmLifetimeCur
+               seqOff   = statsOff + SizeofXfrmStats
+       )
+       order := NativeEndian()
+       msg.Sel.write(b[0:idOff])
+       msg.Id.write(b[idOff:saddrOff])
+       msg.Saddr.write(b[saddrOff:lftOff])
+       msg.Lft.write(b[lftOff:curOff])
+       msg.Curlft.write(b[curOff:statsOff])
+       msg.Stats.write(b[statsOff:seqOff])
+       order.PutUint32(b[seqOff:seqOff+4], msg.Seq)
+       order.PutUint32(b[seqOff+4:seqOff+8], msg.Reqid)
+       order.PutUint16(b[seqOff+8:seqOff+10], msg.Family)
+       b[seqOff+10] = msg.Mode
+       b[seqOff+11] = msg.ReplayWindow
+       b[seqOff+12] = msg.Flags
+       copy(b[seqOff+13:seqOff+20], msg.Pad[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmUsersaInfo-byte slice.
+func (msg *XfrmUsersaInfo) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmUsersaInfo)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmUsersaInfoSafe decodes the first SizeofXfrmUsersaInfo bytes
+// of b with encoding/binary in native byte order. The result is a copy that
+// does not alias b. The error from binary.Read is ignored.
+func deserializeXfrmUsersaInfoSafe(b []byte) *XfrmUsersaInfo {
+       msg := new(XfrmUsersaInfo)
+       src := bytes.NewReader(b[0:SizeofXfrmUsersaInfo])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmUsersaInfoDeserializeSerialize decodes the same random bytes
+// through the encoding/binary helper and through the unsafe cast, then hands
+// both results to testDeserializeSerialize (defined elsewhere in the
+// package).
+func TestXfrmUsersaInfoDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmUsersaInfo)
+       rand.Read(raw)
+       want := deserializeXfrmUsersaInfoSafe(raw)
+       got := DeserializeXfrmUsersaInfo(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
+
+// write encodes msg into b: the 64-byte name, AlgKeyLen in native byte
+// order, then the key up to offset msg.Len().
+func (msg *XfrmAlgo) write(b []byte) {
+       order := NativeEndian()
+       copy(b[0:64], msg.AlgName[:])
+       order.PutUint32(b[64:68], msg.AlgKeyLen)
+       end := msg.Len()
+       copy(b[68:end], msg.AlgKey[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated slice
+// of msg.Len() bytes.
+func (msg *XfrmAlgo) serializeSafe() []byte {
+       buf := make([]byte, msg.Len())
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmAlgoSafe decodes an XfrmAlgo from b without unsafe: the
+// name is copied, AlgKeyLen is read with encoding/binary in native byte
+// order (the error is ignored), and AlgKey is a sub-slice of b.
+func deserializeXfrmAlgoSafe(b []byte) *XfrmAlgo {
+       msg := new(XfrmAlgo)
+       copy(msg.AlgName[:], b[0:64])
+       binary.Read(bytes.NewReader(b[64:68]), NativeEndian(), &msg.AlgKeyLen)
+       msg.AlgKey = b[68:msg.Len()]
+       return msg
+}
+
+// TestXfrmAlgoDeserializeSerialize decodes the same bytes through the safe
+// helper and through DeserializeXfrmAlgo, then hands both results to
+// testDeserializeSerialize (defined elsewhere in the package). The buffer is
+// random except for the key-length field, which is set to match the 32 key
+// bytes that follow the header.
+func TestXfrmAlgoDeserializeSerialize(t *testing.T) {
+       // use a 32 byte key len
+       var orig = make([]byte, SizeofXfrmAlgo+32)
+       rand.Read(orig)
+       // Set the key len to 256 bits in native byte order. Both decoders read
+       // the field in native order, so hard-coded little-endian bytes would
+       // decode as 65536 bits on a big-endian host and slice past the buffer.
+       NativeEndian().PutUint32(orig[64:68], 256)
+       safemsg := deserializeXfrmAlgoSafe(orig)
+       msg := DeserializeXfrmAlgo(orig)
+       testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+// write encodes msg into b: the 64-byte name, AlgKeyLen and AlgTruncLen in
+// native byte order, then the key up to offset msg.Len().
+func (msg *XfrmAlgoAuth) write(b []byte) {
+       order := NativeEndian()
+       copy(b[0:64], msg.AlgName[:])
+       order.PutUint32(b[64:68], msg.AlgKeyLen)
+       order.PutUint32(b[68:72], msg.AlgTruncLen)
+       end := msg.Len()
+       copy(b[72:end], msg.AlgKey[:])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated slice
+// of msg.Len() bytes.
+func (msg *XfrmAlgoAuth) serializeSafe() []byte {
+       buf := make([]byte, msg.Len())
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmAlgoAuthSafe decodes an XfrmAlgoAuth from b without unsafe:
+// the name is copied, the two length fields are read with encoding/binary in
+// native byte order (errors are ignored), and AlgKey is a sub-slice of b.
+func deserializeXfrmAlgoAuthSafe(b []byte) *XfrmAlgoAuth {
+       msg := new(XfrmAlgoAuth)
+       copy(msg.AlgName[:], b[0:64])
+       binary.Read(bytes.NewReader(b[64:68]), NativeEndian(), &msg.AlgKeyLen)
+       binary.Read(bytes.NewReader(b[68:72]), NativeEndian(), &msg.AlgTruncLen)
+       msg.AlgKey = b[72:msg.Len()]
+       return msg
+}
+
+// TestXfrmAlgoAuthDeserializeSerialize decodes the same bytes through the
+// safe helper and through DeserializeXfrmAlgoAuth, then hands both results to
+// testDeserializeSerialize (defined elsewhere in the package). The buffer is
+// random except for the key-length field, which is set to match the 32 key
+// bytes that follow the header.
+func TestXfrmAlgoAuthDeserializeSerialize(t *testing.T) {
+       // use a 32 byte key len
+       var orig = make([]byte, SizeofXfrmAlgoAuth+32)
+       rand.Read(orig)
+       // Set the key len to 256 bits in native byte order. Both decoders read
+       // the field in native order, so hard-coded little-endian bytes would
+       // decode as 65536 bits on a big-endian host and slice past the buffer.
+       NativeEndian().PutUint32(orig[64:68], 256)
+       safemsg := deserializeXfrmAlgoAuthSafe(orig)
+       msg := DeserializeXfrmAlgoAuth(orig)
+       testDeserializeSerialize(t, orig, safemsg, msg)
+}
+
+// write encodes msg into b: the three uint16 fields in native byte order,
+// the two pad bytes, then the 16-byte EncapOa address at offset 8.
+func (msg *XfrmEncapTmpl) write(b []byte) {
+       const oaOff = 8 // EncapOa follows three uint16 fields and two pad bytes
+       native := NativeEndian()
+       native.PutUint16(b[0:2], msg.EncapType)
+       native.PutUint16(b[2:4], msg.EncapSport)
+       native.PutUint16(b[4:6], msg.EncapDport)
+       copy(b[6:8], msg.Pad[:])
+       // Pass the full 16-byte window for the address. The previous bound,
+       // b[8:SizeofXfrmAddress], was only eight bytes long and worked solely
+       // because the callee re-slices into the spare capacity behind it.
+       msg.EncapOa.write(b[oaOff : oaOff+SizeofXfrmAddress])
+}
+
+// serializeSafe returns msg encoded by write into a freshly allocated
+// SizeofXfrmEncapTmpl-byte slice.
+func (msg *XfrmEncapTmpl) serializeSafe() []byte {
+       buf := make([]byte, SizeofXfrmEncapTmpl)
+       msg.write(buf)
+       return buf
+}
+
+// deserializeXfrmEncapTmplSafe decodes the first SizeofXfrmEncapTmpl bytes of
+// b with encoding/binary in native byte order. The result is a copy that does
+// not alias b. The error from binary.Read is ignored.
+func deserializeXfrmEncapTmplSafe(b []byte) *XfrmEncapTmpl {
+       msg := new(XfrmEncapTmpl)
+       src := bytes.NewReader(b[0:SizeofXfrmEncapTmpl])
+       binary.Read(src, NativeEndian(), msg)
+       return msg
+}
+
+// TestXfrmEncapTmplDeserializeSerialize decodes the same random bytes through
+// the encoding/binary helper and through the unsafe cast, then hands both
+// results to testDeserializeSerialize (defined elsewhere in the package).
+func TestXfrmEncapTmplDeserializeSerialize(t *testing.T) {
+       raw := make([]byte, SizeofXfrmEncapTmpl)
+       rand.Read(raw)
+       want := deserializeXfrmEncapTmplSafe(raw)
+       got := DeserializeXfrmEncapTmpl(raw)
+       testDeserializeSerialize(t, raw, want, got)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go
new file mode 100644 (file)
index 0000000..79396da
--- /dev/null
@@ -0,0 +1,53 @@
+package netlink
+
+import (
+       "strings"
+)
+
+// Protinfo represents bridge flags from netlink.
+type Protinfo struct {
+       Hairpin   bool
+       Guard     bool
+       FastLeave bool
+       RootBlock bool
+       Learning  bool
+       Flood     bool
+}
+
+// String returns the names of the enabled flags separated by single
+// spaces, in field declaration order. It returns the empty string when no
+// flag is set.
+func (prot *Protinfo) String() string {
+       flags := []struct {
+               on   bool
+               name string
+       }{
+               {prot.Hairpin, "Hairpin"},
+               {prot.Guard, "Guard"},
+               {prot.FastLeave, "FastLeave"},
+               {prot.RootBlock, "RootBlock"},
+               {prot.Learning, "Learning"},
+               {prot.Flood, "Flood"},
+       }
+       var enabled []string
+       for _, flag := range flags {
+               if flag.on {
+                       enabled = append(enabled, flag.name)
+               }
+       }
+       return strings.Join(enabled, " ")
+}
+
+// boolToByte encodes x as a one-byte slice: {1} for true, {0} for false.
+func boolToByte(x bool) []byte {
+       var v byte
+       if x {
+               v = 1
+       }
+       return []byte{v}
+}
+
+// byteToBool reports whether x is non-zero.
+func byteToBool(x byte) bool {
+       return x != 0
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_linux.go
new file mode 100644 (file)
index 0000000..a292d1c
--- /dev/null
@@ -0,0 +1,60 @@
+package netlink
+
+import (
+       "fmt"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// LinkGetProtinfo returns the bridge port flags of link. It dumps the
+// AF_BRIDGE links over netlink, selects the message whose interface index
+// matches link and decodes the nested IFLA_PROTINFO attribute. An error is
+// returned when the request fails, when attributes cannot be parsed, or
+// when no matching message carrying IFLA_PROTINFO is found.
+func LinkGetProtinfo(link Link) (Protinfo, error) {
+       base := link.Attrs()
+       ensureIndex(base)
+       var pi Protinfo
+       req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
+       msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
+       req.AddData(msg)
+       msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       if err != nil {
+               return pi, err
+       }
+
+       for _, m := range msgs {
+               ans := nl.DeserializeIfInfomsg(m)
+               if int(ans.Index) != base.Index {
+                       continue
+               }
+               attrs, err := nl.ParseRouteAttr(m[ans.Len():])
+               if err != nil {
+                       return pi, err
+               }
+               for _, attr := range attrs {
+                       if attr.Attr.Type != syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED {
+                               continue
+                       }
+                       infos, err := nl.ParseRouteAttr(attr.Value)
+                       if err != nil {
+                               return pi, err
+                       }
+                       // pi is still the zero value here, so it is filled in
+                       // directly instead of through a shadowing declaration.
+                       for _, info := range infos {
+                               // An attribute without payload carries no flag value;
+                               // skip it rather than panic on info.Value[0].
+                               if len(info.Value) == 0 {
+                                       continue
+                               }
+                               on := byteToBool(info.Value[0])
+                               switch info.Attr.Type {
+                               case nl.IFLA_BRPORT_MODE:
+                                       pi.Hairpin = on
+                               case nl.IFLA_BRPORT_GUARD:
+                                       pi.Guard = on
+                               case nl.IFLA_BRPORT_FAST_LEAVE:
+                                       pi.FastLeave = on
+                               case nl.IFLA_BRPORT_PROTECT:
+                                       pi.RootBlock = on
+                               case nl.IFLA_BRPORT_LEARNING:
+                                       pi.Learning = on
+                               case nl.IFLA_BRPORT_UNICAST_FLOOD:
+                                       pi.Flood = on
+                               }
+                       }
+                       return pi, nil
+               }
+       }
+       return pi, fmt.Errorf("Device with index %d not found", base.Index)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo_test.go
new file mode 100644 (file)
index 0000000..f94c42b
--- /dev/null
@@ -0,0 +1,98 @@
+package netlink
+
+import "testing"
+
+// TestProtinfo creates a bridge with two enslaved dummy links plus one
+// dummy link without a master, toggles bridge port flags on the enslaved
+// links and verifies via LinkGetProtinfo that exactly the toggled flags
+// changed. It finally checks that setting a flag on the link without a
+// master fails with "operation not supported".
+func TestProtinfo(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+       master := &Bridge{LinkAttrs{Name: "foo"}}
+       if err := LinkAdd(master); err != nil {
+               t.Fatal(err)
+       }
+       // bar1 and bar2 are attached to the bridge; bar3 has no master.
+       iface1 := &Dummy{LinkAttrs{Name: "bar1", MasterIndex: master.Index}}
+       iface2 := &Dummy{LinkAttrs{Name: "bar2", MasterIndex: master.Index}}
+       iface3 := &Dummy{LinkAttrs{Name: "bar3"}}
+
+       if err := LinkAdd(iface1); err != nil {
+               t.Fatal(err)
+       }
+       if err := LinkAdd(iface2); err != nil {
+               t.Fatal(err)
+       }
+       if err := LinkAdd(iface3); err != nil {
+               t.Fatal(err)
+       }
+
+       // Record the initial flags so untouched fields can be compared later.
+       oldpi1, err := LinkGetProtinfo(iface1)
+       if err != nil {
+               t.Fatal(err)
+       }
+       oldpi2, err := LinkGetProtinfo(iface2)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       // iface1: enable Hairpin and RootBlock, everything else must stay as it was.
+       if err := LinkSetHairpin(iface1, true); err != nil {
+               t.Fatal(err)
+       }
+
+       if err := LinkSetRootBlock(iface1, true); err != nil {
+               t.Fatal(err)
+       }
+
+       pi1, err := LinkGetProtinfo(iface1)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if !pi1.Hairpin {
+               t.Fatalf("Hairpin mode is not enabled for %s, but should", iface1.Name)
+       }
+       if !pi1.RootBlock {
+               t.Fatalf("RootBlock is not enabled for %s, but should", iface1.Name)
+       }
+       if pi1.Guard != oldpi1.Guard {
+               t.Fatalf("Guard field was changed for %s but shouldn't", iface1.Name)
+       }
+       if pi1.FastLeave != oldpi1.FastLeave {
+               t.Fatalf("FastLeave field was changed for %s but shouldn't", iface1.Name)
+       }
+       if pi1.Learning != oldpi1.Learning {
+               t.Fatalf("Learning field was changed for %s but shouldn't", iface1.Name)
+       }
+       if pi1.Flood != oldpi1.Flood {
+               t.Fatalf("Flood field was changed for %s but shouldn't", iface1.Name)
+       }
+
+       // iface2: enable Guard and disable Learning, everything else must stay as it was.
+       if err := LinkSetGuard(iface2, true); err != nil {
+               t.Fatal(err)
+       }
+       if err := LinkSetLearning(iface2, false); err != nil {
+               t.Fatal(err)
+       }
+       pi2, err := LinkGetProtinfo(iface2)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if pi2.Hairpin {
+               t.Fatalf("Hairpin mode is enabled for %s, but shouldn't", iface2.Name)
+       }
+       if !pi2.Guard {
+               t.Fatalf("Guard is not enabled for %s, but should", iface2.Name)
+       }
+       if pi2.Learning {
+               t.Fatalf("Learning is enabled for %s, but shouldn't", iface2.Name)
+       }
+       if pi2.RootBlock != oldpi2.RootBlock {
+               t.Fatalf("RootBlock field was changed for %s but shouldn't", iface2.Name)
+       }
+       if pi2.FastLeave != oldpi2.FastLeave {
+               t.Fatalf("FastLeave field was changed for %s but shouldn't", iface2.Name)
+       }
+       if pi2.Flood != oldpi2.Flood {
+               t.Fatalf("Flood field was changed for %s but shouldn't", iface2.Name)
+       }
+
+       // iface3 has no master, so setting a bridge port flag is expected to fail.
+       if err := LinkSetHairpin(iface3, true); err == nil || err.Error() != "operation not supported" {
+               t.Fatalf("Set protinfo attrs for link without master is not supported, but err: %s", err)
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go
new file mode 100644 (file)
index 0000000..6218546
--- /dev/null
@@ -0,0 +1,35 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+       "syscall"
+)
+
+// Scope is an enum representing a route scope.
+type Scope uint8
+
+// Route scopes, mirroring the syscall.RT_SCOPE_* values one to one.
+const (
+       SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
+       SCOPE_SITE     Scope = syscall.RT_SCOPE_SITE
+       SCOPE_LINK     Scope = syscall.RT_SCOPE_LINK
+       SCOPE_HOST     Scope = syscall.RT_SCOPE_HOST
+       SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
+)
+
+// Route represents a netlink route. A route is associated with a link,
+// has a destination network, an optional source ip, and optional
+// gateway. Advanced route parameters and non-main routing tables are
+// currently not supported.
+//
+// LinkIndex is the interface index of the associated link. Dst, Src and
+// Gw may each be left nil.
+type Route struct {
+       LinkIndex int
+       Scope     Scope
+       Dst       *net.IPNet
+       Src       net.IP
+       Gw        net.IP
+}
+
+// String renders the route's link index, destination, source and gateway
+// on a single line. The scope is not included.
+func (r Route) String() string {
+       const layout = "{Ifindex: %d Dst: %s Src: %s Gw: %s}"
+       return fmt.Sprintf(layout, r.LinkIndex, r.Dst, r.Src, r.Gw)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go
new file mode 100644 (file)
index 0000000..ef59cf7
--- /dev/null
@@ -0,0 +1,225 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// RtAttr is shared so it is in netlink_linux.go
+
+// RouteAdd will add a route to the system. The request is sent with the
+// create, exclusive and ack flags set.
+// Equivalent to: `ip route add $route`
+func RouteAdd(route *Route) error {
+       const flags = syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK
+       return routeHandle(route, nl.NewNetlinkRequest(syscall.RTM_NEWROUTE, flags))
+}
+
+// RouteDel will delete a route from the system.
+// Equivalent to: `ip route del $route`
+func RouteDel(route *Route) error {
+       return routeHandle(route, nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK))
+}
+
+// routeHandle fills req with an rtmsg header and the RTA_DST, RTA_PREFSRC,
+// RTA_GATEWAY and RTA_OIF attributes derived from route, then executes it
+// on the NETLINK_ROUTE socket. At least one of Dst.IP, Src or Gw must be
+// set, and all addresses that are set must belong to the same IP family.
+func routeHandle(route *Route, req *nl.NetlinkRequest) error {
+       hasDst := route.Dst != nil && route.Dst.IP != nil
+       if !hasDst && route.Src == nil && route.Gw == nil {
+               return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
+       }
+
+       header := nl.NewRtMsg()
+       header.Scope = uint8(route.Scope)
+
+       // family stays -1 until the first address fixes it.
+       family := -1
+       var attrs []*nl.RtAttr
+
+       if hasDst {
+               ones, _ := route.Dst.Mask.Size()
+               header.Dst_len = uint8(ones)
+               family = nl.GetIPFamily(route.Dst.IP)
+               var raw []byte
+               switch family {
+               case FAMILY_V4:
+                       raw = route.Dst.IP.To4()
+               default:
+                       raw = route.Dst.IP.To16()
+               }
+               attrs = append(attrs, nl.NewRtAttr(syscall.RTA_DST, raw))
+       }
+
+       if route.Src != nil {
+               srcFamily := nl.GetIPFamily(route.Src)
+               if family != -1 && family != srcFamily {
+                       return fmt.Errorf("source and destination ip are not the same IP family")
+               }
+               family = srcFamily
+               var raw []byte
+               switch srcFamily {
+               case FAMILY_V4:
+                       raw = route.Src.To4()
+               default:
+                       raw = route.Src.To16()
+               }
+               // The commonly used src ip for routes is actually PREFSRC
+               attrs = append(attrs, nl.NewRtAttr(syscall.RTA_PREFSRC, raw))
+       }
+
+       if route.Gw != nil {
+               gwFamily := nl.GetIPFamily(route.Gw)
+               if family != -1 && family != gwFamily {
+                       return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
+               }
+               family = gwFamily
+               var raw []byte
+               switch gwFamily {
+               case FAMILY_V4:
+                       raw = route.Gw.To4()
+               default:
+                       raw = route.Gw.To16()
+               }
+               attrs = append(attrs, nl.NewRtAttr(syscall.RTA_GATEWAY, raw))
+       }
+
+       header.Family = uint8(family)
+
+       req.AddData(header)
+       for _, attr := range attrs {
+               req.AddData(attr)
+       }
+
+       // The output interface is always sent, encoded as a native-endian uint32.
+       oif := make([]byte, 4)
+       nl.NativeEndian().PutUint32(oif, uint32(route.LinkIndex))
+       req.AddData(nl.NewRtAttr(syscall.RTA_OIF, oif))
+
+       _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+       return err
+}
+
+// RouteList gets a list of routes in the system.
+// Equivalent to: `ip route show`.
+// The list can be filtered by link and ip family: when link is non-nil,
+// routes whose output interface differs from the link's index are left
+// out. Cloned routes and routes outside the main table are always skipped.
+func RouteList(link Link, family int) ([]Route, error) {
+       req := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
+       msg := nl.NewIfInfomsg(family)
+       req.AddData(msg)
+
+       msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE)
+       if err != nil {
+               return nil, err
+       }
+
+       index := 0
+       if link != nil {
+               base := link.Attrs()
+               ensureIndex(base)
+               index = base.Index
+       }
+
+       native := nl.NativeEndian()
+       res := make([]Route, 0)
+nextMsg:
+       for _, m := range msgs {
+               msg := nl.DeserializeRtMsg(m)
+
+               if msg.Flags&syscall.RTM_F_CLONED != 0 {
+                       // Ignore cloned routes
+                       continue
+               }
+
+               if msg.Table != syscall.RT_TABLE_MAIN {
+                       // Ignore non-main tables
+                       continue
+               }
+
+               attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+               if err != nil {
+                       return nil, err
+               }
+
+               route := Route{Scope: Scope(msg.Scope)}
+               for _, attr := range attrs {
+                       switch attr.Attr.Type {
+                       case syscall.RTA_GATEWAY:
+                               route.Gw = net.IP(attr.Value)
+                       case syscall.RTA_PREFSRC:
+                               route.Src = net.IP(attr.Value)
+                       case syscall.RTA_DST:
+                               route.Dst = &net.IPNet{
+                                       IP:   attr.Value,
+                                       Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+                               }
+                       case syscall.RTA_OIF:
+                               routeIndex := int(native.Uint32(attr.Value[0:4]))
+                               if link != nil && routeIndex != index {
+                                       // Ignore routes from other interfaces. The label
+                                       // is required: a bare continue would only advance
+                                       // the attribute loop and the route would still be
+                                       // appended below.
+                                       continue nextMsg
+                               }
+                               route.LinkIndex = routeIndex
+                       }
+               }
+               res = append(res, route)
+       }
+
+       return res, nil
+}
+
+// RouteGet gets a route to a specific destination from the host system.
+// Equivalent to: 'ip route get'.
+// The destination is sent as a full-length prefix (32 bits for IPv4, 128
+// bits otherwise) and every reply message is converted into a Route.
+func RouteGet(destination net.IP) ([]Route, error) {
+       request := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST)
+
+       family := nl.GetIPFamily(destination)
+       var (
+               dstBytes  []byte
+               prefixLen uint8
+       )
+       switch family {
+       case FAMILY_V4:
+               dstBytes, prefixLen = destination.To4(), 32
+       default:
+               dstBytes, prefixLen = destination.To16(), 128
+       }
+
+       header := &nl.RtMsg{}
+       header.Family = uint8(family)
+       header.Dst_len = prefixLen
+       request.AddData(header)
+       request.AddData(nl.NewRtAttr(syscall.RTA_DST, dstBytes))
+
+       replies, err := request.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE)
+       if err != nil {
+               return nil, err
+       }
+
+       order := nl.NativeEndian()
+       routes := make([]Route, 0)
+       for _, raw := range replies {
+               rtmsg := nl.DeserializeRtMsg(raw)
+               attrs, err := nl.ParseRouteAttr(raw[rtmsg.Len():])
+               if err != nil {
+                       return nil, err
+               }
+
+               var route Route
+               for _, attr := range attrs {
+                       value := attr.Value
+                       switch attr.Attr.Type {
+                       case syscall.RTA_GATEWAY:
+                               route.Gw = net.IP(value)
+                       case syscall.RTA_PREFSRC:
+                               route.Src = net.IP(value)
+                       case syscall.RTA_DST:
+                               // The mask width follows the address length of the attribute.
+                               route.Dst = &net.IPNet{
+                                       IP:   value,
+                                       Mask: net.CIDRMask(int(rtmsg.Dst_len), 8*len(value)),
+                               }
+                       case syscall.RTA_OIF:
+                               route.LinkIndex = int(order.Uint32(value[0:4]))
+                       }
+               }
+               routes = append(routes, route)
+       }
+       return routes, nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go
new file mode 100644 (file)
index 0000000..f02bef8
--- /dev/null
@@ -0,0 +1,84 @@
+package netlink
+
+import (
+       "net"
+       "testing"
+)
+
+// TestRouteAddDel adds a route on the loopback interface, checks that it
+// shows up in RouteList and that RouteGet resolves an address inside its
+// destination network, then deletes the route and checks that the list is
+// empty again.
+func TestRouteAddDel(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       // get loopback interface
+       link, err := LinkByName("lo")
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       // bring the interface up
+       if err = LinkSetUp(link); err != nil {
+               t.Fatal(err)
+       }
+
+       // add a route with a preferred source address
+       _, dst, err := net.ParseCIDR("192.168.0.0/24")
+       if err != nil {
+               // Previously this error was overwritten unchecked by RouteAdd below.
+               t.Fatal(err)
+       }
+
+       ip := net.ParseIP("127.1.1.1")
+       route := Route{LinkIndex: link.Attrs().Index, Dst: dst, Src: ip}
+       err = RouteAdd(&route)
+       if err != nil {
+               t.Fatal(err)
+       }
+       routes, err := RouteList(link, FAMILY_V4)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if len(routes) != 1 {
+               t.Fatal("Link not added properly")
+       }
+
+       dstIP := net.ParseIP("192.168.0.42")
+       routeToDstIP, err := RouteGet(dstIP)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if len(routeToDstIP) == 0 {
+               t.Fatal("Default route not present")
+       }
+
+       err = RouteDel(&route)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       routes, err = RouteList(link, FAMILY_V4)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if len(routes) != 0 {
+               t.Fatal("Route not removed properly")
+       }
+
+}
+
+// TestRouteAddIncomplete checks that RouteAdd rejects a route that only
+// names a link and carries no destination, source or gateway.
+func TestRouteAddIncomplete(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       // Look up the loopback interface and bring it up.
+       lo, err := LinkByName("lo")
+       if err != nil {
+               t.Fatal(err)
+       }
+       if err = LinkSetUp(lo); err != nil {
+               t.Fatal(err)
+       }
+
+       incomplete := Route{LinkIndex: lo.Attrs().Index}
+       if RouteAdd(&incomplete) == nil {
+               t.Fatal("Adding incomplete route should fail")
+       }
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm.go
new file mode 100644 (file)
index 0000000..621ffb6
--- /dev/null
@@ -0,0 +1,64 @@
+package netlink
+
+import (
+       "fmt"
+       "syscall"
+)
+
+// Proto is an enum representing an ipsec protocol.
+type Proto uint8
+
+// Supported protocols, each taking the value of the matching
+// syscall.IPPROTO_* constant.
+const (
+       XFRM_PROTO_ROUTE2    Proto = syscall.IPPROTO_ROUTING
+       XFRM_PROTO_ESP       Proto = syscall.IPPROTO_ESP
+       XFRM_PROTO_AH        Proto = syscall.IPPROTO_AH
+       XFRM_PROTO_HAO       Proto = syscall.IPPROTO_DSTOPTS
+       XFRM_PROTO_COMP      Proto = syscall.IPPROTO_COMP
+       XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW
+)
+
+// String returns the short lowercase name of a known protocol and the
+// decimal value for anything else.
+func (p Proto) String() string {
+       names := map[Proto]string{
+               XFRM_PROTO_ROUTE2:    "route2",
+               XFRM_PROTO_ESP:       "esp",
+               XFRM_PROTO_AH:        "ah",
+               XFRM_PROTO_HAO:       "hao",
+               XFRM_PROTO_COMP:      "comp",
+               XFRM_PROTO_IPSEC_ANY: "ipsec-any",
+       }
+       if name, known := names[p]; known {
+               return name
+       }
+       return fmt.Sprintf("%d", p)
+}
+
+// Mode is an enum representing an ipsec transport.
+type Mode uint8
+
+// Modes are numbered consecutively from zero; XFRM_MODE_MAX is the first
+// value past the named modes.
+const (
+       XFRM_MODE_TRANSPORT Mode = iota
+       XFRM_MODE_TUNNEL
+       XFRM_MODE_ROUTEOPTIMIZATION
+       XFRM_MODE_IN_TRIGGER
+       XFRM_MODE_BEET
+       XFRM_MODE_MAX
+)
+
+// String returns the short name of a known mode. XFRM_MODE_MAX and any
+// larger value are printed as their decimal number.
+func (m Mode) String() string {
+       names := [...]string{
+               XFRM_MODE_TRANSPORT:         "transport",
+               XFRM_MODE_TUNNEL:            "tunnel",
+               XFRM_MODE_ROUTEOPTIMIZATION: "ro",
+               XFRM_MODE_IN_TRIGGER:        "in_trigger",
+               XFRM_MODE_BEET:              "beet",
+       }
+       if int(m) < len(names) {
+               return names[m]
+       }
+       return fmt.Sprintf("%d", m)
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy.go
new file mode 100644 (file)
index 0000000..d85c65d
--- /dev/null
@@ -0,0 +1,59 @@
+package netlink
+
+import (
+       "fmt"
+       "net"
+)
+
+// Dir is an enum representing an ipsec template direction.
+type Dir uint8
+
+// Directions are numbered consecutively from zero: the three policy
+// directions first, followed by the three socket directions.
+const (
+       XFRM_DIR_IN Dir = iota
+       XFRM_DIR_OUT
+       XFRM_DIR_FWD
+       XFRM_SOCKET_IN
+       XFRM_SOCKET_OUT
+       XFRM_SOCKET_FWD
+)
+
+// String returns "dir in|out|fwd" or "socket in|out|fwd" for the named
+// values. Any other value is printed as "socket N" where N is its offset
+// from XFRM_SOCKET_IN.
+func (d Dir) String() string {
+       names := [...]string{
+               XFRM_DIR_IN:     "dir in",
+               XFRM_DIR_OUT:    "dir out",
+               XFRM_DIR_FWD:    "dir fwd",
+               XFRM_SOCKET_IN:  "socket in",
+               XFRM_SOCKET_OUT: "socket out",
+               XFRM_SOCKET_FWD: "socket fwd",
+       }
+       if int(d) < len(names) {
+               return names[d]
+       }
+       return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN)
+}
+
+// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec
+// policy. These rules are matched with XfrmState to determine encryption
+// and authentication algorithms.
+//
+// Dst and Src are the template's endpoint addresses; Proto, Mode and Reqid
+// are sent to the kernel as uint8, uint8 and uint32 respectively.
+type XfrmPolicyTmpl struct {
+       Dst   net.IP
+       Src   net.IP
+       Proto Proto
+       Mode  Mode
+       Reqid int
+}
+
+// XfrmPolicy represents an ipsec policy. It represents the overlay network
+// and has a list of XfrmPolicyTmpls representing the base addresses of
+// the policy.
+//
+// Dst and Src form the policy selector and must both be non-nil when the
+// policy is passed to XfrmPolicyAdd or XfrmPolicyDel.
+type XfrmPolicy struct {
+       Dst      *net.IPNet
+       Src      *net.IPNet
+       Dir      Dir
+       Priority int
+       Index    int
+       Tmpls    []XfrmPolicyTmpl
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go
new file mode 100644 (file)
index 0000000..f93be54
--- /dev/null
@@ -0,0 +1,127 @@
+package netlink
+
+import (
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// selFromPolicy fills sel from the policy's Dst and Src networks. The
+// address family is derived from Dst, and each prefix length is the number
+// of leading ones in the corresponding mask. policy.Dst and policy.Src are
+// dereferenced unconditionally and therefore must be non-nil.
+func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) {
+       dstNet, srcNet := policy.Dst, policy.Src
+       sel.Family = uint16(nl.GetIPFamily(dstNet.IP))
+       sel.Daddr.FromIP(dstNet.IP)
+       sel.Saddr.FromIP(srcNet.IP)
+       dstOnes, _ := dstNet.Mask.Size()
+       sel.PrefixlenD = uint8(dstOnes)
+       srcOnes, _ := srcNet.Mask.Size()
+       sel.PrefixlenS = uint8(srcOnes)
+}
+
+// XfrmPolicyAdd will add an xfrm policy to the system.
+// Equivalent to: `ip xfrm policy add $policy`
+// All four lifetime limits are set to nl.XFRM_INF. The policy's templates,
+// if any, are sent as a single XFRMA_TMPL attribute.
+func XfrmPolicyAdd(policy *XfrmPolicy) error {
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWPOLICY, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+
+       msg := &nl.XfrmUserpolicyInfo{}
+       selFromPolicy(&msg.Sel, policy)
+       msg.Priority = uint32(policy.Priority)
+       msg.Index = uint32(policy.Index)
+       msg.Dir = uint8(policy.Dir)
+       msg.Lft.SoftByteLimit = nl.XFRM_INF
+       msg.Lft.HardByteLimit = nl.XFRM_INF
+       msg.Lft.SoftPacketLimit = nl.XFRM_INF
+       msg.Lft.HardPacketLimit = nl.XFRM_INF
+       req.AddData(msg)
+
+       // One fixed-size slot per template, filled in place below.
+       tmplData := make([]byte, nl.SizeofXfrmUserTmpl*len(policy.Tmpls))
+       for i, tmpl := range policy.Tmpls {
+               start := i * nl.SizeofXfrmUserTmpl
+               // NOTE(review): the writes through userTmpl only reach tmplData if
+               // DeserializeXfrmUserTmpl returns a view aliasing the slice rather
+               // than a copy - confirm in the nl package.
+               userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl])
+               userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst)
+               userTmpl.Saddr.FromIP(tmpl.Src)
+               userTmpl.XfrmId.Proto = uint8(tmpl.Proto)
+               userTmpl.Mode = uint8(tmpl.Mode)
+               userTmpl.Reqid = uint32(tmpl.Reqid)
+               // All bits set in each of the three algorithm masks.
+               userTmpl.Aalgos = ^uint32(0)
+               userTmpl.Ealgos = ^uint32(0)
+               userTmpl.Calgos = ^uint32(0)
+       }
+       if len(tmplData) > 0 {
+               tmpls := nl.NewRtAttr(nl.XFRMA_TMPL, tmplData)
+               req.AddData(tmpls)
+       }
+
+       _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+       return err
+}
+
+// XfrmPolicyDel will delete an xfrm policy from the system. The policy is
+// identified by its selector (Dst/Src), Index and Dir; the Tmpls are
+// ignored when matching the policy to delete.
+// Equivalent to: `ip xfrm policy del $policy`
+func XfrmPolicyDel(policy *XfrmPolicy) error {
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELPOLICY, syscall.NLM_F_ACK)
+
+       id := &nl.XfrmUserpolicyId{}
+       selFromPolicy(&id.Sel, policy)
+       id.Index = uint32(policy.Index)
+       id.Dir = uint8(policy.Dir)
+       req.AddData(id)
+
+       if _, err := req.Execute(syscall.NETLINK_XFRM, 0); err != nil {
+               return err
+       }
+       return nil
+}
+
+// XfrmPolicyList gets a list of xfrm policies in the system.
+// Equivalent to: `ip xfrm policy show`.
+// The list can be filtered by ip family; pass FAMILY_ALL to get every
+// policy. Templates found in XFRMA_TMPL attributes are attached to the
+// returned policies.
+func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP)
+       req.AddData(nl.NewIfInfomsg(family))
+
+       replies, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY)
+       if err != nil {
+               return nil, err
+       }
+
+       policies := make([]XfrmPolicy, 0)
+       for _, raw := range replies {
+               info := nl.DeserializeXfrmUserpolicyInfo(raw)
+               if family != FAMILY_ALL && family != int(info.Sel.Family) {
+                       continue
+               }
+
+               policy := XfrmPolicy{
+                       Dst:      info.Sel.Daddr.ToIPNet(info.Sel.PrefixlenD),
+                       Src:      info.Sel.Saddr.ToIPNet(info.Sel.PrefixlenS),
+                       Priority: int(info.Priority),
+                       Index:    int(info.Index),
+                       Dir:      Dir(info.Dir),
+               }
+
+               attrs, err := nl.ParseRouteAttr(raw[info.Len():])
+               if err != nil {
+                       return nil, err
+               }
+
+               for _, attr := range attrs {
+                       if attr.Attr.Type != nl.XFRMA_TMPL {
+                               continue
+                       }
+                       // The attribute payload is a sequence of fixed-size templates.
+                       for off := 0; off < len(attr.Value); off += nl.SizeofXfrmUserTmpl {
+                               tmpl := nl.DeserializeXfrmUserTmpl(attr.Value[off : off+nl.SizeofXfrmUserTmpl])
+                               policy.Tmpls = append(policy.Tmpls, XfrmPolicyTmpl{
+                                       Dst:   tmpl.XfrmId.Daddr.ToIP(),
+                                       Src:   tmpl.Saddr.ToIP(),
+                                       Proto: Proto(tmpl.XfrmId.Proto),
+                                       Mode:  Mode(tmpl.Mode),
+                                       Reqid: int(tmpl.Reqid),
+                               })
+                       }
+               }
+               policies = append(policies, policy)
+       }
+       return policies, nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_test.go
new file mode 100644 (file)
index 0000000..06d178d
--- /dev/null
@@ -0,0 +1,49 @@
+package netlink
+
+import (
+       "net"
+       "testing"
+)
+
+// TestXfrmPolicyAddDel adds an outbound policy with a single ESP tunnel
+// template, checks that exactly one policy is listed, deletes it again and
+// checks that the list is empty.
+func TestXfrmPolicyAddDel(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       src, _ := ParseIPNet("127.1.1.1/32")
+       dst, _ := ParseIPNet("127.1.1.2/32")
+       policy := XfrmPolicy{
+               Src: src,
+               Dst: dst,
+               Dir: XFRM_DIR_OUT,
+               Tmpls: []XfrmPolicyTmpl{{
+                       Src:   net.ParseIP("127.0.0.1"),
+                       Dst:   net.ParseIP("127.0.0.2"),
+                       Proto: XFRM_PROTO_ESP,
+                       Mode:  XFRM_MODE_TUNNEL,
+               }},
+       }
+
+       // expectCount lists all policies and fails the test with failure
+       // unless exactly want of them are present.
+       expectCount := func(want int, failure string) {
+               policies, err := XfrmPolicyList(FAMILY_ALL)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               if len(policies) != want {
+                       t.Fatal(failure)
+               }
+       }
+
+       if err := XfrmPolicyAdd(&policy); err != nil {
+               t.Fatal(err)
+       }
+       expectCount(1, "Policy not added properly")
+
+       if err := XfrmPolicyDel(&policy); err != nil {
+               t.Fatal(err)
+       }
+       expectCount(0, "Policy not removed properly")
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state.go
new file mode 100644 (file)
index 0000000..5b8f2df
--- /dev/null
@@ -0,0 +1,53 @@
+package netlink
+
+import (
+       "net"
+)
+
+// XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
+// The same type is used for XfrmState.Auth and XfrmState.Crypt. Key holds
+// the raw key bytes; its length in bits is sent to the kernel as the key
+// length.
+type XfrmStateAlgo struct {
+       Name        string
+       Key         []byte
+       TruncateLen int // Auth only
+}
+
+// EncapType is an enum identifying the encapsulation used by an
+// XfrmStateEncap.
+type EncapType uint8
+
+// Encapsulation types, numbered from one.
+const (
+       XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1
+       XFRM_ENCAP_ESPINUDP
+)
+
+// String returns the lowercase name of a known encapsulation type and
+// "unknown" for every other value.
+func (e EncapType) String() string {
+       if e == XFRM_ENCAP_ESPINUDP_NONIKE {
+               return "espinudp-nonike"
+       }
+       if e == XFRM_ENCAP_ESPINUDP {
+               return "espinudp"
+       }
+       return "unknown"
+}
+
+// XfrmStateEncap represents the encapsulation to use for the ipsec
+// encryption: the encapsulation type, the source and destination ports
+// and the original address.
+type XfrmStateEncap struct {
+       Type            EncapType
+       SrcPort         int
+       DstPort         int
+       OriginalAddress net.IP
+}
+
+// XfrmState represents the state of an ipsec policy. It optionally
+// contains an XfrmStateAlgo for encryption and one for authentication.
+// Encap is optional as well. XfrmStateAdd rejects a state whose Spi is 0.
+type XfrmState struct {
+       Dst          net.IP
+       Src          net.IP
+       Proto        Proto
+       Mode         Mode
+       Spi          int
+       Reqid        int
+       ReplayWindow int
+       Auth         *XfrmStateAlgo
+       Crypt        *XfrmStateAlgo
+       Encap        *XfrmStateEncap
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go
new file mode 100644 (file)
index 0000000..f193e5c
--- /dev/null
@@ -0,0 +1,181 @@
+package netlink
+
+import (
+       "fmt"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink/nl"
+)
+
+// writeStateAlgo serializes a into the wire form of nl.XfrmAlgo. The key
+// length is expressed in bits and at most the first 64 bytes of the
+// algorithm name are copied.
+func writeStateAlgo(a *XfrmStateAlgo) []byte {
+       var algo nl.XfrmAlgo
+       algo.AlgKeyLen = uint32(len(a.Key) * 8)
+       algo.AlgKey = a.Key
+
+       nameLen := len(a.Name)
+       if nameLen > 64 {
+               nameLen = 64
+       }
+       copy(algo.AlgName[:nameLen], a.Name)
+
+       return algo.Serialize()
+}
+
+// writeStateAlgoAuth serializes a into the wire form of nl.XfrmAlgoAuth,
+// which carries the truncation length in addition to the key. The key
+// length is expressed in bits and at most the first 64 bytes of the
+// algorithm name are copied.
+func writeStateAlgoAuth(a *XfrmStateAlgo) []byte {
+       var algo nl.XfrmAlgoAuth
+       algo.AlgKeyLen = uint32(len(a.Key) * 8)
+       algo.AlgTruncLen = uint32(a.TruncateLen)
+       algo.AlgKey = a.Key
+
+       nameLen := len(a.Name)
+       if nameLen > 64 {
+               nameLen = 64
+       }
+       copy(algo.AlgName[:nameLen], a.Name)
+
+       return algo.Serialize()
+}
+
+// XfrmStateAdd will add an xfrm state to the system.
+// Equivalent to: `ip xfrm state add $state`
+//
+// It returns an error when state.Spi is zero or when the netlink
+// request fails. Auth, Crypt and Encap are only sent when non-nil.
+func XfrmStateAdd(state *XfrmState) error {
+       // A state with spi 0 can't be deleted so don't allow it to be set
+       if state.Spi == 0 {
+               return fmt.Errorf("Spi must be set when adding xfrm state.")
+       }
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWSA, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+
+       // fixed-size part of the message; the address family is derived
+       // from the destination address
+       msg := &nl.XfrmUsersaInfo{}
+       msg.Family = uint16(nl.GetIPFamily(state.Dst))
+       msg.Id.Daddr.FromIP(state.Dst)
+       msg.Saddr.FromIP(state.Src)
+       msg.Id.Proto = uint8(state.Proto)
+       msg.Mode = uint8(state.Mode)
+       // the SPI is byte-swapped before being stored (presumably to
+       // network byte order -- confirm against nl.Swap32)
+       msg.Id.Spi = nl.Swap32(uint32(state.Spi))
+       msg.Reqid = uint32(state.Reqid)
+       msg.ReplayWindow = uint8(state.ReplayWindow)
+       // all byte/packet lifetime limits are set to XFRM_INF
+       msg.Lft.SoftByteLimit = nl.XFRM_INF
+       msg.Lft.HardByteLimit = nl.XFRM_INF
+       msg.Lft.SoftPacketLimit = nl.XFRM_INF
+       msg.Lft.HardPacketLimit = nl.XFRM_INF
+       req.AddData(msg)
+
+       // optional attributes follow the fixed-size message
+       if state.Auth != nil {
+               out := nl.NewRtAttr(nl.XFRMA_ALG_AUTH_TRUNC, writeStateAlgoAuth(state.Auth))
+               req.AddData(out)
+       }
+       if state.Crypt != nil {
+               out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt))
+               req.AddData(out)
+       }
+       if state.Encap != nil {
+               // encap is filled in through a view obtained from encapData,
+               // and encapData is what gets attached to the request
+               encapData := make([]byte, nl.SizeofXfrmEncapTmpl)
+               encap := nl.DeserializeXfrmEncapTmpl(encapData)
+               encap.EncapType = uint16(state.Encap.Type)
+               encap.EncapSport = nl.Swap16(uint16(state.Encap.SrcPort))
+               encap.EncapDport = nl.Swap16(uint16(state.Encap.DstPort))
+               encap.EncapOa.FromIP(state.Encap.OriginalAddress)
+               out := nl.NewRtAttr(nl.XFRMA_ENCAP, encapData)
+               req.AddData(out)
+       }
+
+       _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+       return err
+}
+
+// XfrmStateDel removes an xfrm state from the system. The state is
+// identified by its destination, protocol, SPI and source address;
+// the Algos are ignored when matching the state to delete.
+// Equivalent to: `ip xfrm state del $state`
+func XfrmStateDel(state *XfrmState) error {
+       // identifier of the state to remove
+       id := &nl.XfrmUsersaId{}
+       id.Family = uint16(nl.GetIPFamily(state.Dst))
+       id.Daddr.FromIP(state.Dst)
+       id.Proto = uint8(state.Proto)
+       id.Spi = nl.Swap32(uint32(state.Spi))
+
+       // the source address travels as a separate attribute
+       var src nl.XfrmAddress
+       src.FromIP(state.Src)
+
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELSA, syscall.NLM_F_ACK)
+       req.AddData(id)
+       req.AddData(nl.NewRtAttr(nl.XFRMA_SRCADDR, src.Serialize()))
+
+       _, err := req.Execute(syscall.NETLINK_XFRM, 0)
+       return err
+}
+
+// XfrmStateList gets a list of xfrm states in the system.
+// Equivalent to: `ip xfrm state show`.
+// The list can be filtered by ip family.
+//
+// Pass FAMILY_ALL to disable filtering. On success the returned slice
+// is non-nil (possibly empty); on failure it is nil.
+func XfrmStateList(family int) ([]XfrmState, error) {
+       req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP)
+
+       msg := nl.NewIfInfomsg(family)
+       req.AddData(msg)
+
+       msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
+       if err != nil {
+               return nil, err
+       }
+
+       res := make([]XfrmState, 0)
+       for _, m := range msgs {
+               // note: this msg shadows the request payload declared above
+               msg := nl.DeserializeXfrmUsersaInfo(m)
+
+               // filtering by family is done here, on the reply side
+               if family != FAMILY_ALL && family != int(msg.Family) {
+                       continue
+               }
+
+               var state XfrmState
+
+               state.Dst = msg.Id.Daddr.ToIP()
+               state.Src = msg.Saddr.ToIP()
+               state.Proto = Proto(msg.Id.Proto)
+               state.Mode = Mode(msg.Mode)
+               state.Spi = int(nl.Swap32(msg.Id.Spi))
+               state.Reqid = int(msg.Reqid)
+               state.ReplayWindow = int(msg.ReplayWindow)
+
+               // the attributes start right after the fixed-size message
+               attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+               if err != nil {
+                       return nil, err
+               }
+
+               for _, attr := range attrs {
+                       switch attr.Attr.Type {
+                       case nl.XFRMA_ALG_AUTH, nl.XFRMA_ALG_CRYPT:
+                               var resAlgo *XfrmStateAlgo
+                               if attr.Attr.Type == nl.XFRMA_ALG_AUTH {
+                                       // reuse an existing Auth (it may already have been
+                                       // created by the XFRMA_ALG_AUTH_TRUNC case) so that
+                                       // its TruncateLen is not discarded
+                                       if state.Auth == nil {
+                                               state.Auth = new(XfrmStateAlgo)
+                                       }
+                                       resAlgo = state.Auth
+                               } else {
+                                       state.Crypt = new(XfrmStateAlgo)
+                                       resAlgo = state.Crypt
+                               }
+                               algo := nl.DeserializeXfrmAlgo(attr.Value[:])
+                               (*resAlgo).Name = nl.BytesToString(algo.AlgName[:])
+                               (*resAlgo).Key = algo.AlgKey
+                       case nl.XFRMA_ALG_AUTH_TRUNC:
+                               if state.Auth == nil {
+                                       state.Auth = new(XfrmStateAlgo)
+                               }
+                               algo := nl.DeserializeXfrmAlgoAuth(attr.Value[:])
+                               state.Auth.Name = nl.BytesToString(algo.AlgName[:])
+                               state.Auth.Key = algo.AlgKey
+                               state.Auth.TruncateLen = int(algo.AlgTruncLen)
+                       case nl.XFRMA_ENCAP:
+                               encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:])
+                               state.Encap = new(XfrmStateEncap)
+                               state.Encap.Type = EncapType(encap.EncapType)
+                               // ports are byte-swapped back, mirroring XfrmStateAdd
+                               state.Encap.SrcPort = int(nl.Swap16(encap.EncapSport))
+                               state.Encap.DstPort = int(nl.Swap16(encap.EncapDport))
+                               state.Encap.OriginalAddress = encap.EncapOa.ToIP()
+                       }
+
+               }
+               res = append(res, state)
+       }
+       return res, nil
+}
diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_test.go
new file mode 100644 (file)
index 0000000..df57ef8
--- /dev/null
@@ -0,0 +1,50 @@
+package netlink
+
+import (
+       "net"
+       "testing"
+)
+
+func TestXfrmStateAddDel(t *testing.T) {
+       tearDown := setUpNetlinkTest(t)
+       defer tearDown()
+
+       state := XfrmState{
+               Src:   net.ParseIP("127.0.0.1"),
+               Dst:   net.ParseIP("127.0.0.2"),
+               Proto: XFRM_PROTO_ESP,
+               Mode:  XFRM_MODE_TUNNEL,
+               Spi:   1,
+               Auth: &XfrmStateAlgo{
+                       Name: "hmac(sha256)",
+                       Key:  []byte("abcdefghijklmnopqrstuvwzyzABCDEF"),
+               },
+               Crypt: &XfrmStateAlgo{
+                       Name: "cbc(aes)",
+                       Key:  []byte("abcdefghijklmnopqrstuvwzyzABCDEF"),
+               },
+       }
+       if err := XfrmStateAdd(&state); err != nil {
+               t.Fatal(err)
+       }
+       policies, err := XfrmStateList(FAMILY_ALL)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if len(policies) != 1 {
+               t.Fatal("State not added properly")
+       }
+
+       if err = XfrmStateDel(&state); err != nil {
+               t.Fatal(err)
+       }
+
+       policies, err = XfrmStateList(FAMILY_ALL)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if len(policies) != 0 {
+               t.Fatal("State not removed properly")
+       }
+}
index e8d8e9f..f806436 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1 +1,88 @@
 # cni
+
+## Included Plugins
+This repository includes a number of common plugins that can be found in the plugins/ directory.
+
+## Running the plugins
+The scripts/ directory contains two scripts, priv-net-run.sh and docker-run.sh, that can be used to exercise the plugins.
+
+Start out by creating a netconf file to describe a network:
+
+```
+$ mkdir -p /etc/cni/net.d
+$ cat >/etc/cni/net.d/10-mynet.conf <<EOF
+{
+       "name": "mynet",
+       "type": "bridge",
+       "bridge": "cni0",
+       "isGateway": true,
+       "ipMasq": true,
+       "ipam": {
+               "type": "host-local",
+               "subnet": "10.22.0.0/16",
+               "routes": [
+                       { "dst": "0.0.0.0/0" }
+               ]
+       }
+}
+EOF
+```
+
+Next, build the plugins:
+
+```
+$ ./build
+```
+
+Finally, execute a command (`ifconfig` in this example) in a private network namespace that has joined the `mynet` network:
+
+```
+$ CNI_PATH=`pwd`/bin
+$ cd scripts
+$ sudo CNI_PATH=$CNI_PATH ./priv-net-run.sh ifconfig
+eth0      Link encap:Ethernet  HWaddr f2:c2:6f:54:b8:2b  
+          inet addr:10.22.0.2  Bcast:0.0.0.0  Mask:255.255.0.0
+          inet6 addr: fe80::f0c2:6fff:fe54:b82b/64 Scope:Link
+          UP BROADCAST MULTICAST  MTU:1500  Metric:1
+          RX packets:1 errors:0 dropped:0 overruns:0 frame:0
+          TX packets:0 errors:0 dropped:1 overruns:0 carrier:0
+          collisions:0 txqueuelen:0 
+          RX bytes:90 (90.0 B)  TX bytes:0 (0.0 B)
+
+lo        Link encap:Local Loopback  
+          inet addr:127.0.0.1  Mask:255.0.0.0
+          inet6 addr: ::1/128 Scope:Host
+          UP LOOPBACK RUNNING  MTU:65536  Metric:1
+          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
+          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
+          collisions:0 txqueuelen:0 
+          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
+```
+
+## Running a Docker container with network namespace set up by CNI plugins
+
+Use the instructions in the previous section to define a netconf and build the plugins.
+Next, use the docker-run.sh script, which wraps the `docker run` command to execute the plugins prior to entering the container:
+
+```
+$ CNI_PATH=`pwd`/bin
+$ cd scripts
+$ sudo CNI_PATH=$CNI_PATH ./docker-run.sh --rm busybox:latest /sbin/ifconfig
+eth0      Link encap:Ethernet  HWaddr fa:60:70:aa:07:d1  
+          inet addr:10.22.0.2  Bcast:0.0.0.0  Mask:255.255.0.0
+          inet6 addr: fe80::f860:70ff:feaa:7d1/64 Scope:Link
+          UP BROADCAST MULTICAST  MTU:1500  Metric:1
+          RX packets:1 errors:0 dropped:0 overruns:0 frame:0
+          TX packets:0 errors:0 dropped:1 overruns:0 carrier:0
+          collisions:0 txqueuelen:0 
+          RX bytes:90 (90.0 B)  TX bytes:0 (0.0 B)
+
+lo        Link encap:Local Loopback  
+          inet addr:127.0.0.1  Mask:255.0.0.0
+          inet6 addr: ::1/128 Scope:Host
+          UP LOOPBACK RUNNING  MTU:65536  Metric:1
+          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
+          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
+          collisions:0 txqueuelen:0 
+          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
+```
diff --git a/build b/build
new file mode 100755 (executable)
index 0000000..eebf04d
--- /dev/null
+++ b/build
@@ -0,0 +1,27 @@
+#!/bin/bash -e
+
+ORG_PATH="github.com/appc"
+REPO_PATH="${ORG_PATH}/cni"
+
+if [ ! -h gopath/src/${REPO_PATH} ]; then
+       mkdir -p gopath/src/${ORG_PATH}
+       ln -s ../../../.. gopath/src/${REPO_PATH} || exit 255
+fi
+
+export GOBIN=${PWD}/bin
+export GOPATH=${PWD}/gopath
+
+echo "Building plugins"
+
+PLUGINS="plugins/main/* plugins/ipam/*"
+for d in $PLUGINS; do
+       if [ -d $d ]; then
+               plugin=$(basename $d)
+               echo "  " $plugin
+               go install ${REPO_PATH}/$d
+       fi
+done
+
+if [ ! -h $GOBIN/host-local-ptp ]; then
+       ln -s host-local $GOBIN/host-local-ptp
+fi
diff --git a/pkg/ip/cidr.go b/pkg/ip/cidr.go
new file mode 100644 (file)
index 0000000..c963398
--- /dev/null
@@ -0,0 +1,86 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip
+
+import (
+       "encoding/json"
+       "math/big"
+       "net"
+)
+
+// ParseCIDR takes a string like "10.2.3.1/24" and
+// return IPNet with "10.2.3.1" and /24 mask
+func ParseCIDR(s string) (*net.IPNet, error) {
+       ip, ipn, err := net.ParseCIDR(s)
+       if err != nil {
+               return nil, err
+       }
+
+       ipn.IP = ip
+       return ipn, nil
+}
+
+// NextIP returns IP incremented by 1
+func NextIP(ip net.IP) net.IP {
+       i := ipToInt(ip)
+       return intToIP(i.Add(i, big.NewInt(1)))
+}
+
+// PrevIP returns IP decremented by 1
+func PrevIP(ip net.IP) net.IP {
+       i := ipToInt(ip)
+       return intToIP(i.Sub(i, big.NewInt(1)))
+}
+
+func ipToInt(ip net.IP) *big.Int {
+       if v := ip.To4(); v != nil {
+               return big.NewInt(0).SetBytes(v)
+       }
+       return big.NewInt(0).SetBytes(ip.To16())
+}
+
+func intToIP(i *big.Int) net.IP {
+       return net.IP(i.Bytes())
+}
+
+// Network masks off the host portion of the IP
+func Network(ipn *net.IPNet) *net.IPNet {
+       return &net.IPNet{
+               IP:   ipn.IP.Mask(ipn.Mask),
+               Mask: ipn.Mask,
+       }
+}
+
+// IPNet is like net.IPNet but adds JSON marshalling and unmarshalling.
+// The JSON form is a string in CIDR notation.
+type IPNet net.IPNet
+
+// MarshalJSON encodes n as a JSON string in CIDR notation.
+func (n IPNet) MarshalJSON() ([]byte, error) {
+       ipn := (*net.IPNet)(&n)
+       return json.Marshal(ipn.String())
+}
+
+// UnmarshalJSON decodes a JSON string in CIDR notation into n. The host
+// address is kept (see ParseCIDR). n is only modified on success.
+func (n *IPNet) UnmarshalJSON(data []byte) error {
+       var cidr string
+       err := json.Unmarshal(data, &cidr)
+       if err != nil {
+               return err
+       }
+
+       parsed, err := ParseCIDR(cidr)
+       if err != nil {
+               return err
+       }
+
+       *n = IPNet(*parsed)
+       return nil
+}
diff --git a/pkg/ip/ipmasq.go b/pkg/ip/ipmasq.go
new file mode 100644 (file)
index 0000000..665189b
--- /dev/null
@@ -0,0 +1,66 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip
+
+import (
+       "fmt"
+       "net"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/coreos/go-iptables/iptables"
+)
+
+// SetupIPMasq installs iptables rules to masquerade traffic
+// coming from ipn and going outside of it
+func SetupIPMasq(ipn *net.IPNet, chain string) error {
+       ipt, err := iptables.New()
+       if err != nil {
+               return fmt.Errorf("failed to locate iptabes: %v", err)
+       }
+
+       if err = ipt.NewChain("nat", chain); err != nil {
+               if err.(*iptables.Error).ExitStatus() != 1 {
+                       // TODO(eyakubovich): assumes exit status 1 implies chain exists
+                       return err
+               }
+       }
+
+       if err = ipt.AppendUnique("nat", chain, "-d", ipn.String(), "-j", "ACCEPT"); err != nil {
+               return err
+       }
+
+       if err = ipt.AppendUnique("nat", chain, "!", "-d", "224.0.0.0/4", "-j", "MASQUERADE"); err != nil {
+               return err
+       }
+
+       return ipt.AppendUnique("nat", "POSTROUTING", "-s", ipn.String(), "-j", chain)
+}
+
+// TeardownIPMasq undoes the effects of SetupIPMasq
+func TeardownIPMasq(ipn *net.IPNet, chain string) error {
+       ipt, err := iptables.New()
+       if err != nil {
+               return fmt.Errorf("failed to locate iptabes: %v", err)
+       }
+
+       if err = ipt.Delete("nat", "POSTROUTING", "-s", ipn.String(), "-j", chain); err != nil {
+               return err
+       }
+
+       if err = ipt.ClearChain("nat", chain); err != nil {
+               return err
+       }
+
+       return ipt.DeleteChain("nat", chain)
+}
diff --git a/pkg/ip/link.go b/pkg/ip/link.go
new file mode 100644 (file)
index 0000000..59865cf
--- /dev/null
@@ -0,0 +1,117 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip
+
+import (
+       "crypto/sha512"
+       "fmt"
+       "net"
+       "os"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+)
+
+func makeVeth(name, peer string, mtu int) (netlink.Link, error) {
+       veth := &netlink.Veth{
+               LinkAttrs: netlink.LinkAttrs{
+                       Name:  name,
+                       Flags: net.FlagUp,
+                       MTU:   mtu,
+               },
+               PeerName: peer,
+       }
+       if err := netlink.LinkAdd(veth); err != nil {
+               return nil, err
+       }
+
+       return veth, nil
+}
+
+// RandomVethName returns string "veth" with random prefix (hashed from entropy)
+func RandomVethName(entropy string) string {
+       h := sha512.New()
+       h.Write([]byte(entropy))
+       return fmt.Sprintf("veth%x", h.Sum(nil)[:5])
+}
+
+// SetupVeth sets up a virtual ethernet link.
+// Should be in container netns.
+// TODO(eyakubovich): get rid of entropy and ask kernel to pick name via pattern
+func SetupVeth(entropy, contVethName string, mtu int, hostNS *os.File) (hostVeth, contVeth netlink.Link, err error) {
+       // NetworkManager (recent versions) will ignore veth devices that start with "veth"
+       hostVethName := RandomVethName(entropy)
+       hostVeth, err = makeVeth(hostVethName, contVethName, mtu)
+       if err != nil {
+               err = fmt.Errorf("failed to make veth pair: %v", err)
+               return
+       }
+
+       if err = netlink.LinkSetUp(hostVeth); err != nil {
+               err = fmt.Errorf("failed to set %q up: %v", hostVethName, err)
+               return
+       }
+
+       contVeth, err = netlink.LinkByName(contVethName)
+       if err != nil {
+               err = fmt.Errorf("failed to lookup %q: %v", contVethName, err)
+               return
+       }
+
+       if err = netlink.LinkSetUp(contVeth); err != nil {
+               err = fmt.Errorf("failed to set %q up: %v", contVethName, err)
+               return
+       }
+
+       if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil {
+               err = fmt.Errorf("failed to move veth to host netns: %v", err)
+               return
+       }
+
+       return
+}
+
+// DelLinkByName removes an interface link.
+func DelLinkByName(ifName string) error {
+       iface, err := netlink.LinkByName(ifName)
+       if err != nil {
+               return fmt.Errorf("failed to lookup %q: %v", ifName, err)
+       }
+
+       if err = netlink.LinkDel(iface); err != nil {
+               return fmt.Errorf("failed to delete %q: %v", ifName, err)
+       }
+
+       return nil
+}
+
+// DelLinkByNameAddr remove an interface returns its IP address
+// of the specified family
+func DelLinkByNameAddr(ifName string, family int) (*net.IPNet, error) {
+       iface, err := netlink.LinkByName(ifName)
+       if err != nil {
+               return nil, fmt.Errorf("failed to lookup %q: %v", ifName, err)
+       }
+
+       addrs, err := netlink.AddrList(iface, family)
+       if err != nil || len(addrs) == 0 {
+               return nil, fmt.Errorf("failed to get IP addresses for %q: %v", ifName, err)
+       }
+
+       if err = netlink.LinkDel(iface); err != nil {
+               return nil, fmt.Errorf("failed to delete %q: %v", ifName, err)
+       }
+
+       return addrs[0].IPNet, nil
+}
diff --git a/pkg/ip/route.go b/pkg/ip/route.go
new file mode 100644 (file)
index 0000000..f310f1e
--- /dev/null
@@ -0,0 +1,47 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip
+
+import (
+       "net"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+)
+
+// AddDefaultRoute sets the default route on the given gateway.
+func AddDefaultRoute(gw net.IP, dev netlink.Link) error {
+       _, defNet, _ := net.ParseCIDR("0.0.0.0/0")
+       return AddRoute(defNet, gw, dev)
+}
+
+// AddRoute adds a universally-scoped route to a device.
+func AddRoute(ipn *net.IPNet, gw net.IP, dev netlink.Link) error {
+       return netlink.RouteAdd(&netlink.Route{
+               LinkIndex: dev.Attrs().Index,
+               Scope:     netlink.SCOPE_UNIVERSE,
+               Dst:       ipn,
+               Gw:        gw,
+       })
+}
+
+// AddHostRoute adds a host-scoped route to a device.
+func AddHostRoute(ipn *net.IPNet, gw net.IP, dev netlink.Link) error {
+       return netlink.RouteAdd(&netlink.Route{
+               LinkIndex: dev.Attrs().Index,
+               Scope:     netlink.SCOPE_HOST,
+               Dst:       ipn,
+               Gw:        gw,
+       })
+}
diff --git a/pkg/ns/ns.go b/pkg/ns/ns.go
new file mode 100644 (file)
index 0000000..82291f9
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ns
+
+import (
+       "fmt"
+       "os"
+       "runtime"
+       "syscall"
+)
+
+// setNsMap holds the number of the setns syscall for each supported
+// architecture, keyed by runtime.GOARCH. SetNS passes the value to
+// syscall.RawSyscall.
+// On arm the number is 375 (374 is sendmmsg).
+var setNsMap = map[string]uintptr{
+       "386":     346,
+       "amd64":   308,
+       "arm":     375,
+       "arm64":   268,
+       "ppc64":   350,
+       "ppc64le": 350,
+       "s390x":   339,
+}
+
+// SetNS sets the network namespace on a target file.
+func SetNS(f *os.File, flags uintptr) error {
+       if runtime.GOOS != "linux" {
+               return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
+       }
+
+       trap, ok := setNsMap[runtime.GOARCH]
+       if !ok {
+               return fmt.Errorf("unsupported arch: %s", runtime.GOARCH)
+       }
+
+       _, _, err := syscall.RawSyscall(trap, f.Fd(), flags, 0)
+       if err != 0 {
+               return err
+       }
+
+       return nil
+}
+
+// WithNetNSPath executes the passed closure under the given network
+// namespace, restoring the original namespace afterwards.
+func WithNetNSPath(nspath string, f func(*os.File) error) error {
+       ns, err := os.Open(nspath)
+       if err != nil {
+               return fmt.Errorf("Failed to open %v: %v", nspath, err)
+       }
+       defer ns.Close()
+
+       return WithNetNS(ns, f)
+}
+
+// WithNetNS executes the passed closure under the given network
+// namespace, restoring the original namespace afterwards.
+func WithNetNS(ns *os.File, f func(*os.File) error) error {
+       // save a handle to current (host) network namespace
+       thisNS, err := os.Open("/proc/self/ns/net")
+       if err != nil {
+               return fmt.Errorf("Failed to open /proc/self/ns/net: %v", err)
+       }
+       defer thisNS.Close()
+
+       if err = SetNS(ns, syscall.CLONE_NEWNET); err != nil {
+               return fmt.Errorf("Error switching to ns %v: %v", ns.Name(), err)
+       }
+
+       if err = f(thisNS); err != nil {
+               return err
+       }
+
+       // switch back
+       return SetNS(thisNS, syscall.CLONE_NEWNET)
+}
diff --git a/pkg/plugin/ipam.go b/pkg/plugin/ipam.go
new file mode 100644 (file)
index 0000000..8b59cab
--- /dev/null
@@ -0,0 +1,136 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package plugin
+
+import (
+       "bytes"
+       "encoding/json"
+       "fmt"
+       "os"
+       "os/exec"
+       "path/filepath"
+       "strings"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+       "github.com/appc/cni/pkg/ip"
+)
+
+// Find returns the full path of the plugin by searching in CNI_PATH
+func Find(plugin string) string {
+       paths := strings.Split(os.Getenv("CNI_PATH"), ":")
+
+       for _, p := range paths {
+               fullname := filepath.Join(p, plugin)
+               if fi, err := os.Stat(fullname); err == nil && fi.Mode().IsRegular() {
+                       return fullname
+               }
+       }
+
+       return ""
+}
+
+// ExecAdd locates the named IPAM plugin via Find and runs it with
+// netconf on its stdin, requiring CNI_COMMAND to be ADD. The plugin's
+// stdout is decoded as JSON into a Result; its stderr is passed through
+// to os.Stderr.
+func ExecAdd(plugin string, netconf []byte) (*Result, error) {
+       if cmd := os.Getenv("CNI_COMMAND"); cmd != "ADD" {
+               return nil, fmt.Errorf("CNI_COMMAND is not ADD")
+       }
+
+       pluginPath := Find(plugin)
+       if pluginPath == "" {
+               return nil, fmt.Errorf("could not find %q plugin", plugin)
+       }
+
+       var output bytes.Buffer
+       c := exec.Cmd{
+               Path:   pluginPath,
+               Args:   []string{pluginPath},
+               Stdin:  bytes.NewBuffer(netconf),
+               Stdout: &output,
+               Stderr: os.Stderr,
+       }
+       if err := c.Run(); err != nil {
+               return nil, err
+       }
+
+       res := new(Result)
+       err := json.Unmarshal(output.Bytes(), res)
+       return res, err
+}
+
+// ExecDel executes IPAM plugin, assuming CNI_COMMAND == DEL.
+func ExecDel(plugin string, netconf []byte) error {
+       if os.Getenv("CNI_COMMAND") != "DEL" {
+               return fmt.Errorf("CNI_COMMAND is not DEL")
+       }
+
+       pluginPath := Find(plugin)
+       if pluginPath == "" {
+               return fmt.Errorf("could not find %q plugin", plugin)
+       }
+
+       c := exec.Cmd{
+               Path:   pluginPath,
+               Args:   []string{pluginPath},
+               Stdin:  bytes.NewBuffer(netconf),
+               Stderr: os.Stderr,
+       }
+       return c.Run()
+}
+
+// ConfigureIface takes the result of IPAM plugin and
+// applies to the ifName interface
+func ConfigureIface(ifName string, res *Result) error {
+       link, err := netlink.LinkByName(ifName)
+       if err != nil {
+               return fmt.Errorf("failed to lookup %q: %v", ifName, err)
+       }
+
+       if err := netlink.LinkSetUp(link); err != nil {
+               return fmt.Errorf("failed too set %q UP: %v", ifName, err)
+       }
+
+       // TODO(eyakubovich): IPv6
+       addr := &netlink.Addr{IPNet: &res.IP4.IP, Label: ""}
+       if err = netlink.AddrAdd(link, addr); err != nil {
+               return fmt.Errorf("failed to add IP addr to %q: %v", ifName, err)
+       }
+
+       for _, r := range res.IP4.Routes {
+               gw := r.GW
+               if gw == nil {
+                       gw = res.IP4.Gateway
+               }
+               if err = ip.AddRoute(&r.Dst, gw, link); err != nil {
+                       // we skip over duplicate routes as we assume the first one wins
+                       if !os.IsExist(err) {
+                               return fmt.Errorf("failed to add route '%v via %v dev %v': %v", r.Dst, gw, ifName, err)
+                       }
+               }
+       }
+
+       return nil
+}
+
+// PrintResult writes out prettified Result to stdout
+func PrintResult(res *Result) error {
+       data, err := json.MarshalIndent(res, "", "    ")
+       if err != nil {
+               return err
+       }
+       _, err = os.Stdout.Write(data)
+       return err
+}
diff --git a/pkg/plugin/types.go b/pkg/plugin/types.go
new file mode 100644 (file)
index 0000000..6eb6ac2
--- /dev/null
@@ -0,0 +1,106 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package plugin
+
+import (
+       "encoding/json"
+       "net"
+
+       "github.com/appc/cni/pkg/ip"
+)
+
+// NetConf describes a network. It holds the keys of the JSON network
+// configuration that are common to all plugins.
+type NetConf struct {
+       // Name is the "name" key of the configuration.
+       Name string `json:"name,omitempty"`
+       // Type is the top-level "type" key of the configuration.
+       Type string `json:"type,omitempty"`
+       // IPAM is the nested "ipam" object; only its "type" key is decoded here.
+       IPAM struct {
+               Type string `json:"type,omitempty"`
+       } `json:"ipam,omitempty"`
+}
+
+// Result is what gets returned from the plugin (via stdout) to the caller.
+// Either section may be nil, in which case it is left out of the JSON.
+type Result struct {
+       // IP4 is encoded under the "ip4" key.
+       IP4 *IPConfig `json:"ip4,omitempty"`
+       // IP6 is encoded under the "ip6" key.
+       IP6 *IPConfig `json:"ip6,omitempty"`
+}
+
+// IPConfig contains values necessary to configure an interface.
+// Its JSON form is produced and parsed through the ipConfig mirror type
+// by the MarshalJSON and UnmarshalJSON methods.
+type IPConfig struct {
+       // IP is the address together with its network mask.
+       IP      net.IPNet
+       // Gateway is used for routes that carry no gateway of their own.
+       Gateway net.IP
+       // Routes lists the routes to install.
+       Routes  []Route
+}
+
+// Route is a single route: a destination network and an optional gateway.
+// Its JSON form is produced and parsed through the route mirror type.
+type Route struct {
+       // Dst is the destination network of the route.
+       Dst net.IPNet
+       // GW is the gateway of the route; it may be nil.
+       GW  net.IP
+}
+
+// net.IPNet is not JSON (un)marshallable so this duality is needed
+// for our custom ip.IPNet type
+
+// JSON (un)marshallable types
+
+// ipConfig mirrors IPConfig with the IP field typed as ip.IPNet; it is the
+// value actually passed to encoding/json by IPConfig's JSON methods.
+type ipConfig struct {
+       IP      ip.IPNet `json:"ip"`
+       Gateway net.IP   `json:"gateway,omitempty"`
+       Routes  []Route  `json:"routes,omitempty"`
+}
+
+// route mirrors Route with the Dst field typed as ip.IPNet; it is the
+// value actually passed to encoding/json by Route's JSON methods.
+type route struct {
+       Dst ip.IPNet `json:"dst"`
+       GW  net.IP   `json:"gw,omitempty"`
+}
+
+func (c *IPConfig) MarshalJSON() ([]byte, error) {
+       ipc := ipConfig{
+               IP:      ip.IPNet(c.IP),
+               Gateway: c.Gateway,
+               Routes:  c.Routes,
+       }
+
+       return json.Marshal(ipc)
+}
+
+// UnmarshalJSON decodes data into the ipConfig mirror type and copies the
+// result into c. On a decoding error c is left untouched.
+func (c *IPConfig) UnmarshalJSON(data []byte) error {
+       var decoded ipConfig
+       err := json.Unmarshal(data, &decoded)
+       if err != nil {
+               return err
+       }
+
+       c.IP, c.Gateway, c.Routes = net.IPNet(decoded.IP), decoded.Gateway, decoded.Routes
+       return nil
+}
+
+// UnmarshalJSON decodes data into the route mirror type and copies the
+// result into r. On a decoding error r is left untouched.
+func (r *Route) UnmarshalJSON(data []byte) error {
+       var decoded route
+       err := json.Unmarshal(data, &decoded)
+       if err != nil {
+               return err
+       }
+
+       r.Dst, r.GW = net.IPNet(decoded.Dst), decoded.GW
+       return nil
+}
+
+func (r *Route) MarshalJSON() ([]byte, error) {
+       rt := route{
+               Dst: ip.IPNet(r.Dst),
+               GW:  r.GW,
+       }
+
+       return json.Marshal(rt)
+}
diff --git a/pkg/skel/skel.go b/pkg/skel/skel.go
new file mode 100644 (file)
index 0000000..9f03335
--- /dev/null
@@ -0,0 +1,98 @@
+// Copyright 2014 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package skel provides skeleton code for a CNI plugin.
+// In particular, it implements argument parsing and validation.
+package skel
+
+import (
+       "io/ioutil"
+       "log"
+       "os"
+)
+
+// CmdArgs captures all the arguments passed in to the plugin
+// via both env vars and stdin. PluginMain fills it in before invoking
+// the add or del callback.
+type CmdArgs struct {
+       // ContainerID is the value of CNI_CONTAINERID (may be empty).
+       ContainerID string
+       // Netns is the value of CNI_NETNS.
+       Netns       string
+       // IfName is the value of CNI_IFNAME.
+       IfName      string
+       // Args is the value of CNI_ARGS (may be empty).
+       Args        string
+       // Path is the value of CNI_PATH.
+       Path        string
+       // StdinData is everything read from stdin.
+       StdinData   []byte
+}
+
+// PluginMain is the "main" for a plugin. It reads the CNI_* environment
+// variables and all of stdin into a CmdArgs, then calls cmdAdd when
+// CNI_COMMAND is "ADD" or cmdDel when it is "DEL".
+//
+// The process exits with status 1, after logging the reason, when a
+// required variable is missing, stdin cannot be read, the command is
+// unknown, or the callback returns an error.
+func PluginMain(cmdAdd, cmdDel func(_ *CmdArgs) error) {
+       var command string
+       parsed := &CmdArgs{}
+
+       // each entry maps an env variable onto the place its value is stored
+       env := []struct {
+               key      string
+               dst      *string
+               required bool
+       }{
+               {"CNI_COMMAND", &command, true},
+               {"CNI_CONTAINERID", &parsed.ContainerID, false},
+               {"CNI_NETNS", &parsed.Netns, true},
+               {"CNI_IFNAME", &parsed.IfName, true},
+               {"CNI_ARGS", &parsed.Args, false},
+               {"CNI_PATH", &parsed.Path, true},
+       }
+
+       // report every missing variable before giving up, not just the first
+       missing := 0
+       for _, e := range env {
+               if *e.dst = os.Getenv(e.key); e.required && *e.dst == "" {
+                       log.Printf("%v env variable missing", e.key)
+                       missing++
+               }
+       }
+       if missing > 0 {
+               os.Exit(1)
+       }
+
+       input, err := ioutil.ReadAll(os.Stdin)
+       if err != nil {
+               log.Printf("Error reading from stdin: %v", err)
+               os.Exit(1)
+       }
+       parsed.StdinData = input
+
+       var handler func(_ *CmdArgs) error
+       switch command {
+       case "ADD":
+               handler = cmdAdd
+       case "DEL":
+               handler = cmdDel
+       default:
+               log.Printf("Unknown CNI_COMMAND: %v", command)
+               os.Exit(1)
+       }
+
+       if err := handler(parsed); err != nil {
+               log.Printf("%v: %v", command, err)
+               os.Exit(1)
+       }
+}
diff --git a/plugins/ipam/host-local/README.md b/plugins/ipam/host-local/README.md
new file mode 100644 (file)
index 0000000..27fe928
--- /dev/null
@@ -0,0 +1,86 @@
+# host-local IP address manager
+
+host-local IPAM allocates IPv4 and IPv6 addresses out of a specified address range.
+
+## Usage
+
+### Obtain an IP
+
+Given the following network configuration:
+
+```
+{
+    "name": "default",
+    "ipam": {
+        "type": "host-local",
+        "subnet": "203.0.113.0/24"
+    }
+}
+```
+
+#### Using the command line interface
+
+```
+$ export CNI_COMMAND=ADD
+$ export CNI_CONTAINERID=f81d4fae-7dec-11d0-a765-00a0c91e6bf6
+$ ./host-local < $conf
+```
+
+```
+{
+    "ip4": {
+        "ip": "203.0.113.1/24"
+    }
+}
+```
+
+## Backends
+
+By default, host-local stores IP allocations on the local filesystem, using the IP address as the file name and the ID as the file contents. For example:
+
+```
+$ ls /var/lib/cni/networks/default
+```
+```
+203.0.113.1    203.0.113.2
+```
+
+```
+$ cat /var/lib/cni/networks/default/203.0.113.1
+```
+```
+f81d4fae-7dec-11d0-a765-00a0c91e6bf6
+```
+
+## Configuration Files
+
+
+```
+{
+    "name": "ipv6",
+    "ipam": {
+        "type": "host-local",
+        "subnet": "3ffe:ffff:0:01ff::/64",
+        "rangeStart": "3ffe:ffff:0:01ff::0010",
+        "rangeEnd": "3ffe:ffff:0:01ff::0020",
+        "routes": [
+            { "dst": "3ffe:ffff:0:01ff::1/64" }
+        ]
+    }
+}
+```
+
+```
+{
+    "name": "ipv4",
+    "ipam": {
+        "type": "host-local",
+        "subnet": "203.0.113.1/24",
+        "rangeStart": "203.0.113.10",
+        "rangeEnd": "203.0.113.20",
+        "routes": [
+            { "dst": "203.0.113.0/24" }
+        ]
+    }
+}
+```
diff --git a/plugins/ipam/host-local/allocator.go b/plugins/ipam/host-local/allocator.go
new file mode 100644 (file)
index 0000000..0f3699d
--- /dev/null
@@ -0,0 +1,185 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "fmt"
+       "net"
+
+       "github.com/appc/cni/pkg/ip"
+       "github.com/appc/cni/pkg/plugin"
+       "github.com/appc/cni/plugins/ipam/host-local/backend"
+)
+
+// IPAllocator hands out addresses from a range and records them in a
+// backend store.
+type IPAllocator struct {
+       // start is the first address considered for allocation.
+       start net.IP
+       // end is where the allocation loops stop; it is itself never allocated.
+       end   net.IP
+       // conf is the IPAM configuration the allocator was built from.
+       conf  *IPAMConfig
+       // store records reservations and provides the lock guarding them.
+       store backend.Store
+}
+
+func NewIPAllocator(conf *IPAMConfig, store backend.Store) (*IPAllocator, error) {
+       var (
+               start net.IP
+               end   net.IP
+               err   error
+       )
+       start, end, err = networkRange((*net.IPNet)(&conf.Subnet))
+       if err != nil {
+               return nil, err
+       }
+
+       // skip the .0 address
+       start = ip.NextIP(start)
+
+       if conf.RangeStart != nil {
+               if err := validateRangeIP(conf.RangeStart, (*net.IPNet)(&conf.Subnet)); err != nil {
+                       return nil, err
+               }
+               start = conf.RangeStart
+       }
+       if conf.RangeEnd != nil {
+               if err := validateRangeIP(conf.RangeEnd, (*net.IPNet)(&conf.Subnet)); err != nil {
+                       return nil, err
+               }
+               // RangeEnd is inclusive
+               end = ip.NextIP(conf.RangeEnd)
+       }
+
+       return &IPAllocator{start, end, conf, store}, nil
+}
+
+func validateRangeIP(ip net.IP, ipnet *net.IPNet) error {
+       if !ipnet.Contains(ip) {
+               return fmt.Errorf("%s not in network: %s", ip, ipnet)
+       }
+       return nil
+}
+
+// Returns newly allocated IP along with its config
+func (a *IPAllocator) Get(id string) (*plugin.IPConfig, error) {
+       a.store.Lock()
+       defer a.store.Unlock()
+
+       gw := a.conf.Gateway
+       if gw == nil {
+               gw = ip.NextIP(a.conf.Subnet.IP)
+       }
+
+       for cur := a.start; !cur.Equal(a.end); cur = ip.NextIP(cur) {
+               // don't allocate gateway IP
+               if gw != nil && cur.Equal(gw) {
+                       continue
+               }
+
+               reserved, err := a.store.Reserve(id, cur)
+               if err != nil {
+                       return nil, err
+               }
+               if reserved {
+                       return &plugin.IPConfig{
+                               IP:      net.IPNet{cur, a.conf.Subnet.Mask},
+                               Gateway: gw,
+                               Routes:  a.conf.Routes,
+                       }, nil
+               }
+       }
+
+       return nil, fmt.Errorf("no IP addresses available in network: %s", a.conf.Name)
+}
+
+// Allocates both an IP and the Gateway IP, i.e. a /31
+// This is used for Point-to-Point links
+func (a *IPAllocator) GetPtP(id string) (*plugin.IPConfig, error) {
+       a.store.Lock()
+       defer a.store.Unlock()
+
+       for cur := a.start; !cur.Equal(a.end); cur = ip.NextIP(cur) {
+               // we're looking for unreserved even, odd pair
+               if !evenIP(cur) {
+                       continue
+               }
+
+               gw := cur
+               reserved, err := a.store.Reserve(id, gw)
+               if err != nil {
+                       return nil, err
+               }
+               if reserved {
+                       cur = ip.NextIP(cur)
+                       if cur.Equal(a.end) {
+                               break
+                       }
+
+                       reserved, err := a.store.Reserve(id, cur)
+                       if err != nil {
+                               return nil, err
+                       }
+                       if reserved {
+                               // found them both!
+                               _, bits := a.conf.Subnet.Mask.Size()
+                               mask := net.CIDRMask(bits-1, bits)
+
+                               return &plugin.IPConfig{
+                                       IP:      net.IPNet{cur, mask},
+                                       Gateway: gw,
+                                       Routes:  a.conf.Routes,
+                               }, nil
+                       }
+               }
+       }
+
+       return nil, fmt.Errorf("no ip addresses available in network: %s", a.conf.Name)
+}
+
+// Releases all IPs allocated for the container with given ID
+func (a *IPAllocator) Release(id string) error {
+       a.store.Lock()
+       defer a.store.Unlock()
+
+       return a.store.ReleaseByID(id)
+}
+
+func networkRange(ipnet *net.IPNet) (net.IP, net.IP, error) {
+       ip := ipnet.IP.To4()
+       if ip == nil {
+               ip = ipnet.IP.To16()
+               if ip == nil {
+                       return nil, nil, fmt.Errorf("IP not v4 nor v6")
+               }
+       }
+
+       if len(ip) != len(ipnet.Mask) {
+               return nil, nil, fmt.Errorf("IPNet IP and Mask version mismatch")
+       }
+
+       var end net.IP
+       for i := 0; i < len(ip); i++ {
+               end = append(end, ip[i]|^ipnet.Mask[i])
+       }
+       return ipnet.IP, end, nil
+}
+
+func evenIP(ip net.IP) bool {
+       i := ip.To4()
+       if i == nil {
+               i = ip.To16()
+               if i == nil {
+                       panic("IP is not v4 or v6")
+               }
+       }
+
+       return i[len(i)-1]%2 == 0
+}
diff --git a/plugins/ipam/host-local/backend/disk/backend.go b/plugins/ipam/host-local/backend/disk/backend.go
new file mode 100644 (file)
index 0000000..15a67fe
--- /dev/null
@@ -0,0 +1,88 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disk
+
+import (
+       "io/ioutil"
+       "net"
+       "os"
+       "path/filepath"
+)
+
+// defaultDataDir is the directory under which New creates one
+// subdirectory per network.
+var defaultDataDir = "/var/lib/cni/networks"
+
+// Store keeps IP reservations as files inside dataDir. The embedded
+// FileLock supplies its Lock, Unlock and Close methods.
+type Store struct {
+       FileLock
+       dataDir string
+}
+
+func New(network string) (*Store, error) {
+       dir := filepath.Join(defaultDataDir, network)
+       if err := os.MkdirAll(dir, 0644); err != nil {
+               return nil, err
+       }
+
+       lk, err := NewFileLock(dir)
+       if err != nil {
+               return nil, err
+       }
+       return &Store{*lk, dir}, nil
+}
+
+func (s *Store) Reserve(id string, ip net.IP) (bool, error) {
+       fname := filepath.Join(s.dataDir, ip.String())
+       f, err := os.OpenFile(fname, os.O_RDWR|os.O_EXCL|os.O_CREATE, 0644)
+       if os.IsExist(err) {
+               return false, nil
+       }
+       if err != nil {
+               return false, err
+       }
+       if _, err := f.WriteString(id); err != nil {
+               f.Close()
+               os.Remove(f.Name())
+               return false, err
+       }
+       if err := f.Close(); err != nil {
+               os.Remove(f.Name())
+               return false, err
+       }
+       return true, nil
+}
+
+func (s *Store) Release(ip net.IP) error {
+       return os.Remove(filepath.Join(s.dataDir, ip.String()))
+}
+
+// N.B. This function eats errors to be tolerant and
+// release as much as possible
+func (s *Store) ReleaseByID(id string) error {
+       err := filepath.Walk(s.dataDir, func(path string, info os.FileInfo, err error) error {
+               if err != nil || info.IsDir() {
+                       return nil
+               }
+               data, err := ioutil.ReadFile(path)
+               if err != nil {
+                       return nil
+               }
+               if string(data) == id {
+                       if err := os.Remove(path); err != nil {
+                               return nil
+                       }
+               }
+               return nil
+       })
+       return err
+}
diff --git a/plugins/ipam/host-local/backend/disk/lock.go b/plugins/ipam/host-local/backend/disk/lock.go
new file mode 100644 (file)
index 0000000..febb11c
--- /dev/null
@@ -0,0 +1,50 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disk
+
+import (
+       "os"
+       "syscall"
+)
+
+// FileLock wraps os.File to be used as a lock using flock
+type FileLock struct {
+       // f is the open file (or directory) the flock calls are issued on.
+       f *os.File
+}
+
+// NewFileLock opens file/dir at path and returns unlocked FileLock object
+func NewFileLock(path string) (*FileLock, error) {
+       f, err := os.Open(path)
+       if err != nil {
+               return nil, err
+       }
+
+       return &FileLock{f}, nil
+}
+
+// Close closes underlying file
+func (l *FileLock) Close() error {
+       return l.f.Close()
+}
+
+// Lock acquires an exclusive lock
+func (l *FileLock) Lock() error {
+       return syscall.Flock(int(l.f.Fd()), syscall.LOCK_EX)
+}
+
+// Unlock releases the lock
+func (l *FileLock) Unlock() error {
+       return syscall.Flock(int(l.f.Fd()), syscall.LOCK_UN)
+}
diff --git a/plugins/ipam/host-local/backend/store.go b/plugins/ipam/host-local/backend/store.go
new file mode 100644 (file)
index 0000000..b06bd18
--- /dev/null
@@ -0,0 +1,26 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package backend
+
+import "net"
+
+// Store is the storage backend an IP allocator works against.
+type Store interface {
+       // Lock and Unlock bracket a group of store operations.
+       Lock() error
+       Unlock() error
+       // Close releases the resources held by the store.
+       Close() error
+       // Reserve tries to record ip as taken by id; the bool reports
+       // whether the reservation was made.
+       Reserve(id string, ip net.IP) (bool, error)
+       // Release drops the reservation of ip.
+       Release(ip net.IP) error
+       // ReleaseByID drops the reservations held by id.
+       ReleaseByID(id string) error
+}
diff --git a/plugins/ipam/host-local/config.go b/plugins/ipam/host-local/config.go
new file mode 100644 (file)
index 0000000..1dd017c
--- /dev/null
@@ -0,0 +1,57 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "encoding/json"
+       "fmt"
+       "net"
+
+       "github.com/appc/cni/pkg/ip"
+       "github.com/appc/cni/pkg/plugin"
+)
+
+// IPAMConfig represents the IP related network configuration.
+// It is decoded from the "ipam" object of the network configuration.
+type IPAMConfig struct {
+       // Name is copied from the enclosing network's "name" key by
+       // LoadIPAMConfig; it has no JSON tag of its own.
+       Name       string
+       Type       string         `json:"type"`
+       // RangeStart and RangeEnd are optional allocation bounds.
+       RangeStart net.IP         `json:"rangeStart"`
+       RangeEnd   net.IP         `json:"rangeEnd"`
+       Subnet     ip.IPNet       `json:"subnet"`
+       // Gateway is optional.
+       Gateway    net.IP         `json:"gateway"`
+       Routes     []plugin.Route `json:"routes"`
+}
+
+// Net is the part of the network configuration LoadIPAMConfig decodes:
+// the network name and the nested "ipam" object.
+type Net struct {
+       Name string      `json:"name"`
+       IPAM *IPAMConfig `json:"ipam"`
+}
+
+// NewIPAMConfig creates a NetworkConfig from the given network name.
+func LoadIPAMConfig(bytes []byte) (*IPAMConfig, error) {
+       n := Net{}
+       if err := json.Unmarshal(bytes, &n); err != nil {
+               return nil, err
+       }
+
+       if n.IPAM == nil {
+               return nil, fmt.Errorf("%q missing 'ipam' key")
+       }
+
+       // Copy net name into IPAM so not to drag Net struct around
+       n.IPAM.Name = n.Name
+
+       return n.IPAM, nil
+}
diff --git a/plugins/ipam/host-local/main.go b/plugins/ipam/host-local/main.go
new file mode 100644 (file)
index 0000000..b76d6ce
--- /dev/null
@@ -0,0 +1,85 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "errors"
+
+       "github.com/appc/cni/plugins/ipam/host-local/backend/disk"
+
+       "github.com/appc/cni/pkg/plugin"
+       "github.com/appc/cni/pkg/skel"
+)
+
+// main hands control to skel.PluginMain with this plugin's add and del
+// handlers.
+func main() {
+       skel.PluginMain(cmdAdd, cmdDel)
+}
+
+// cmdAdd handles the ADD command: it loads the IPAM config from stdin,
+// opens the disk store of the network and allocates an address keyed by
+// args.Netns, then prints the result as the IPv4 config. The allocation
+// strategy depends on the configured type: "host-local" takes a single
+// address, "host-local-ptp" takes a gateway/address pair.
+func cmdAdd(args *skel.CmdArgs) error {
+       conf, err := LoadIPAMConfig(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       st, err := disk.New(conf.Name)
+       if err != nil {
+               return err
+       }
+       defer st.Close()
+
+       alloc, err := NewIPAllocator(conf, st)
+       if err != nil {
+               return err
+       }
+
+       // pick the allocation method matching the configured type
+       var allocate func(id string) (*plugin.IPConfig, error)
+       switch conf.Type {
+       case "host-local":
+               allocate = alloc.Get
+       case "host-local-ptp":
+               allocate = alloc.GetPtP
+       default:
+               return errors.New("Unsupported IPAM plugin type")
+       }
+
+       ipConf, err := allocate(args.Netns)
+       if err != nil {
+               return err
+       }
+
+       return plugin.PrintResult(&plugin.Result{IP4: ipConf})
+}
+
+func cmdDel(args *skel.CmdArgs) error {
+       ipamConf, err := LoadIPAMConfig(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       store, err := disk.New(ipamConf.Name)
+       if err != nil {
+               return err
+       }
+       defer store.Close()
+
+       allocator, err := NewIPAllocator(ipamConf, store)
+       if err != nil {
+               return err
+       }
+
+       return allocator.Release(args.Netns)
+}
diff --git a/plugins/main/bridge/bridge.go b/plugins/main/bridge/bridge.go
new file mode 100644 (file)
index 0000000..9257679
--- /dev/null
@@ -0,0 +1,243 @@
+// Copyright 2014 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "encoding/json"
+       "errors"
+       "fmt"
+       "net"
+       "os"
+       "runtime"
+       "syscall"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+       "github.com/appc/cni/pkg/ip"
+       "github.com/appc/cni/pkg/ns"
+       "github.com/appc/cni/pkg/plugin"
+       "github.com/appc/cni/pkg/skel"
+)
+
+// defaultBrName is the bridge name used when the config has no "bridge" key.
+const defaultBrName = "cni0"
+
+// NetConf is the bridge plugin's network configuration: the common
+// plugin.NetConf keys plus the bridge-specific ones.
+type NetConf struct {
+       plugin.NetConf
+       // BrName is the name of the bridge device.
+       BrName string `json:"bridge"`
+       // IsGW makes the bridge carry the gateway address.
+       IsGW   bool   `json:"isGateway"`
+       // IPMasq enables IP masquerading setup for the allocated network.
+       IPMasq bool   `json:"ipMasq"`
+       // MTU is passed on when creating the bridge and the veth pair.
+       MTU    int    `json:"mtu"`
+}
+
+// init locks the initial goroutine to its OS thread before main runs.
+func init() {
+       // this ensures that main runs only on main thread (thread group leader).
+       // since namespace ops (unshare, setns) are done for a single thread, we
+       // must ensure that the goroutine does not jump from OS thread to thread
+       runtime.LockOSThread()
+}
+
+func loadNetConf(bytes []byte) (*NetConf, error) {
+       n := &NetConf{
+               BrName: defaultBrName,
+       }
+       if err := json.Unmarshal(bytes, n); err != nil {
+               return nil, fmt.Errorf("failed to load netconf: %v", err)
+       }
+       return n, nil
+}
+
+func ensureBridgeAddr(br *netlink.Bridge, ipn *net.IPNet) error {
+       addrs, err := netlink.AddrList(br, syscall.AF_INET)
+       if err != nil && err != syscall.ENOENT {
+               return fmt.Errorf("could not get list of IP addresses: %v", err)
+       }
+
+       // if there're no addresses on the bridge, it's ok -- we'll add one
+       if len(addrs) > 0 {
+               ipnStr := ipn.String()
+               for _, a := range addrs {
+                       // string comp is actually easiest for doing IPNet comps
+                       if a.IPNet.String() == ipnStr {
+                               return nil
+                       }
+               }
+               return fmt.Errorf("%q already has an IP address different from %v", br.Name, ipn.String())
+       }
+
+       addr := &netlink.Addr{IPNet: ipn, Label: ""}
+       if err := netlink.AddrAdd(br, addr); err != nil {
+               return fmt.Errorf("could not add IP address to %q: %v", br.Name, err)
+       }
+       return nil
+}
+
+func bridgeByName(name string) (*netlink.Bridge, error) {
+       l, err := netlink.LinkByName(name)
+       if err != nil {
+               return nil, fmt.Errorf("could not lookup %q: %v", name, err)
+       }
+       br, ok := l.(*netlink.Bridge)
+       if !ok {
+               return nil, fmt.Errorf("%q already exists but is not a bridge", name)
+       }
+       return br, nil
+}
+
+func ensureBridge(brName string, mtu int, ipn *net.IPNet) (*netlink.Bridge, error) {
+       br := &netlink.Bridge{
+               LinkAttrs: netlink.LinkAttrs{
+                       Name: brName,
+                       MTU:  mtu,
+               },
+       }
+
+       if err := netlink.LinkAdd(br); err != nil {
+               if err != syscall.EEXIST {
+                       return nil, fmt.Errorf("could not add %q: %v", brName, err)
+               }
+
+               // it's ok if the device already exists as long as config is similar
+               br, err = bridgeByName(brName)
+               if err != nil {
+                       return nil, err
+               }
+       }
+
+       if err := netlink.LinkSetUp(br); err != nil {
+               return nil, err
+       }
+
+       if ipn != nil {
+               return br, ensureBridgeAddr(br, ipn)
+       }
+
+       return br, nil
+}
+
+// setupVeth creates a veth pair inside the namespace at path netns,
+// configures the container end ifName from pr, and attaches the host end
+// to the bridge br.
+func setupVeth(netns string, br *netlink.Bridge, ifName string, mtu int, pr *plugin.Result) error {
+       var hostIfName string
+
+       inContainer := func(hostNS *os.File) error {
+               // create the veth pair in the container and move host end into host netns
+               hostEnd, _, err := ip.SetupVeth(netns, ifName, mtu, hostNS)
+               if err != nil {
+                       return err
+               }
+
+               if err := plugin.ConfigureIface(ifName, pr); err != nil {
+                       return err
+               }
+
+               hostIfName = hostEnd.Attrs().Name
+               return nil
+       }
+       if err := ns.WithNetNSPath(netns, inContainer); err != nil {
+               return err
+       }
+
+       // need to lookup hostVeth again as its index has changed during ns move
+       hostEnd, err := netlink.LinkByName(hostIfName)
+       if err != nil {
+               return fmt.Errorf("failed to lookup %q: %v", hostIfName, err)
+       }
+
+       // connect host veth end to the bridge
+       if err := netlink.LinkSetMaster(hostEnd, br); err != nil {
+               return fmt.Errorf("failed to connect %q to bridge %v: %v", hostIfName, br.Attrs().Name, err)
+       }
+
+       return nil
+}
+
+func calcGatewayIP(ipn *net.IPNet) net.IP {
+       nid := ipn.IP.Mask(ipn.Mask)
+       return ip.NextIP(nid)
+}
+
+// setupBridge ensures the bridge named in n exists. When n.IsGW is set,
+// the bridge is also given the gateway address of ipConf, using the mask
+// of ipConf.IP.
+func setupBridge(n *NetConf, ipConf *plugin.IPConfig) (*netlink.Bridge, error) {
+       var bridgeAddr *net.IPNet
+       if n.IsGW {
+               bridgeAddr = new(net.IPNet)
+               bridgeAddr.IP = ipConf.Gateway
+               bridgeAddr.Mask = ipConf.IP.Mask
+       }
+
+       // create bridge if necessary
+       br, err := ensureBridge(n.BrName, n.MTU, bridgeAddr)
+       if err == nil {
+               return br, nil
+       }
+       return nil, fmt.Errorf("failed to create bridge %q: %v", n.BrName, err)
+}
+
+func cmdAdd(args *skel.CmdArgs) error {
+       n, err := loadNetConf(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       // run the IPAM plugin and get back the config to apply
+       result, err := plugin.ExecAdd(n.IPAM.Type, args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       if result.IP4 == nil {
+               return errors.New("IPAM plugin returned missing IPv4 config")
+       }
+
+       if result.IP4.Gateway == nil && n.IsGW {
+               result.IP4.Gateway = calcGatewayIP(&result.IP4.IP)
+       }
+
+       br, err := setupBridge(n, result.IP4)
+       if err != nil {
+               return err
+       }
+
+       if err = setupVeth(args.Netns, br, args.IfName, n.MTU, result); err != nil {
+               return err
+       }
+
+       if n.IPMasq {
+               chain := "CNI-" + n.Name
+               if err = ip.SetupIPMasq(ip.Network(&result.IP4.IP), chain); err != nil {
+                       return err
+               }
+       }
+
+       return plugin.PrintResult(result)
+}
+
+func cmdDel(args *skel.CmdArgs) error {
+       n, err := loadNetConf(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       err = ns.WithNetNSPath(args.Netns, func(hostNS *os.File) error {
+               return ip.DelLinkByName(args.IfName)
+       })
+       if err != nil {
+               return err
+       }
+
+       return plugin.ExecDel(n.IPAM.Type, args.StdinData)
+}
+
+// main hands control to skel.PluginMain with this plugin's add and del
+// handlers.
+func main() {
+       skel.PluginMain(cmdAdd, cmdDel)
+}
diff --git a/plugins/main/macvlan/macvlan.go b/plugins/main/macvlan/macvlan.go
new file mode 100644 (file)
index 0000000..7ddfab8
--- /dev/null
@@ -0,0 +1,177 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "encoding/json"
+       "errors"
+       "fmt"
+       "os"
+       "runtime"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+       "github.com/appc/cni/pkg/ip"
+       "github.com/appc/cni/pkg/ns"
+       "github.com/appc/cni/pkg/plugin"
+       "github.com/appc/cni/pkg/skel"
+)
+
+// NetConf is the macvlan plugin configuration decoded from the JSON network
+// config. It embeds the common plugin.NetConf fields.
+type NetConf struct {
+       plugin.NetConf
+       // Master is the host interface name to virtualize; loadConf rejects an
+       // empty value.
+       Master string `json:"master"`
+       // Mode is the macvlan mode: "bridge", "private", "vepa" or "passthru".
+       // An empty value selects bridge mode (see modeFromString).
+       Mode   string `json:"mode"`
+       // IPMasq makes cmdAdd call ip.SetupIPMasq for the allocated network.
+       IPMasq bool   `json:"ipMasq"`
+       // MTU is copied into the link attributes of the created macvlan.
+       MTU    int    `json:"mtu"`
+}
+
+// init locks the initial goroutine to its OS thread before main starts.
+func init() {
+       // This ensures that main runs only on the main thread (thread group
+       // leader). Namespace operations (unshare, setns) apply to a single OS
+       // thread, so the goroutine must not jump from one OS thread to another.
+       runtime.LockOSThread()
+}
+
+// loadConf decodes the JSON network configuration in bytes into a NetConf.
+// It returns an error when the JSON cannot be decoded or when the required
+// "master" field is empty.
+func loadConf(bytes []byte) (*NetConf, error) {
+       var conf NetConf
+       err := json.Unmarshal(bytes, &conf)
+       switch {
+       case err != nil:
+               return nil, fmt.Errorf("failed to load netconf: %v", err)
+       case conf.Master == "":
+               return nil, fmt.Errorf(`"master" field is required. It specifies the host interface name to virtualize`)
+       }
+       return &conf, nil
+}
+
+// modeFromString translates the "mode" config string into a netlink macvlan
+// mode. An empty string is treated like "bridge"; any unrecognized value
+// yields an error.
+func modeFromString(s string) (netlink.MacvlanMode, error) {
+       var mode netlink.MacvlanMode
+       switch s {
+       case "", "bridge":
+               // bridge is the default when no mode is configured
+               mode = netlink.MACVLAN_MODE_BRIDGE
+       case "private":
+               mode = netlink.MACVLAN_MODE_PRIVATE
+       case "vepa":
+               mode = netlink.MACVLAN_MODE_VEPA
+       case "passthru":
+               mode = netlink.MACVLAN_MODE_PASSTHRU
+       default:
+               return 0, fmt.Errorf("unknown macvlan mode: %q", s)
+       }
+       return mode, nil
+}
+
+func createMacvlan(conf *NetConf, ifName string, netns *os.File) error {
+       mode, err := modeFromString(conf.Mode)
+       if err != nil {
+               return err
+       }
+
+       m, err := netlink.LinkByName(conf.Master)
+       if err != nil {
+               return fmt.Errorf("failed to lookup master %q: %v", conf.Master, err)
+       }
+
+       mv := &netlink.Macvlan{
+               LinkAttrs: netlink.LinkAttrs{
+                       MTU:         conf.MTU,
+                       Name:        ifName,
+                       ParentIndex: m.Attrs().Index,
+                       Namespace:   netlink.NsFd(int(netns.Fd())),
+               },
+               Mode: mode,
+       }
+
+       if err := netlink.LinkAdd(mv); err != nil {
+               return fmt.Errorf("failed to create macvlan: %v", err)
+       }
+
+       return err
+}
+
+func cmdAdd(args *skel.CmdArgs) error {
+       n, err := loadConf(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       netns, err := os.Open(args.Netns)
+       if err != nil {
+               return fmt.Errorf("failed to open netns %q: %v", netns, err)
+       }
+       defer netns.Close()
+
+       tmpName := ip.RandomVethName(args.Netns)
+       if err = createMacvlan(n, tmpName, netns); err != nil {
+               return err
+       }
+
+       // run the IPAM plugin and get back the config to apply
+       result, err := plugin.ExecAdd(n.IPAM.Type, args.StdinData)
+       if err != nil {
+               return err
+       }
+       if result.IP4 == nil {
+               return errors.New("IPAM plugin returned missing IPv4 config")
+       }
+
+       err = ns.WithNetNS(netns, func(_ *os.File) error {
+               err := renameLink(tmpName, args.IfName)
+               if err != nil {
+                       return fmt.Errorf("failed to rename macvlan to %q: %v", args.IfName, err)
+               }
+
+               return plugin.ConfigureIface(args.IfName, result)
+       })
+       if err != nil {
+               return err
+       }
+
+       if n.IPMasq {
+               chain := "CNI-" + n.Name
+               if err = ip.SetupIPMasq(ip.Network(&result.IP4.IP), chain); err != nil {
+                       return err
+               }
+       }
+
+       return plugin.PrintResult(result)
+}
+
+func cmdDel(args *skel.CmdArgs) error {
+       n, err := loadConf(args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       err = plugin.ExecDel(n.IPAM.Type, args.StdinData)
+       if err != nil {
+               return err
+       }
+
+       return ns.WithNetNSPath(args.Netns, func(hostNS *os.File) error {
+               return ip.DelLinkByName(args.IfName)
+       })
+}
+
+func renameLink(curName, newName string) error {
+       link, err := netlink.LinkByName(curName)
+       if err != nil {
+               return err
+       }
+
+       return netlink.LinkSetName(link, newName)
+}
+
+// main passes the ADD and DEL handlers of the macvlan plugin to
+// skel.PluginMain.
+func main() {
+       skel.PluginMain(cmdAdd, cmdDel)
+}
diff --git a/plugins/main/veth/veth.go b/plugins/main/veth/veth.go
new file mode 100644 (file)
index 0000000..2e1783e
--- /dev/null
@@ -0,0 +1,158 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+       "crypto/sha512"
+       "encoding/json"
+       "errors"
+       "fmt"
+       "net"
+       "os"
+       "runtime"
+
+       "github.com/appc/cni/Godeps/_workspace/src/github.com/vishvananda/netlink"
+
+       "github.com/appc/cni/pkg/ip"
+       "github.com/appc/cni/pkg/ns"
+       "github.com/appc/cni/pkg/plugin"
+       "github.com/appc/cni/pkg/skel"
+)
+
+// init locks the initial goroutine to its OS thread before main starts.
+func init() {
+       // This ensures that main runs only on the main thread (thread group
+       // leader). Namespace operations (unshare, setns) apply to a single OS
+       // thread, so the goroutine must not jump from one OS thread to another.
+       runtime.LockOSThread()
+}
+
+// NetConf is the veth plugin configuration decoded from the JSON network
+// config. It embeds the common plugin.NetConf fields.
+type NetConf struct {
+       plugin.NetConf
+       // IPMasq makes cmdAdd set up, and cmdDel tear down, an IP masquerade
+       // chain for the container address.
+       IPMasq bool `json:"ipMasq"`
+       // MTU is passed to ip.SetupVeth when the veth pair is created.
+       MTU    int  `json:"mtu"`
+}
+
+func setupContainerVeth(netns, ifName string, mtu int, pr *plugin.Result) (string, error) {
+       var hostVethName string
+       err := ns.WithNetNSPath(netns, func(hostNS *os.File) error {
+               entropy := netns + ifName
+
+               hostVeth, _, err := ip.SetupVeth(entropy, ifName, mtu, hostNS)
+               if err != nil {
+                       return err
+               }
+
+               err = plugin.ConfigureIface(ifName, pr)
+               if err != nil {
+                       return err
+               }
+
+               hostVethName = hostVeth.Attrs().Name
+
+               return nil
+       })
+       return hostVethName, err
+}
+
+func setupHostVeth(vethName string, ipConf *plugin.IPConfig) error {
+       // hostVeth moved namespaces and may have a new ifindex
+       veth, err := netlink.LinkByName(vethName)
+       if err != nil {
+               return fmt.Errorf("failed to lookup %q: %v", vethName, err)
+       }
+
+       // TODO(eyakubovich): IPv6
+       ipn := &net.IPNet{
+               IP:   ipConf.Gateway,
+               Mask: net.CIDRMask(31, 32),
+       }
+       addr := &netlink.Addr{IPNet: ipn, Label: ""}
+       if err = netlink.AddrAdd(veth, addr); err != nil {
+               return fmt.Errorf("failed to add IP addr (%#v) to veth: %v", ipn, err)
+       }
+
+       // dst happens to be the same as IP/net of host veth
+       if err = ip.AddHostRoute(ipn, nil, veth); err != nil && !os.IsExist(err) {
+               return fmt.Errorf("failed to add route on host: %v", err)
+       }
+
+       return nil
+}
+
+// cmdAdd handles the ADD command for the veth plugin. It decodes the network
+// config, runs the IPAM plugin's ADD, calls setupContainerVeth and
+// setupHostVeth, optionally installs an IP masquerade chain, and prints the
+// IPAM result.
+func cmdAdd(args *skel.CmdArgs) error {
+       var conf NetConf
+       if err := json.Unmarshal(args.StdinData, &conf); err != nil {
+               return fmt.Errorf("failed to load netconf: %v", err)
+       }
+
+       // delegate address allocation to the configured IPAM plugin
+       res, err := plugin.ExecAdd(conf.IPAM.Type, args.StdinData)
+       if err != nil {
+               return err
+       }
+       if res.IP4 == nil {
+               return errors.New("IPAM plugin returned missing IPv4 config")
+       }
+
+       hostVeth, err := setupContainerVeth(args.Netns, args.IfName, conf.MTU, res)
+       if err != nil {
+               return err
+       }
+
+       if err := setupHostVeth(hostVeth, res.IP4); err != nil {
+               return err
+       }
+
+       if conf.IPMasq {
+               // chain name: network name plus the first 8 bytes of the SHA-512
+               // of the netns path, hex encoded
+               digest := sha512.Sum512([]byte(args.Netns))
+               chain := fmt.Sprintf("CNI-%s-%x", conf.Name, digest[:8])
+               if err := ip.SetupIPMasq(&res.IP4.IP, chain); err != nil {
+                       return err
+               }
+       }
+
+       return plugin.PrintResult(res)
+}
+
+// cmdDel handles the DEL command for the veth plugin. It calls
+// ip.DelLinkByNameAddr for args.IfName through ns.WithNetNSPath on args.Netns,
+// tears down the IP masquerade chain when IPMasq is set, and finally runs the
+// DEL command of the configured IPAM plugin.
+func cmdDel(args *skel.CmdArgs) error {
+       var conf NetConf
+       if err := json.Unmarshal(args.StdinData, &conf); err != nil {
+               return fmt.Errorf("failed to load netconf: %v", err)
+       }
+
+       // address returned by ip.DelLinkByNameAddr, needed for the masquerade
+       // teardown below
+       var addr *net.IPNet
+       delLink := func(hostNS *os.File) error {
+               found, err := ip.DelLinkByNameAddr(args.IfName, netlink.FAMILY_V4)
+               addr = found
+               return err
+       }
+       if err := ns.WithNetNSPath(args.Netns, delLink); err != nil {
+               return err
+       }
+
+       if conf.IPMasq {
+               // same chain name derivation as in cmdAdd
+               digest := sha512.Sum512([]byte(args.Netns))
+               chain := fmt.Sprintf("CNI-%s-%x", conf.Name, digest[:8])
+               if err := ip.TeardownIPMasq(addr, chain); err != nil {
+                       return err
+               }
+       }
+
+       return plugin.ExecDel(conf.IPAM.Type, args.StdinData)
+}
+
+// main passes the ADD and DEL handlers of the veth plugin to skel.PluginMain.
+func main() {
+       skel.PluginMain(cmdAdd, cmdDel)
+}
diff --git a/scripts/docker-run.sh b/scripts/docker-run.sh
new file mode 100755 (executable)
index 0000000..f969b8b
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Run a docker container with network namespace set up by the
+# CNI plugins.
+
+# Example usage: ./docker-run.sh --rm busybox /sbin/ifconfig
+
+contid=$(docker run -d --net=none busybox:latest /bin/sleep 10000000)
+pid=$(docker inspect -f '{{ .State.Pid }}' $contid)
+netnspath=/proc/$pid/ns/net
+
+./exec-plugins.sh add $netnspath
+
+function cleanup() {
+       ./exec-plugins.sh del $netnspath
+       docker kill $contid >/dev/null
+}
+trap cleanup EXIT
+
+docker run --net=container:$contid $@
+
diff --git a/scripts/exec-plugins.sh b/scripts/exec-plugins.sh
new file mode 100755 (executable)
index 0000000..26453bd
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash -e
+
+NETCONFPATH=${NETCONFPATH-/etc/cni/net.d}
+
+function exec_plugins() {
+       i=0
+       netns=$2
+       export CNI_COMMAND=$(echo $1 | tr '[:lower:]' '[:upper:]')
+       export PATH=$CNI_PATH:$PATH
+       export CNI_NETNS=$netns
+
+       for netconf in $(echo $NETCONFPATH/*.conf | sort); do
+               plugin=$(jq -r '.type' <$netconf)
+               export CNI_IFNAME=$(printf eth%d $i)
+
+               $plugin <$netconf >/dev/null
+
+               let "i=i+1"
+       done
+}
+
+if [ $# -ne 2 ]; then
+       echo "Usage: $0 add|del NETNS-PATH"
+       echo "  Adds or deletes the container specified by NETNS-PATH to the networks"
+       echo "  specified in \$NETCONFPATH directory"
+       exit 1
+fi
+
+exec_plugins $1 $2
diff --git a/scripts/priv-net-run.sh b/scripts/priv-net-run.sh
new file mode 100755 (executable)
index 0000000..1d9fb53
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+
+# Run a command in a private network namespace
+# set up by CNI plugins
+
+netnsname=$(printf '%x%x' $RANDOM $RANDOM)
+netnspath=/var/run/netns/$netnsname
+
+ip netns add $netnsname
+ip netns exec $netnsname ip link set lo up
+./exec-plugins.sh add $netnspath
+
+
+function cleanup() {
+       ./exec-plugins.sh del $netnspath
+       ip netns delete $netnsname
+}
+trap cleanup EXIT
+
+ip netns exec $netnsname $@