From c4a34b2fe6f7317bf17fa478a734e5bdab30c278 Mon Sep 17 00:00:00 2001 From: HuabingZhao Date: Mon, 4 Sep 2017 19:33:48 +0800 Subject: Use maven to build kube2msb Issue-Id: OOM-61 Change-Id: Ic7e733c95e28b75b66535b343ae22c893db24531 Signed-off-by: HuabingZhao --- .../runc/libcontainer/cgroups/cgroups.go | 64 ++++ .../libcontainer/cgroups/cgroups_unsupported.go | 3 + .../runc/libcontainer/cgroups/fs/apply_raw.go | 402 ++++++++++++++++++++ .../runc/libcontainer/cgroups/fs/blkio.go | 237 ++++++++++++ .../runc/libcontainer/cgroups/fs/cpu.go | 94 +++++ .../runc/libcontainer/cgroups/fs/cpuacct.go | 121 ++++++ .../runc/libcontainer/cgroups/fs/cpuset.go | 139 +++++++ .../runc/libcontainer/cgroups/fs/devices.go | 78 ++++ .../runc/libcontainer/cgroups/fs/freezer.go | 61 +++ .../runc/libcontainer/cgroups/fs/fs_unsupported.go | 3 + .../runc/libcontainer/cgroups/fs/hugetlb.go | 71 ++++ .../runc/libcontainer/cgroups/fs/memory.go | 291 +++++++++++++++ .../runc/libcontainer/cgroups/fs/name.go | 40 ++ .../runc/libcontainer/cgroups/fs/net_cls.go | 41 ++ .../runc/libcontainer/cgroups/fs/net_prio.go | 41 ++ .../runc/libcontainer/cgroups/fs/perf_event.go | 35 ++ .../runc/libcontainer/cgroups/fs/pids.go | 73 ++++ .../runc/libcontainer/cgroups/fs/utils.go | 78 ++++ .../runc/libcontainer/cgroups/stats.go | 106 ++++++ .../runc/libcontainer/cgroups/utils.go | 413 +++++++++++++++++++++ .../runc/libcontainer/configs/blkio_device.go | 61 +++ .../runc/libcontainer/configs/cgroup_unix.go | 124 +++++++ .../libcontainer/configs/cgroup_unsupported.go | 6 + .../runc/libcontainer/configs/cgroup_windows.go | 6 + .../runc/libcontainer/configs/config.go | 332 +++++++++++++++++ .../runc/libcontainer/configs/config_unix.go | 51 +++ .../runc/libcontainer/configs/device.go | 57 +++ .../runc/libcontainer/configs/device_defaults.go | 125 +++++++ .../runc/libcontainer/configs/hugepage_limit.go | 9 + .../libcontainer/configs/interface_priority_map.go | 14 + .../runc/libcontainer/configs/mount.go | 30 ++ .../runc/libcontainer/configs/namespaces.go | 5 + .../libcontainer/configs/namespaces_syscall.go | 31 ++ .../configs/namespaces_syscall_unsupported.go | 15 + .../runc/libcontainer/configs/namespaces_unix.go | 127 +++++++ .../libcontainer/configs/namespaces_unsupported.go | 8 + .../runc/libcontainer/configs/network.go | 72 ++++ .../runc/libcontainer/system/linux.go | 143 +++++++ .../runc/libcontainer/system/proc.go | 27 ++ .../runc/libcontainer/system/setns_linux.go | 40 ++ .../runc/libcontainer/system/syscall_linux_386.go | 25 ++ .../runc/libcontainer/system/syscall_linux_64.go | 25 ++ .../runc/libcontainer/system/syscall_linux_arm.go | 25 ++ .../runc/libcontainer/system/sysconfig.go | 12 + .../runc/libcontainer/system/sysconfig_notcgo.go | 15 + .../runc/libcontainer/system/unsupported.go | 9 + .../runc/libcontainer/system/xattrs_linux.go | 99 +++++ .../runc/libcontainer/utils/utils.go | 121 ++++++ .../runc/libcontainer/utils/utils_unix.go | 33 ++ 49 files changed, 4038 insertions(+) create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go create mode 100644 src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go (limited to 'src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer') diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go new file mode 100644 index 0000000..274ab47 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -0,0 +1,64 @@ +// +build linux + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Applies cgroup configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside the cgroup set + GetPids() ([]int, error) + + // Returns the PIDs inside the cgroup set & all sub-cgroups + GetAllPids() ([]int, error) + + // Returns statistics for the cgroup set + GetStats() (*Stats, error) + + // Toggles the freezer cgroup according with specified state + Freeze(state configs.FreezerState) error + + // Destroys the cgroup set + Destroy() error + + // NewCgroupManager() and LoadCgroupManager() require following attributes: + // Paths map[string]string + // Cgroups *cgroups.Cgroup + // Paths maps cgroup subsystem to path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. + GetPaths() map[string]string + + // Set the cgroup as configured. + Set(container *configs.Config) error +} + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go new file mode 100644 index 0000000..278d507 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package cgroups diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go new file mode 100644 index 0000000..633ab04 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -0,0 +1,402 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "sync" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +var ( + subsystems = subsystemSet{ + &CpusetGroup{}, + &DevicesGroup{}, + &MemoryGroup{}, + &CpuGroup{}, + &CpuacctGroup{}, + &PidsGroup{}, + &BlkioGroup{}, + &HugetlbGroup{}, + &NetClsGroup{}, + &NetPrioGroup{}, + &PerfEventGroup{}, + &FreezerGroup{}, + &NameGroup{GroupName: "name=systemd", Join: true}, + } + CgroupProcesses = "cgroup.procs" + HugePageSizes, _ = cgroups.GetHugePageSize() +) + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'cgroupData'. + Remove(*cgroupData) error + // Creates and joins the cgroup represented by 'cgroupData'. + Apply(*cgroupData) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +// The absolute path to the root of the cgroup hierarchies. +var cgroupRootLock sync.Mutex +var cgroupRoot string + +// Gets the cgroupRoot. +func getCgroupRoot() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + root, err := cgroups.FindCgroupMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +type cgroupData struct { + root string + innerPath string + config *configs.Cgroup + pid int +} + +func (m *Manager) Apply(pid int) (err error) { + if m.Cgroups == nil { + return nil + } + + var c = m.Cgroups + + d, err := getCgroupData(m.Cgroups, pid) + if err != nil { + return err + } + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + m.mu.Lock() + defer m.mu.Unlock() + paths := make(map[string]string) + for _, sys := range subsystems { + if err := sys.Apply(d); err != nil { + return err + } + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is + // considered fatal for security reasons. + if cgroups.IsNotFound(err) && sys.Name() != "devices" { + continue + } + return err + } + paths[sys.Name()] = p + } + m.Paths = paths + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := subsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + for _, sys := range subsystems { + // Generate fake cgroup data. + d, err := getCgroupData(container.Cgroups, -1) + if err != nil { + return err + } + // Get the path, but don't error out if the cgroup wasn't found. + path, err := d.path(sys.Name()) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state configs.FreezerState) error { + d, err := getCgroupData(m.Cgroups, 0) + if err != nil { + return err + } + dir, err := d.path("freezer") + if err != nil { + return err + } + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := subsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(dir, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + dir, err := getCgroupPath(m.Cgroups) + if err != nil { + return nil, err + } + return cgroups.GetPids(dir) +} + +func (m *Manager) GetAllPids() ([]int, error) { + dir, err := getCgroupPath(m.Cgroups) + if err != nil { + return nil, err + } + return cgroups.GetAllPids(dir) +} + +func getCgroupPath(c *configs.Cgroup) (string, error) { + d, err := getCgroupData(c, 0) + if err != nil { + return "", err + } + + return d.path("devices") +} + +func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { + root, err := getCgroupRoot() + if err != nil { + return nil, err + } + + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used") + } + + // XXX: Do not remove this code. Path safety is important! -- cyphar + cgPath := libcontainerUtils.CleanPath(c.Path) + cgParent := libcontainerUtils.CleanPath(c.Parent) + cgName := libcontainerUtils.CleanPath(c.Name) + + innerPath := cgPath + if innerPath == "" { + innerPath = filepath.Join(cgParent, cgName) + } + + return &cgroupData{ + root: root, + innerPath: innerPath, + config: c, + pid: pid, + }, nil +} + +func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) { + // Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating + // process could in container and shared pid namespace with host, and + // /proc/1/cgroup could point to whole other world of cgroups. + initPath, err := cgroups.GetThisCgroupDir(subsystem) + if err != nil { + return "", err + } + // This is needed for nested containers, because in /proc/self/cgroup we + // see pathes from host, which don't exist in container. + relDir, err := filepath.Rel(root, initPath) + if err != nil { + return "", err + } + return filepath.Join(mountpoint, relDir), nil +} + +func (raw *cgroupData) path(subsystem string) (string, error) { + mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(raw.innerPath) { + // Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'. + return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil + } + + parentPath, err := raw.parentPath(subsystem, mnt, root) + if err != nil { + return "", err + } + + return filepath.Join(parentPath, raw.innerPath), nil +} + +func (raw *cgroupData) join(subsystem string) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil { + return "", err + } + return path, nil +} + +func writeFile(dir, file, data string) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func readFile(dir, file string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(dir, file)) + return string(data), err +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} + +func CheckCpushares(path string, c int64) error { + var cpuShares int64 + + if c == 0 { + return nil + } + + fd, err := os.Open(filepath.Join(path, "cpu.shares")) + if err != nil { + return err + } + defer fd.Close() + + _, err = fmt.Fscanf(fd, "%d", &cpuShares) + if err != nil && err != io.EOF { + return err + } + + if c > cpuShares { + return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares) + } else if c < cpuShares { + return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares) + } + + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go new file mode 100644 index 0000000..a142cb9 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go @@ -0,0 +1,237 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type BlkioGroup struct { +} + +func (s *BlkioGroup) Name() string { + return "blkio" +} + +func (s *BlkioGroup) Apply(d *cgroupData) error { + _, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.BlkioWeight != 0 { + if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { + return err + } + } + + if cgroup.Resources.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range cgroup.Resources.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { + return err + } + } + + return nil +} + +func (s *BlkioGroup) Remove(d *cgroupData) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { + var blkioStats []cgroups.BlkioStatEntry + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return blkioStats, nil + } + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + + v, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return nil, err + } + major := v + + v, err = strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return nil, err + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, err + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + // Try to read CFQ stats available on all CFQ enabled kernels first + if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { + return getCFQStats(path, stats) + } + return getStats(path, stats) // Use generic stats as fallback +} + +func getCFQStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { + return err + } + stats.BlkioStats.SectorsRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { + return err + } + stats.BlkioStats.IoQueuedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoWaitTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { + return err + } + stats.BlkioStats.IoMergedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoTimeRecursive = blkioStats + + return nil +} + +func getStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go new file mode 100644 index 0000000..a4ef28a --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -0,0 +1,94 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroup struct { +} + +func (s *CpuGroup) Name() string { + return "cpu" +} + +func (s *CpuGroup) Apply(d *cgroupData) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + _, err := d.join("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuShares != 0 { + if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuPeriod != 0 { + if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuQuota != 0 { + if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuRtPeriod != 0 { + if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuRtRuntime != 0 { + if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *CpuGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go new file mode 100644 index 0000000..53afbad --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go @@ -0,0 +1,121 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + cgroupCpuacctStat = "cpuacct.stat" + nanosecondsInSecond = 1000000000 +) + +var clockTicks = uint64(system.GetClockTicks()) + +type CpuacctGroup struct { +} + +func (s *CpuacctGroup) Name() string { + return "cpuacct" +} + +func (s *CpuacctGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { + return err + } + + return nil +} + +func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *CpuacctGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuacct")) +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. +func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + userModeUsage := uint64(0) + kernelModeUsage := uint64(0) + const ( + userField = "user" + systemField = "system" + ) + + // Expected format: + // user + // system + data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) + } + if fields[0] != userField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) + } + if fields[2] != systemField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, err + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, err + } + + return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + percpuUsage := []uint64{} + data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(string(data)) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go new file mode 100644 index 0000000..cbe62bd --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go @@ -0,0 +1,139 @@ +// +build linux + +package fs + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +type CpusetGroup struct { +} + +func (s *CpusetGroup) Name() string { + return "cpuset" +} + +func (s *CpusetGroup) Apply(d *cgroupData) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.config, d.pid) +} + +func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + root, err := getCgroupRoot() + if err != nil { + return err + } + if err := s.ensureParent(dir, root); err != nil { + return err + } + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil { + return err + } + + return nil +} + +func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (s *CpusetGroup) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if libcontainerUtils.CleanPath(parent) == root { + return nil + } + // Avoid infinite recursion. + if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go new file mode 100644 index 0000000..5f78331 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go @@ -0,0 +1,78 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +type DevicesGroup struct { +} + +func (s *DevicesGroup) Name() string { + return "devices" +} + +func (s *DevicesGroup) Apply(d *cgroupData) error { + _, err := d.join("devices") + if err != nil { + // We will return error even it's `not found` error, devices + // cgroup is hard requirement for container's security. + return err + } + return nil +} + +func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if system.RunningInUserNS() { + return nil + } + + devices := cgroup.Resources.Devices + if len(devices) > 0 { + for _, dev := range devices { + file := "devices.deny" + if dev.Allow { + file = "devices.allow" + } + if err := writeFile(path, file, dev.CgroupString()); err != nil { + return err + } + } + return nil + } + if !cgroup.Resources.AllowAllDevices { + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range cgroup.Resources.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { + return err + } + } + return nil + } + + if err := writeFile(path, "devices.allow", "a"); err != nil { + return err + } + + for _, dev := range cgroup.Resources.DeniedDevices { + if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *DevicesGroup) Remove(d *cgroupData) error { + return removePath(d.path("devices")) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go new file mode 100644 index 0000000..e70dfe3 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go @@ -0,0 +1,61 @@ +// +build linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroup struct { +} + +func (s *FreezerGroup) Name() string { + return "freezer" +} + +func (s *FreezerGroup) Apply(d *cgroupData) error { + _, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + switch cgroup.Resources.Freezer { + case configs.Frozen, configs.Thawed: + if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + return err + } + + for { + state, err := readFile(path, "freezer.state") + if err != nil { + return err + } + if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { + break + } + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) + } + + return nil +} + +func (s *FreezerGroup) Remove(d *cgroupData) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go new file mode 100644 index 0000000..3ef9e03 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package fs diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go new file mode 100644 index 0000000..2f97277 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go @@ -0,0 +1,71 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type HugetlbGroup struct { +} + +func (s *HugetlbGroup) Name() string { + return "hugetlb" +} + +func (s *HugetlbGroup) Apply(d *cgroupData) error { + _, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.Resources.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *HugetlbGroup) Remove(d *cgroupData) error { + return removePath(d.path("hugetlb")) +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go new file mode 100644 index 0000000..b837128 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -0,0 +1,291 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type MemoryGroup struct { +} + +func (s *MemoryGroup) Name() string { + return "memory" +} + +func (s *MemoryGroup) Apply(d *cgroupData) (err error) { + path, err := d.path("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + // reset error. + err = nil + if path == "" { + // Invalid input. + return fmt.Errorf("invalid path for memory cgroups: %+v", d) + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + if !cgroups.PathExists(path) { + if err = os.MkdirAll(path, 0755); err != nil { + return err + } + } + if memoryAssigned(d.config) { + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. + if err = s.SetKernelMemory(path, d.config); err != nil { + return err + } + } + // We need to join memory cgroup after set memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + if _, jerr := d.join("memory"); jerr != nil && !cgroups.IsNotFound(jerr) { + err = jerr + return err + } + return nil +} + +func getModifyTime(path string) (time.Time, error) { + stat, err := os.Stat(path) + if err != nil { + return time.Time{}, fmt.Errorf("failed to get memory cgroups creation time: %v", err) + } + return stat.ModTime(), nil +} + +func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error { + // This has to be done separately because it has special + // constraints (it can only be initialized before setting up a + // hierarchy or adding a task to the cgroups. However, if + // sucessfully initialized, it can be updated anytime afterwards) + if cgroup.Resources.KernelMemory != 0 { + // Is kmem.limit_in_bytes already set? + // memory.kmem.max_usage_in_bytes is a read-only file. Use it to get cgroups creation time. + kmemCreationTime, err := getModifyTime(filepath.Join(path, "memory.kmem.max_usage_in_bytes")) + if err != nil { + return err + } + kmemLimitsUpdateTime, err := getModifyTime(filepath.Join(path, "memory.kmem.limit_in_bytes")) + if err != nil { + return err + } + // kmem.limit_in_bytes has already been set if its update time is after that of creation time. + // We use `!=` op instead of `>` because updates are losing precision compared to creation. + kmemInitialized := !kmemLimitsUpdateTime.Equal(kmemCreationTime) + if !kmemInitialized { + // If there's already tasks in the cgroup, we can't change the limit either + tasks, err := getCgroupParamString(path, "tasks") + if err != nil { + return err + } + if tasks != "" { + return fmt.Errorf("cannot set kmem.limit_in_bytes after task have joined this cgroup") + } + } + if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil { + return err + } + } + return nil +} + +func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { + // When memory and swap memory are both set, we need to handle the cases + // for updating container. + if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 { + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + + // When update memory limit, we should adapt the write sequence + // for memory and swap memory, so it won't fail because the new + // value and the old value don't fit kernel's validation. + if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { + if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } else { + if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } else { + if cgroup.Resources.Memory != 0 { + if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + if cgroup.Resources.MemorySwap > 0 { + if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } + + return nil +} + +func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { + if err := setMemoryAndSwap(path, cgroup); err != nil { + return err + } + + if err := s.SetKernelMemory(path, cgroup); err != nil { + return err + } + + if cgroup.Resources.MemoryReservation != 0 { + if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + if cgroup.Resources.KernelMemoryTCP != 0 { + if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil { + return err + } + } + if cgroup.Resources.OomKillDisable { + if err := writeFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 { + return nil + } else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil { + return err + } + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness)) + } + + return nil +} + +func (s *MemoryGroup) Remove(d *cgroupData) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") + if err != nil { + return err + } + stats.MemoryStats.KernelTCPUsage = kernelTCPUsage + + return nil +} + +func memoryAssigned(cgroup *configs.Cgroup) bool { + return cgroup.Resources.Memory != 0 || + cgroup.Resources.MemoryReservation != 0 || + cgroup.Resources.MemorySwap > 0 || + cgroup.Resources.KernelMemory > 0 || + cgroup.Resources.KernelMemoryTCP > 0 || + cgroup.Resources.OomKillDisable || + (cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1) +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + value, err = getCgroupParamUint(path, limit) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go new file mode 100644 index 0000000..d8cf1d8 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go @@ -0,0 +1,40 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NameGroup struct { + GroupName string + Join bool +} + +func (s *NameGroup) Name() string { + return s.GroupName +} + +func (s *NameGroup) Apply(d *cgroupData) error { + if s.Join { + // ignore errors if the named cgroup does not exist + d.join(s.GroupName) + } + return nil +} + +func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *NameGroup) Remove(d *cgroupData) error { + if s.Join { + removePath(d.path(s.GroupName)) + } + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go new file mode 100644 index 0000000..8a4054b --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go @@ -0,0 +1,41 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetClsGroup struct { +} + +func (s *NetClsGroup) Name() string { + return "net_cls" +} + +func (s *NetClsGroup) Apply(d *cgroupData) error { + _, err := d.join("net_cls") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.NetClsClassid != "" { + if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_cls")) +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go new file mode 100644 index 0000000..d0ab2af --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go @@ -0,0 +1,41 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetPrioGroup struct { +} + +func (s *NetPrioGroup) Name() string { + return "net_prio" +} + +func (s *NetPrioGroup) Apply(d *cgroupData) error { + _, err := d.join("net_prio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, prioMap := range cgroup.Resources.NetPrioIfpriomap { + if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_prio")) +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go new file mode 100644 index 0000000..5693676 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go @@ -0,0 +1,35 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PerfEventGroup struct { +} + +func (s *PerfEventGroup) Name() string { + return "perf_event" +} + +func (s *PerfEventGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *PerfEventGroup) Remove(d *cgroupData) error { + return removePath(d.path("perf_event")) +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go new file mode 100644 index 0000000..f1e3720 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go @@ -0,0 +1,73 @@ +// +build linux + +package fs + +import ( + "fmt" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PidsGroup struct { +} + +func (s *PidsGroup) Name() string { + return "pids" +} + +func (s *PidsGroup) Apply(d *cgroupData) error { + _, err := d.join("pids") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. + limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := writeFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroup) Remove(d *cgroupData) error { + return removePath(d.path("pids")) +} + +func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { + current, err := getCgroupParamUint(path, "pids.current") + if err != nil { + return fmt.Errorf("failed to parse pids.current - %s", err) + } + + maxString, err := getCgroupParamString(path, "pids.max") + if err != nil { + return fmt.Errorf("failed to parse pids.max - %s", err) + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". + var max uint64 + if maxString != "max" { + max, err = parseUint(maxString, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go new file mode 100644 index 0000000..5ff0a16 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go @@ -0,0 +1,78 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Saturates negative values at zero and returns a uint64. +// Due to kernel bugs, some of the memory cgroup stats can be negative. +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Parses a cgroup param and returns as name, value +// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// Gets a single uint64 value from the specified cgroup file. +func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { + fileName := filepath.Join(cgroupPath, cgroupFile) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified cgroup file +func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go new file mode 100644 index 0000000..b483f1b --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -0,0 +1,106 @@ +// +build linux + +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. + ThrottledTime uint64 `json:"throttled_time,omitempty"` +} + +// CpuUsage denotes the usage of a CPU. +// All CPU stats are aggregate since container inception. +type CpuUsage struct { + // Total CPU time consumed. + // Units: nanoseconds. + TotalUsage uint64 `json:"total_usage,omitempty"` + // Total CPU time consumed per core. + // Units: nanoseconds. + PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + // Time spent by tasks of the cgroup in kernel mode. + // Units: nanoseconds. + UsageInKernelmode uint64 `json:"usage_in_kernelmode"` + // Time spent by tasks of the cgroup in user mode. + // Units: nanoseconds. + UsageInUsermode uint64 `json:"usage_in_usermode"` +} + +type CpuStats struct { + CpuUsage CpuUsage `json:"cpu_usage,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` +} + +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` + Limit uint64 `json:"limit"` +} + +type MemoryStats struct { + // memory used for cache + Cache uint64 `json:"cache,omitempty"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + // usage of kernel TCP memory + KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` + + Stats map[string]uint64 `json:"stats,omitempty"` +} + +type PidsStats struct { + // number of pids in the cgroup + Current uint64 `json:"current,omitempty"` + // active pids hard limit + Limit uint64 `json:"limit,omitempty"` +} + +type BlkioStatEntry struct { + Major uint64 `json:"major,omitempty"` + Minor uint64 `json:"minor,omitempty"` + Op string `json:"op,omitempty"` + Value uint64 `json:"value,omitempty"` +} + +type BlkioStats struct { + // number of bytes tranferred to and from the block device + IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` + IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` + IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` + IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` + IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` + IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` + IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` + SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` +} + +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. + MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times hugetlb usage allocation failure. + Failcnt uint64 `json:"failcnt"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + PidsStats PidsStats `json:"pids_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go new file mode 100644 index 0000000..1a7c4e1 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -0,0 +1,413 @@ +// +build linux + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/docker/go-units" +) + +const cgroupNamePrefix = "name=" + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. + if !isSubsystemAvailable(subsystem) { + return "", NewNotFoundError(subsystem) + } + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError(subsystem) +} + +func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { + if !isSubsystemAvailable(subsystem) { + return "", "", NewNotFoundError(subsystem) + } + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", "", err + } + + return "", "", NewNotFoundError(subsystem) +} + +func isSubsystemAvailable(subsystem string) bool { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return false + } + _, avail := cgroups[subsystem] + return avail +} + +func FindCgroupMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "cgroup" { + // Check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return filepath.Dir(fields[4]), nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("cgroup") +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + scanner := bufio.NewScanner(mi) + numFound := 0 + for scanner.Scan() && numFound < len(ss) { + txt := scanner.Text() + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + return nil, fmt.Errorf("invalid mountinfo format") + } + if txt[sepIdx+3:sepIdx+9] != "cgroup" { + continue + } + fields := strings.Split(txt, " ") + m := Mount{ + Mountpoint: fields[4], + Root: fields[3], + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if !ss[opt] { + continue + } + if strings.HasPrefix(opt, cgroupNamePrefix) { + m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) + } else { + m.Subsystems = append(m.Subsystems, opt) + } + numFound++ + } + res = append(res, m) + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +func GetCgroupMounts() ([]Mount, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + all, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range all { + allMap[s] = true + } + return getCgroupMountsHelper(allMap, f) +} + +// GetAllSubsystems returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + return subsystems, nil +} + +// GetThisCgroupDir returns the relative path to the cgroup docker is running in. +func GetThisCgroupDir(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetInitCgroupDir(subsystem string) (string, error) { + + cgroups, err := ParseCgroupFile("/proc/1/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func readProcsFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, "cgroup.procs")) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +// ParseCgroupFile parses the given cgroup file, typically from +// /proc//cgroup, into a map of subgroups to cgroup names. +func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return parseCgroupFromReader(f) +} + +// helper function for ParseCgroupFile to make testing easier +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + s := bufio.NewScanner(r) + cgroups := make(map[string]string) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + text := s.Text() + // from cgroups(7): + // /proc/[pid]/cgroup + // ... + // For each cgroup hierarchy ... there is one entry + // containing three colon-separated fields of the form: + // hierarchy-ID:subsystem-list:cgroup-path + parts := strings.SplitN(text, ":", 3) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) + } + + for _, subs := range strings.Split(parts[1], ",") { + cgroups[subs] = parts[2] + } + } + return cgroups, nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + return p, nil + } + + return "", NewNotFoundError(subsystem) +} + +func PathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +func EnterPid(cgroupPaths map[string]string, pid int) error { + for _, path := range cgroupPaths { + if PathExists(path) { + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), + []byte(strconv.Itoa(pid)), 0700); err != nil { + return err + } + } + } + return nil +} + +// RemovePaths iterates over the provided paths removing them. +// We trying to remove all paths five times with increasing delay between tries. +// If after all there are not removed cgroups - appropriate error will be +// returned. +func RemovePaths(paths map[string]string) (err error) { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + for s, p := range paths { + os.RemoveAll(p) + // TODO: here probably should be logging + _, err := os.Stat(p) + // We need this strange way of checking cgroups existence because + // RemoveAll almost always returns error, even on already removed + // cgroups + if os.IsNotExist(err) { + delete(paths, s) + } + } + if len(paths) == 0 { + return nil + } + } + return fmt.Errorf("Failed to remove paths: %v", paths) +} + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} + +// GetPids returns all pids, that were added to cgroup at path. +func GetPids(path string) ([]int, error) { + return readProcsFile(path) +} + +// GetAllPids returns all pids, that were added to cgroup at path and to all its +// subcgroups. +func GetAllPids(path string) ([]int, error) { + var pids []int + // collect pids from all sub-cgroups + err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error { + dir, file := filepath.Split(p) + if file != "cgroup.procs" { + return nil + } + if iErr != nil { + return iErr + } + cPids, err := readProcsFile(dir) + if err != nil { + return err + } + pids = append(pids, cPids...) + return nil + }) + return pids, err +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go new file mode 100644 index 0000000..e0f3ca1 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number + Major int64 `json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go new file mode 100644 index 0000000..f2eff91 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unix.go @@ -0,0 +1,124 @@ +// +build linux freebsd + +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + // Deprecated, use Path instead + Name string `json:"name,omitempty"` + + // name of parent of cgroup or slice + // Deprecated, use Path instead + Parent string `json:"parent,omitempty"` + + // Path specifies the path to cgroups that are created and/or joined by the container. + // The path is assumed to be relative to the host system cgroup mountpoint. + Path string `json:"path"` + + // ScopePrefix decribes prefix for the scope name + ScopePrefix string `json:"scope_prefix"` + + // Paths represent the absolute cgroups paths to join. + // This takes precedence over Path. + Paths map[string]string + + // Resources contains various cgroups settings to apply + *Resources +} + +type Resources struct { + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + // Deprecated + AllowAllDevices bool `json:"allow_all_devices,omitempty"` + // Deprecated + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + // Deprecated + DeniedDevices []*Device `json:"denied_devices,omitempty"` + + Devices []*Device `json:"devices"` + + // Memory limit (in bytes) + Memory int64 `json:"memory"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation"` + + // Total memory usage (memory + swap); set `-1` to enable unlimited swap + MemorySwap int64 `json:"memory_swap"` + + // Kernel memory limit (in bytes) + KernelMemory int64 `json:"kernel_memory"` + + // Kernel memory limit for TCP use (in bytes) + KernelMemoryTCP int64 `json:"kernel_memory_tcp"` + + // CPU shares (relative weight vs. other containers) + CpuShares int64 `json:"cpu_shares"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod int64 `json:"cpu_period"` + + // How many time CPU will use in realtime scheduling (in usecs). + CpuRtRuntime int64 `json:"cpu_quota"` + + // CPU period to be used for realtime scheduling (in usecs). + CpuRtPeriod int64 `json:"cpu_period"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus"` + + // MEM to use + CpusetMems string `json:"cpuset_mems"` + + // Process limit; set <= `0' to disable limit. + PidsLimit int64 `json:"pids_limit"` + + // Specifies per cgroup weight, range is from 10 to 1000. + BlkioWeight uint16 `json:"blkio_weight"` + + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` + + // Weight per cgroup per device, can override BlkioWeight. + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` + + // IO read rate limit per cgroup per device, bytes per second. + BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` + + // IO write rate limit per cgroup per divice, bytes per second. + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` + + // IO read rate limit per cgroup per device, IO per second. + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` + + // IO write rate limit per cgroup per device, IO per second. + BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer"` + + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + + // Whether to disable OOM Killer + OomKillDisable bool `json:"oom_kill_disable"` + + // Tuning swappiness behaviour per cgroup + MemorySwappiness *int64 `json:"memory_swappiness"` + + // Set priority of network traffic for container + NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` + + // Set class identifier for container's network packets + NetClsClassid string `json:"net_cls_classid"` +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go new file mode 100644 index 0000000..95e2830 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go @@ -0,0 +1,6 @@ +// +build !windows,!linux,!freebsd + +package configs + +type Cgroup struct { +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go new file mode 100644 index 0000000..d74847b --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go @@ -0,0 +1,6 @@ +package configs + +// TODO Windows: This can ultimately be entirely factored out on Windows as +// cgroups are a Unix-specific construct. +type Cgroup struct { +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go new file mode 100644 index 0000000..806e0be --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -0,0 +1,332 @@ +package configs + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "time" + + "github.com/Sirupsen/logrus" +) + +type Rlimit struct { + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` +} + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int `json:"container_id"` + HostID int `json:"host_id"` + Size int `json:"size"` +} + +// Seccomp represents syscall restrictions +// By default, only the native architecture of the kernel is allowed to be used +// for syscalls. Additional architectures can be added by specifying them in +// Architectures. +type Seccomp struct { + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Syscalls []*Syscall `json:"syscalls"` +} + +// Action is taken upon rule match in Seccomp +type Action int + +const ( + Kill Action = iota + 1 + Errno + Trap + Allow + Trace +) + +// Operator is a comparison operator to be used when matching syscall arguments in Seccomp +type Operator int + +const ( + EqualTo Operator = iota + 1 + NotEqualTo + GreaterThan + GreaterThanOrEqualTo + LessThan + LessThanOrEqualTo + MaskEqualTo +) + +// Arg is a rule to match a specific syscall argument in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` +} + +// Syscall is a rule to match a syscall in Seccomp +type Syscall struct { + Name string `json:"name"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + +// TODO Windows. Many of these fields should be factored out into those parts +// which are common across platforms, and those which are platform specific. + +// Config defines configuration options for executing a process inside a contained environment. +type Config struct { + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root"` + + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal"` + + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. + PivotDir string `json:"pivot_dir"` + + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writtable. + Readonlyfs bool `json:"readonlyfs"` + + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! + Devices []*Device `json:"devices"` + + MountLabel string `json:"mount_label"` + + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname"` + + // Namespaces specifies the container's namespaces that it should setup when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces Namespaces `json:"namespaces"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capbilities not specified will be dropped from the processes capability mask + Capabilities []string `json:"capabilities"` + + // Networks specifies the container's network setup to be created + Networks []*Network `json:"networks"` + + // Routes can be specified to create entries in the route table as the container is started + Routes []*Route `json:"routes"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available + Cgroups *Cgroup `json:"cgroups"` + + // AppArmorProfile specifies the profile to apply to the process running in the container and is + // change at the time the process is execed + AppArmorProfile string `json:"apparmor_profile,omitempty"` + + // ProcessLabel specifies the label to apply to the process running in the container. It is + // commonly used by selinux + ProcessLabel string `json:"process_label,omitempty"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits,omitempty"` + + // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores + // for a process. Valid values are between the range [-1000, '1000'], where processes with + // higher scores are preferred for being killed. + // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ + OomScoreAdj int `json:"oom_score_adj"` + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. + AdditionalGroups []string `json:"additional_groups"` + + // UidMappings is an array of User ID mappings for User Namespaces + UidMappings []IDMap `json:"uid_mappings"` + + // GidMappings is an array of Group ID mappings for User Namespaces + GidMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. + ReadonlyPaths []string `json:"readonly_paths"` + + // Sysctl is a map of properties and their values. It is the equivalent of using + // sysctl -w my.property.name value in Linux. + Sysctl map[string]string `json:"sysctl"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. + Seccomp *Seccomp `json:"seccomp"` + + // NoNewPrivileges controls whether processes in the container can gain additional privileges. + NoNewPrivileges bool `json:"no_new_privileges,omitempty"` + + // Hooks are a collection of actions to perform at various container lifecycle events. + // CommandHooks are serialized to JSON, but other hooks are not. + Hooks *Hooks + + // Version is the version of opencontainer specification that is supported. + Version string `json:"version"` + + // Labels are user defined metadata that is stored in the config and populated on the state + Labels []string `json:"labels"` + + // NoNewKeyring will not allocated a new session keyring for the container. It will use the + // callers keyring in this case. + NoNewKeyring bool `json:"no_new_keyring"` +} + +type Hooks struct { + // Prestart commands are executed after the container namespaces are created, + // but before the user supplied command is executed from init. + Prestart []Hook + + // Poststart commands are executed after the container init process starts. + Poststart []Hook + + // Poststop commands are executed after the container init process exits. + Poststop []Hook +} + +func (hooks *Hooks) UnmarshalJSON(b []byte) error { + var state struct { + Prestart []CommandHook + Poststart []CommandHook + Poststop []CommandHook + } + + if err := json.Unmarshal(b, &state); err != nil { + return err + } + + deserialize := func(shooks []CommandHook) (hooks []Hook) { + for _, shook := range shooks { + hooks = append(hooks, shook) + } + + return hooks + } + + hooks.Prestart = deserialize(state.Prestart) + hooks.Poststart = deserialize(state.Poststart) + hooks.Poststop = deserialize(state.Poststop) + return nil +} + +func (hooks Hooks) MarshalJSON() ([]byte, error) { + serialize := func(hooks []Hook) (serializableHooks []CommandHook) { + for _, hook := range hooks { + switch chook := hook.(type) { + case CommandHook: + serializableHooks = append(serializableHooks, chook) + default: + logrus.Warnf("cannot serialize hook of type %T, skipping", hook) + } + } + + return serializableHooks + } + + return json.Marshal(map[string]interface{}{ + "prestart": serialize(hooks.Prestart), + "poststart": serialize(hooks.Poststart), + "poststop": serialize(hooks.Poststop), + }) +} + +// HookState is the payload provided to a hook on execution. +type HookState struct { + Version string `json:"ociVersion"` + ID string `json:"id"` + Pid int `json:"pid"` + Root string `json:"root"` + BundlePath string `json:"bundlePath"` +} + +type Hook interface { + // Run executes the hook with the provided state. + Run(HookState) error +} + +// NewFunctionHook will call the provided function when the hook is run. +func NewFunctionHook(f func(HookState) error) FuncHook { + return FuncHook{ + run: f, + } +} + +type FuncHook struct { + run func(HookState) error +} + +func (f FuncHook) Run(s HookState) error { + return f.run(s) +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` + Timeout *time.Duration `json:"timeout"` +} + +// NewCommandHook will execute the provided command when the hook is run. +func NewCommandHook(cmd Command) CommandHook { + return CommandHook{ + Command: cmd, + } +} + +type CommandHook struct { + Command +} + +func (c Command) Run(s HookState) error { + b, err := json.Marshal(s) + if err != nil { + return err + } + cmd := exec.Cmd{ + Path: c.Path, + Args: c.Args, + Env: c.Env, + Stdin: bytes.NewReader(b), + } + errC := make(chan error, 1) + go func() { + out, err := cmd.CombinedOutput() + if err != nil { + err = fmt.Errorf("%s: %s", err, out) + } + errC <- err + }() + if c.Timeout != nil { + select { + case err := <-errC: + return err + case <-time.After(*c.Timeout): + cmd.Process.Kill() + cmd.Wait() + return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) + } + } + return <-errC +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go new file mode 100644 index 0000000..a60554a --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go @@ -0,0 +1,51 @@ +// +build freebsd linux + +package configs + +import "fmt" + +// HostUID gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostUID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + id, found := c.hostIDFromMapping(0, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil +} + +// HostGID gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostGID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(0, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.") + } + return id, nil + } + // Return default root gid 0 + return 0, nil +} + +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go new file mode 100644 index 0000000..8701bb2 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go @@ -0,0 +1,57 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +// TODO Windows: This can be factored out in the future + +type Device struct { + // Device type, block, char, etc. + Type rune `json:"type"` + + // Path to the device. + Path string `json:"path"` + + // Major is the device's major number. + Major int64 `json:"major"` + + // Minor is the device's minor number. + Minor int64 `json:"minor"` + + // Cgroup permissions format, rwm. + Permissions string `json:"permissions"` + + // FileMode permission bits for the device. + FileMode os.FileMode `json:"file_mode"` + + // Uid of the device. + Uid uint32 `json:"uid"` + + // Gid of the device. + Gid uint32 `json:"gid"` + + // Write the file to the allowed list + Allow bool `json:"allow"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go new file mode 100644 index 0000000..ba1f437 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go @@ -0,0 +1,125 @@ +// +build linux freebsd + +package configs + +var ( + // DefaultSimpleDevices are devices that are to be both allowed and created. + DefaultSimpleDevices = []*Device{ + // /dev/null and zero + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + + { + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, + }, + + // consoles and ttys + { + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, + }, + + // /dev/urandom,/dev/random + { + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, + }, + } + DefaultAllowedDevices = append([]*Device{ + // allow mknod for any device + { + Type: 'c', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: Wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) + DefaultAutoCreatedDevices = append([]*Device{ + { + // /dev/fuse is created but not allowed. + // This is to allow java to work. Because java + // Insists on there being a /dev/fuse + // https://github.com/docker/docker/issues/514 + // https://github.com/docker/docker/issues/2393 + // + Path: "/dev/fuse", + Type: 'c', + Major: 10, + Minor: 229, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) +) diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go new file mode 100644 index 0000000..d302163 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. + Limit uint64 `json:"limit"` +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go new file mode 100644 index 0000000..9a0395e --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go @@ -0,0 +1,14 @@ +package configs + +import ( + "fmt" +) + +type IfPrioMap struct { + Interface string `json:"interface"` + Priority int64 `json:"priority"` +} + +func (i *IfPrioMap) CgroupString() string { + return fmt.Sprintf("%s %d", i.Interface, i.Priority) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go new file mode 100644 index 0000000..cc770c9 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -0,0 +1,30 @@ +package configs + +type Mount struct { + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // Optional Command to be run before Source is mounted. + PremountCmds []Command `json:"premount_cmds"` + + // Optional Command to be run after Source is mounted. + PostmountCmds []Command `json:"postmount_cmds"` +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go new file mode 100644 index 0000000..a3329a3 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go @@ -0,0 +1,5 @@ +package configs + +type NamespaceType string + +type Namespaces []Namespace diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go new file mode 100644 index 0000000..fb4b852 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -0,0 +1,31 @@ +// +build linux + +package configs + +import "syscall" + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: syscall.CLONE_NEWNET, + NEWNS: syscall.CLONE_NEWNS, + NEWUSER: syscall.CLONE_NEWUSER, + NEWIPC: syscall.CLONE_NEWIPC, + NEWUTS: syscall.CLONE_NEWUTS, + NEWPID: syscall.CLONE_NEWPID, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go new file mode 100644 index 0000000..0547223 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -0,0 +1,15 @@ +// +build !linux,!windows + +package configs + +func (n *Namespace) Syscall() int { + panic("No namespace syscall support") + return 0 +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + panic("No namespace syscall support") + return uintptr(0) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go new file mode 100644 index 0000000..b9c820d --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go @@ -0,0 +1,127 @@ +// +build linux freebsd + +package configs + +import ( + "fmt" + "os" + "sync" +) + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +var ( + nsLock sync.Mutex + supportedNamespaces = make(map[NamespaceType]bool) +) + +// nsToFile converts the namespace type to its filename +func nsToFile(ns NamespaceType) string { + switch ns { + case NEWNET: + return "net" + case NEWNS: + return "mnt" + case NEWPID: + return "pid" + case NEWIPC: + return "ipc" + case NEWUSER: + return "user" + case NEWUTS: + return "uts" + } + return "" +} + +// IsNamespaceSupported returns whether a namespace is available or +// not +func IsNamespaceSupported(ns NamespaceType) bool { + nsLock.Lock() + defer nsLock.Unlock() + supported, ok := supportedNamespaces[ns] + if ok { + return supported + } + nsFile := nsToFile(ns) + // if the namespace type is unknown, just return false + if nsFile == "" { + return false + } + _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) + // a namespace is supported if it exists and we have permissions to read it + supported = err == nil + supportedNamespaces[ns] = supported + return supported +} + +func NamespaceTypes() []NamespaceType { + return []NamespaceType{ + NEWNET, + NEWPID, + NEWNS, + NEWUTS, + NEWIPC, + NEWUSER, + } +} + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path"` +} + +func (n *Namespace) GetPath(pid int) string { + if n.Path != "" { + return n.Path + } + return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type)) +} + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} + +func (n *Namespaces) PathOf(t NamespaceType) string { + i := n.index(t) + if i == -1 { + return "" + } + return (*n)[i].Path +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go new file mode 100644 index 0000000..9a74033 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go @@ -0,0 +1,8 @@ +// +build !linux,!freebsd + +package configs + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go new file mode 100644 index 0000000..ccdb228 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go @@ -0,0 +1,72 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type"` + + // Name of the network interface + Name string `json:"name"` + + // The bridge to use. + Bridge string `json:"bridge"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen"` + + // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the + // container. + HostInterfaceName string `json:"host_interface_name"` + + // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface + // bridge port in the case of type veth + // Note: This is unsupported on some systems. + // Note: This does not apply to loopback interfaces. + HairpinMode bool `json:"hairpin_mode"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 + Destination string `json:"destination"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name"` +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go new file mode 100644 index 0000000..1afc52b --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -0,0 +1,143 @@ +// +build linux + +package system + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "syscall" + "unsafe" +) + +// If arg2 is nonzero, set the "child subreaper" attribute of the +// calling process; if arg2 is zero, unset the attribute. When a +// process is marked as a child subreaper, all of the children +// that it creates, and their descendants, will be marked as +// having a subreaper. In effect, a subreaper fulfills the role +// of init(1) for its descendant processes. Upon termination of +// a process that is orphaned (i.e., its immediate parent has +// already terminated) and marked as having a subreaper, the +// nearest still living ancestor subreaper will receive a SIGCHLD +// signal and be able to wait(2) on the process to discover its +// termination status. +const PR_SET_CHILD_SUBREAPER = 36 + +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + + return syscall.Exec(name, args, env) +} + +func Prlimit(pid, resource int, limit syscall.Rlimit) error { + _, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) + if err != 0 { + return err + } + return nil +} + +func SetParentDeathSignal(sig uintptr) error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { + return err + } + return nil +} + +func GetParentDeathSignal() (ParentDeathSignal, error) { + var sig int + _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) + if err != 0 { + return -1, err + } + return ParentDeathSignal(sig), nil +} + +func SetKeepCaps() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 { + return err + } + + return nil +} + +func ClearKeepCaps() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 { + return err + } + + return nil +} + +func Setctty() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { + return err + } + return nil +} + +// RunningInUserNS detects whether we are currently running in a user namespace. +// Copied from github.com/lxc/lxd/shared/util.go +func RunningInUserNS() bool { + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return false + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() + if err != nil { + return false + } + + line := string(l) + var a, b, c int64 + fmt.Sscanf(line, "%d %d %d", &a, &b, &c) + /* + * We assume we are in the initial user namespace if we have a full + * range - 4294967295 uids starting at uid 0. + */ + if a == 0 && b == 0 && c == 4294967295 { + return false + } + return true +} + +// SetSubreaper sets the value i as the subreaper setting for the calling process +func SetSubreaper(i int) error { + return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) +} + +func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { + _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go new file mode 100644 index 0000000..37808a2 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go @@ -0,0 +1,27 @@ +package system + +import ( + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +// look in /proc to find the process start time so that we can verify +// that this pid has started after ourself +func GetProcessStartTime(pid int) (string, error) { + data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return "", err + } + + parts := strings.Split(string(data), " ") + // the starttime is located at pos 22 + // from the man page + // + // starttime %llu (was %lu before Linux 2.6) + // (22) The time the process started after system boot. In kernels before Linux 2.6, this + // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks + // (divide by sysconf(_SC_CLK_TCK)). + return parts[22-1], nil // starts at 1 +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go new file mode 100644 index 0000000..615ff4c --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go @@ -0,0 +1,40 @@ +package system + +import ( + "fmt" + "runtime" + "syscall" +) + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +// +// We need different setns values for the different platforms and arch +// We are declaring the macro here because the SETNS syscall does not exist in th stdlib +var setNsMap = map[string]uintptr{ + "linux/386": 346, + "linux/arm64": 268, + "linux/amd64": 308, + "linux/arm": 375, + "linux/ppc": 350, + "linux/ppc64": 350, + "linux/ppc64le": 350, + "linux/s390x": 339, +} + +var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + +func SysSetns() uint32 { + return uint32(sysSetns) +} + +func Setns(fd uintptr, flags uintptr) error { + ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + if !exists { + return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + } + _, _, err := syscall.RawSyscall(ns, fd, flags, 0) + if err != 0 { + return err + } + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go new file mode 100644 index 0000000..c990065 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go @@ -0,0 +1,25 @@ +// +build linux,386 + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go new file mode 100644 index 0000000..0816bf8 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go @@ -0,0 +1,25 @@ +// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go new file mode 100644 index 0000000..3f780f3 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go @@ -0,0 +1,25 @@ +// +build linux,arm + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go new file mode 100644 index 0000000..b3a07cb --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go @@ -0,0 +1,12 @@ +// +build cgo,linux cgo,freebsd + +package system + +/* +#include +*/ +import "C" + +func GetClockTicks() int { + return int(C.sysconf(C._SC_CLK_TCK)) +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go new file mode 100644 index 0000000..d93b5d5 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go @@ -0,0 +1,15 @@ +// +build !cgo windows + +package system + +func GetClockTicks() int { + // TODO figure out a better alternative for platforms where we're missing cgo + // + // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx + // + // An example of its usage can be found here. + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx + + return 100 +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go new file mode 100644 index 0000000..e7cfd62 --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go @@ -0,0 +1,9 @@ +// +build !linux + +package system + +// RunningInUserNS is a stub for non-Linux systems +// Always returns false +func RunningInUserNS() bool { + return false +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go new file mode 100644 index 0000000..30f74df --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go @@ -0,0 +1,99 @@ +package system + +import ( + "syscall" + "unsafe" +) + +var _zero uintptr + +// Returns the size of xattrs and nil error +// Requires path, takes allocated []byte or nil as last argument +func Llistxattr(path string, dest []byte) (size int, err error) { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return -1, err + } + var newpathBytes unsafe.Pointer + if len(dest) > 0 { + newpathBytes = unsafe.Pointer(&dest[0]) + } else { + newpathBytes = unsafe.Pointer(&_zero) + } + + _size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0) + size = int(_size) + if errno != 0 { + return -1, errno + } + + return size, nil +} + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func Lgetxattr(path string, attr string) ([]byte, error) { + var sz int + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return nil, err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return nil, err + } + + // Start with a 128 length byte array + sz = 128 + dest := make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + + switch { + case errno == syscall.ENODATA: + return nil, errno + case errno == syscall.ENOTSUP: + return nil, errno + case errno == syscall.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used ``uintptr(0)`` to get real size + // of the xattrs on disk + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) + sz = int(_sz) + if sz < 0 { + return nil, errno + } + dest = make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + if errno != 0 { + return nil, errno + } + case errno != 0: + return nil, errno + } + sz = int(_sz) + return dest[:sz], nil +} + +func Lsetxattr(path string, attr string, data []byte, flags int) error { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return err + } + var dataBytes unsafe.Pointer + if len(data) > 0 { + dataBytes = unsafe.Pointer(&data[0]) + } else { + dataBytes = unsafe.Pointer(&_zero) + } + _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) + if errno != 0 { + return errno + } + return nil +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go new file mode 100644 index 0000000..3466bfc --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -0,0 +1,121 @@ +package utils + +import ( + "crypto/rand" + "encoding/hex" + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "syscall" +) + +const ( + exitSignalOffset = 128 +) + +// GenerateRandomName returns a new name joined with a prefix. This size +// specified is used to truncate the randomly generated value +func GenerateRandomName(prefix string, size int) (string, error) { + id := make([]byte, 32) + if _, err := io.ReadFull(rand.Reader, id); err != nil { + return "", err + } + if size > 64 { + size = 64 + } + return prefix + hex.EncodeToString(id)[:size], nil +} + +// ResolveRootfs ensures that the current working directory is +// not a symlink and returns the absolute path to the rootfs +func ResolveRootfs(uncleanRootfs string) (string, error) { + rootfs, err := filepath.Abs(uncleanRootfs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} + +// ExitStatus returns the correct exit status for a process based on if it +// was signaled or exited cleanly +func ExitStatus(status syscall.WaitStatus) int { + if status.Signaled() { + return exitSignalOffset + int(status.Signal()) + } + return status.ExitStatus() +} + +// WriteJSON writes the provided struct v to w using standard json marshaling +func WriteJSON(w io.Writer, v interface{}) error { + data, err := json.Marshal(v) + if err != nil { + return err + } + _, err = w.Write(data) + return err +} + +// CleanPath makes a path safe for use with filepath.Join. This is done by not +// only cleaning the path, but also (if the path is relative) adding a leading +// '/' and cleaning it (then removing the leading '/'). This ensures that a +// path resulting from prepending another path will always resolve to lexically +// be a subdirectory of the prefixed path. This is all done lexically, so paths +// that include symlinks won't be safe as a result of using CleanPath. +func CleanPath(path string) string { + // Deal with empty strings nicely. + if path == "" { + return "" + } + + // Ensure that all paths are cleaned (especially problematic ones like + // "/../../../../../" which can cause lots of issues). + path = filepath.Clean(path) + + // If the path isn't absolute, we need to do more processing to fix paths + // such as "../../../..//some/path". We also shouldn't convert absolute + // paths to relative ones. + if !filepath.IsAbs(path) { + path = filepath.Clean(string(os.PathSeparator) + path) + // This can't fail, as (by definition) all paths are relative to root. + path, _ = filepath.Rel(string(os.PathSeparator), path) + } + + // Clean the path again for good measure. + return filepath.Clean(path) +} + +// SearchLabels searches a list of key-value pairs for the provided key and +// returns the corresponding value. The pairs must be separated with '='. +func SearchLabels(labels []string, query string) string { + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == query { + return parts[1] + } + } + return "" +} + +// Annotations returns the bundle path and user defined annotations from the +// libcontianer state. We need to remove the bundle because that is a label +// added by libcontainer. +func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { + userAnnotations = make(map[string]string) + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == "bundle" { + bundle = parts[1] + } else { + userAnnotations[parts[0]] = parts[1] + } + } + return +} diff --git a/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go new file mode 100644 index 0000000..408918f --- /dev/null +++ b/src/kube2msb/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -0,0 +1,33 @@ +// +build !windows + +package utils + +import ( + "io/ioutil" + "strconv" + "syscall" +) + +func CloseExecFrom(minFd int) error { + fdList, err := ioutil.ReadDir("/proc/self/fd") + if err != nil { + return err + } + for _, fi := range fdList { + fd, err := strconv.Atoi(fi.Name()) + if err != nil { + // ignore non-numeric file names + continue + } + + if fd < minFd { + // ignore descriptors lower than our specified minimum + continue + } + + // intentionally ignore errors from syscall.CloseOnExec + syscall.CloseOnExec(fd) + // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) + } + return nil +} -- cgit 1.2.3-korg