Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cgroups v2 support to resourcemanager #1744

Merged
merged 11 commits into from
Sep 22, 2023
Merged
101 changes: 60 additions & 41 deletions arbnode/resourcemanager/resource_management.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var (
limitCheckDurationHistogram = metrics.NewRegisteredHistogram("arb/rpc/limitcheck/duration", nil, metrics.NewBoundedHistogramSample())
limitCheckSuccessCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/success", nil)
limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil)
errNotSupported = errors.New("not supported")
)

// Init adds the resource manager's httpServer to a custom hook in geth.
Expand All @@ -33,7 +34,14 @@ var (
func Init(conf *Config) {
if conf.MemLimitPercent > 0 {
node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) {
return newHttpServer(srv, newLimitChecker(conf)), nil
var c limitChecker
c, err := newCgroupsMemoryLimitCheckerIfSupported(conf)
if errors.Is(err, errNotSupported) {
log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
c = &trivialLimitChecker{}
}

return newHttpServer(srv, c), nil
}
}
}
Expand Down Expand Up @@ -90,20 +98,27 @@ type limitChecker interface {
String() string
}

// newLimitChecker attempts to auto-discover the mechanism by which it
// can check system limits. Currently Cgroups V1 is supported,
// with Cgroups V2 likely to be implmemented next. If no supported
// mechanism is discovered, it logs an error and fails open, ie
// it creates a trivialLimitChecker that does no checks.
func newLimitChecker(conf *Config) limitChecker {
c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemLimitPercent)
func isSupported(c limitChecker) bool {
_, err := c.isLimitExceeded()
return err == nil
}

// newCgroupsMemoryLimitCheckerIfSupported attempts to auto-discover whether
// Cgroups V1 or V2 is supported for checking system memory limits.
func newCgroupsMemoryLimitCheckerIfSupported(conf *Config) (*cgroupsMemoryLimitChecker, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: go tends to encourage shorter names. You can drop "IfSupported" suffix and just add in the comment above the function that this returns errNotSupported if it's not supported.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what to rename it to since newCgroupsMemoryLimitChecker is already the factory method for the CgroupsMemoryLimitChecker and this version handles auto-detecting cgroups v1 or v2.

c := newCgroupsMemoryLimitChecker(cgroupsV1MemoryFiles, conf.MemLimitPercent)
if isSupported(c) {
log.Info("Cgroups v1 detected, enabling memory limit RPC throttling")
return c
return c, nil
}

c = newCgroupsMemoryLimitChecker(cgroupsV2MemoryFiles, conf.MemLimitPercent)
if isSupported(c) {
log.Info("Cgroups v2 detected, enabling memory limit RPC throttling")
return c, nil
}

log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
return &trivialLimitChecker{}
return nil, errNotSupported
}

// trivialLimitChecker checks no limits, so its limits are never exceeded.
Expand All @@ -115,28 +130,37 @@ func (_ trivialLimitChecker) isLimitExceeded() (bool, error) {

func (_ trivialLimitChecker) String() string { return "trivial" }

const DefaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
type cgroupsMemoryFiles struct {
limitFile, usageFile, statsFile string
inactiveRe *regexp.Regexp
}

type cgroupsV1MemoryLimitChecker struct {
cgroupDir string
memoryLimitPercent int
const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
const defaultCgroupsV2MemoryDirectory = "/sys/fs/cgroup/"

limitFile, usageFile, statsFile string
var cgroupsV1MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes",
usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes",
statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
}
var cgroupsV2MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max",
usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current",
statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`inactive_file (\d+)`),
}

func newCgroupsV1MemoryLimitChecker(cgroupDir string, memoryLimitPercent int) *cgroupsV1MemoryLimitChecker {
return &cgroupsV1MemoryLimitChecker{
cgroupDir: cgroupDir,
memoryLimitPercent: memoryLimitPercent,
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
}
type cgroupsMemoryLimitChecker struct {
files cgroupsMemoryFiles
memoryLimitPercent int
}

func isSupported(c limitChecker) bool {
_, err := c.isLimitExceeded()
return err == nil
func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memoryLimitPercent int) *cgroupsMemoryLimitChecker {
return &cgroupsMemoryLimitChecker{
files: files,
memoryLimitPercent: memoryLimitPercent,
}
}

// isLimitExceeded checks if the system memory used exceeds the limit
Expand All @@ -145,24 +169,25 @@ func isSupported(c limitChecker) bool {
// See the following page for details of calculating the memory used,
// which is reported as container_memory_working_set_bytes in prometheus:
// https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/
func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) {
func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) {
var limit, usage, inactive int
var err error
limit, err = readIntFromFile(c.limitFile)
if err != nil {
if limit, err = readIntFromFile(c.files.limitFile); err != nil {
return false, err
}
usage, err = readIntFromFile(c.usageFile)
if err != nil {
if usage, err = readIntFromFile(c.files.usageFile); err != nil {
return false, err
}
inactive, err = readInactive(c.statsFile)
if err != nil {
if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil {
return false, err
}
return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil
}

func (c cgroupsMemoryLimitChecker) String() string {
return "CgroupsMemoryLimitChecker"
}

func readIntFromFile(fileName string) (int, error) {
file, err := os.Open(fileName)
if err != nil {
Expand All @@ -176,9 +201,7 @@ func readIntFromFile(fileName string) (int, error) {
return limit, nil
}

var re = regexp.MustCompile(`total_inactive_file (\d+)`)

func readInactive(fileName string) (int, error) {
func readInactive(fileName string, re *regexp.Regexp) (int, error) {
file, err := os.Open(fileName)
if err != nil {
return 0, err
Expand All @@ -201,7 +224,3 @@ func readInactive(fileName string) (int, error) {

return 0, errors.New("total_inactive_file not found in " + fileName)
}

func (c cgroupsV1MemoryLimitChecker) String() string {
return "CgroupsV1MemoryLimitChecker"
}
92 changes: 53 additions & 39 deletions arbnode/resourcemanager/resource_management_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,28 @@ package resourcemanager
import (
"fmt"
"os"
"regexp"
"testing"
)

func updateFakeCgroupv1Files(c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) error {
limitFile, err := os.Create(c.limitFile)
func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error {
limitFile, err := os.Create(c.files.limitFile)
if err != nil {
return err
}
_, err = fmt.Fprintf(limitFile, "%d\n", limit)
if err != nil {
if _, err = fmt.Fprintf(limitFile, "%d\n", limit); err != nil {
return err
}

usageFile, err := os.Create(c.usageFile)
usageFile, err := os.Create(c.files.usageFile)
if err != nil {
return err
}
_, err = fmt.Fprintf(usageFile, "%d\n", usage)
if err != nil {
if _, err = fmt.Fprintf(usageFile, "%d\n", usage); err != nil {
return err
}

statsFile, err := os.Create(c.statsFile)
statsFile, err := os.Create(c.files.statsFile)
if err != nil {
return err
}
Expand All @@ -37,42 +36,57 @@ total_rss 1016209408
total_inactive_file %d
total_active_file 321544192
`, inactive)
if err != nil {
return err
}
return nil
return err
}

func TestCgroupsv1MemoryLimit(t *testing.T) {
cgroupDir := t.TempDir()
c := newCgroupsV1MemoryLimitChecker(cgroupDir, 95)
_, err := c.isLimitExceeded()
if err == nil {
t.Error("Should fail open if can't read files")
func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles {
return cgroupsMemoryFiles{
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
}
}

err = updateFakeCgroupv1Files(c, 1000, 1000, 51)
if err != nil {
t.Error(err)
}
exceeded, err := c.isLimitExceeded()
if err != nil {
t.Error(err)
}
if exceeded {
t.Error("Expected under limit")
func TestCgroupsFailIfCantOpen(t *testing.T) {
Copy link
Contributor

@anodar anodar Jul 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

func TestCgroupsv1MemoryLimit(t *testing.T) {

  for _, tc := range []struct {
        desc string
        inactive int
        want bool
} {
    {  
       desc: "limit should be exceeded", 
      inactive: 51,
      want: true,

    },
    {  
       desc: "limit should not be exceeded", 
      inactive: 51,
      want: false,

    },
} {
   t.Run(tc.desc, func(t *testing.T) {
         dir := makeCgroupsTestDir(t.TempDir())
         if err := updateFakeCgroupFiles(c, 1000, 1000, tc.inactive); err != nil {
      		t.Fatal("Updating cgroup files: %v", err)
         }
         exceeded, err := c.isLimitExceeded()
         if err != nil {
            t.Fatalf("Checking if limit exceeded: %v", err)
         }
         if exceeded != tc.want {
            t.Errorf("isLimitExceeded() = %t, want %t", exceeded, tc.want)
         }
	}
   })
}
}

You can either also have TestCgroupsFailIfCantOpen as another case in the test, or leave as is.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I'll use this pattern more often. Took your code largely as suggested.

testFiles := makeCgroupsTestDir(t.TempDir())
c := newCgroupsMemoryLimitChecker(testFiles, 95)
if _, err := c.isLimitExceeded(); err == nil {
t.Fatal("Should fail open if can't read files")
}
}

err = updateFakeCgroupv1Files(c, 1000, 1000, 50)
if err != nil {
t.Error(err)
func TestCgroupsMemoryLimit(t *testing.T) {
for _, tc := range []struct {
desc string
inactive int
want bool
}{
{
desc: "limit should be exceeded",
inactive: 50,
want: true,
},
{
desc: "limit should not be exceeded",
inactive: 51,
want: false,
},
} {
t.Run(tc.desc, func(t *testing.T) {
testFiles := makeCgroupsTestDir(t.TempDir())
c := newCgroupsMemoryLimitChecker(testFiles, 95)
if err := updateFakeCgroupFiles(c, 1000, 1000, tc.inactive); err != nil {
t.Fatalf("Updating cgroup files: %v", err)
}
exceeded, err := c.isLimitExceeded()
if err != nil {
t.Fatalf("Checking if limit exceeded: %v", err)
}
if exceeded != tc.want {
t.Errorf("isLimitExceeded() = %t, want %t", exceeded, tc.want)
}
},
)
}
exceeded, err = c.isLimitExceeded()
if err != nil {
t.Error(err)
}
if !exceeded {
t.Error("Expected over limit")
}

}
Loading