diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index 1c0d798098..88b7d65094 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -23,6 +23,7 @@ var ( limitCheckDurationHistogram = metrics.NewRegisteredHistogram("arb/rpc/limitcheck/duration", nil, metrics.NewBoundedHistogramSample()) limitCheckSuccessCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/success", nil) limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil) + errNotSupported = errors.New("not supported") ) // Init adds the resource manager's httpServer to a custom hook in geth. @@ -33,7 +34,14 @@ var ( func Init(conf *Config) { if conf.MemLimitPercent > 0 { node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) { - return newHttpServer(srv, newLimitChecker(conf)), nil + var c limitChecker + c, err := newCgroupsMemoryLimitCheckerIfSupported(conf) + if errors.Is(err, errNotSupported) { + log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling") + c = &trivialLimitChecker{} + } + + return newHttpServer(srv, c), nil } } } @@ -90,20 +98,27 @@ type limitChecker interface { String() string } -// newLimitChecker attempts to auto-discover the mechanism by which it -// can check system limits. Currently Cgroups V1 is supported, -// with Cgroups V2 likely to be implmemented next. If no supported -// mechanism is discovered, it logs an error and fails open, ie -// it creates a trivialLimitChecker that does no checks. -func newLimitChecker(conf *Config) limitChecker { - c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemLimitPercent) +func isSupported(c limitChecker) bool { + _, err := c.isLimitExceeded() + return err == nil +} + +// newCgroupsMemoryLimitCheckerIfSupported attempts to auto-discover whether +// Cgroups V1 or V2 is supported for checking system memory limits. +func newCgroupsMemoryLimitCheckerIfSupported(conf *Config) (*cgroupsMemoryLimitChecker, error) { + c := newCgroupsMemoryLimitChecker(cgroupsV1MemoryFiles, conf.MemLimitPercent) if isSupported(c) { log.Info("Cgroups v1 detected, enabling memory limit RPC throttling") - return c + return c, nil + } + + c = newCgroupsMemoryLimitChecker(cgroupsV2MemoryFiles, conf.MemLimitPercent) + if isSupported(c) { + log.Info("Cgroups v2 detected, enabling memory limit RPC throttling") + return c, nil } - log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling") - return &trivialLimitChecker{} + return nil, errNotSupported } // trivialLimitChecker checks no limits, so its limits are never exceeded. @@ -115,28 +130,37 @@ func (_ trivialLimitChecker) isLimitExceeded() (bool, error) { func (_ trivialLimitChecker) String() string { return "trivial" } -const DefaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/" +type cgroupsMemoryFiles struct { + limitFile, usageFile, statsFile string + inactiveRe *regexp.Regexp +} -type cgroupsV1MemoryLimitChecker struct { - cgroupDir string - memoryLimitPercent int +const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/" +const defaultCgroupsV2MemoryDirectory = "/sys/fs/cgroup/" - limitFile, usageFile, statsFile string +var cgroupsV1MemoryFiles = cgroupsMemoryFiles{ + limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes", + usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes", + statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat", + inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`), +} +var cgroupsV2MemoryFiles = cgroupsMemoryFiles{ + limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max", + usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current", + statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat", + inactiveRe: regexp.MustCompile(`inactive_file (\d+)`), } -func newCgroupsV1MemoryLimitChecker(cgroupDir string, memoryLimitPercent int) *cgroupsV1MemoryLimitChecker { - return &cgroupsV1MemoryLimitChecker{ - cgroupDir: cgroupDir, - memoryLimitPercent: memoryLimitPercent, - limitFile: cgroupDir + "/memory.limit_in_bytes", - usageFile: cgroupDir + "/memory.usage_in_bytes", - statsFile: cgroupDir + "/memory.stat", - } +type cgroupsMemoryLimitChecker struct { + files cgroupsMemoryFiles + memoryLimitPercent int } -func isSupported(c limitChecker) bool { - _, err := c.isLimitExceeded() - return err == nil +func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memoryLimitPercent int) *cgroupsMemoryLimitChecker { + return &cgroupsMemoryLimitChecker{ + files: files, + memoryLimitPercent: memoryLimitPercent, + } } // isLimitExceeded checks if the system memory used exceeds the limit @@ -145,24 +169,25 @@ func isSupported(c limitChecker) bool { // See the following page for details of calculating the memory used, // which is reported as container_memory_working_set_bytes in prometheus: // https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/ -func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) { +func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) { var limit, usage, inactive int var err error - limit, err = readIntFromFile(c.limitFile) - if err != nil { + if limit, err = readIntFromFile(c.files.limitFile); err != nil { return false, err } - usage, err = readIntFromFile(c.usageFile) - if err != nil { + if usage, err = readIntFromFile(c.files.usageFile); err != nil { return false, err } - inactive, err = readInactive(c.statsFile) - if err != nil { + if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil { return false, err } return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil } +func (c cgroupsMemoryLimitChecker) String() string { + return "CgroupsMemoryLimitChecker" +} + func readIntFromFile(fileName string) (int, error) { file, err := os.Open(fileName) if err != nil { @@ -176,9 +201,7 @@ func readIntFromFile(fileName string) (int, error) { return limit, nil } -var re = regexp.MustCompile(`total_inactive_file (\d+)`) - -func readInactive(fileName string) (int, error) { +func readInactive(fileName string, re *regexp.Regexp) (int, error) { file, err := os.Open(fileName) if err != nil { return 0, err @@ -201,7 +224,3 @@ func readInactive(fileName string) (int, error) { return 0, errors.New("total_inactive_file not found in " + fileName) } - -func (c cgroupsV1MemoryLimitChecker) String() string { - return "CgroupsV1MemoryLimitChecker" -} diff --git a/arbnode/resourcemanager/resource_management_test.go b/arbnode/resourcemanager/resource_management_test.go index fe470e706b..ba791fd729 100644 --- a/arbnode/resourcemanager/resource_management_test.go +++ b/arbnode/resourcemanager/resource_management_test.go @@ -6,29 +6,28 @@ package resourcemanager import ( "fmt" "os" + "regexp" "testing" ) -func updateFakeCgroupv1Files(c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) error { - limitFile, err := os.Create(c.limitFile) +func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error { + limitFile, err := os.Create(c.files.limitFile) if err != nil { return err } - _, err = fmt.Fprintf(limitFile, "%d\n", limit) - if err != nil { + if _, err = fmt.Fprintf(limitFile, "%d\n", limit); err != nil { return err } - usageFile, err := os.Create(c.usageFile) + usageFile, err := os.Create(c.files.usageFile) if err != nil { return err } - _, err = fmt.Fprintf(usageFile, "%d\n", usage) - if err != nil { + if _, err = fmt.Fprintf(usageFile, "%d\n", usage); err != nil { return err } - statsFile, err := os.Create(c.statsFile) + statsFile, err := os.Create(c.files.statsFile) if err != nil { return err } @@ -37,42 +36,57 @@ total_rss 1016209408 total_inactive_file %d total_active_file 321544192 `, inactive) - if err != nil { - return err - } - return nil + return err } -func TestCgroupsv1MemoryLimit(t *testing.T) { - cgroupDir := t.TempDir() - c := newCgroupsV1MemoryLimitChecker(cgroupDir, 95) - _, err := c.isLimitExceeded() - if err == nil { - t.Error("Should fail open if can't read files") +func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles { + return cgroupsMemoryFiles{ + limitFile: cgroupDir + "/memory.limit_in_bytes", + usageFile: cgroupDir + "/memory.usage_in_bytes", + statsFile: cgroupDir + "/memory.stat", + inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`), } +} - err = updateFakeCgroupv1Files(c, 1000, 1000, 51) - if err != nil { - t.Error(err) - } - exceeded, err := c.isLimitExceeded() - if err != nil { - t.Error(err) - } - if exceeded { - t.Error("Expected under limit") +func TestCgroupsFailIfCantOpen(t *testing.T) { + testFiles := makeCgroupsTestDir(t.TempDir()) + c := newCgroupsMemoryLimitChecker(testFiles, 95) + if _, err := c.isLimitExceeded(); err == nil { + t.Fatal("Should fail open if can't read files") } +} - err = updateFakeCgroupv1Files(c, 1000, 1000, 50) - if err != nil { - t.Error(err) +func TestCgroupsMemoryLimit(t *testing.T) { + for _, tc := range []struct { + desc string + inactive int + want bool + }{ + { + desc: "limit should be exceeded", + inactive: 50, + want: true, + }, + { + desc: "limit should not be exceeded", + inactive: 51, + want: false, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + testFiles := makeCgroupsTestDir(t.TempDir()) + c := newCgroupsMemoryLimitChecker(testFiles, 95) + if err := updateFakeCgroupFiles(c, 1000, 1000, tc.inactive); err != nil { + t.Fatalf("Updating cgroup files: %v", err) + } + exceeded, err := c.isLimitExceeded() + if err != nil { + t.Fatalf("Checking if limit exceeded: %v", err) + } + if exceeded != tc.want { + t.Errorf("isLimitExceeded() = %t, want %t", exceeded, tc.want) + } + }, + ) } - exceeded, err = c.isLimitExceeded() - if err != nil { - t.Error(err) - } - if !exceeded { - t.Error("Expected over limit") - } - }