Skip to content

Commit

Permalink
Merge pull request #1744 from OffchainLabs/cgroupsv2-resourcemanager
Browse files Browse the repository at this point in the history
Add cgroups v2 support to resourcemanager
  • Loading branch information
Tristan-Wilson committed Sep 22, 2023
2 parents b5c02f9 + 56a6f11 commit affaf25
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 80 deletions.
101 changes: 60 additions & 41 deletions arbnode/resourcemanager/resource_management.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var (
limitCheckDurationHistogram = metrics.NewRegisteredHistogram("arb/rpc/limitcheck/duration", nil, metrics.NewBoundedHistogramSample())
limitCheckSuccessCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/success", nil)
limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil)
errNotSupported = errors.New("not supported")
)

// Init adds the resource manager's httpServer to a custom hook in geth.
Expand All @@ -33,7 +34,14 @@ var (
func Init(conf *Config) {
if conf.MemLimitPercent > 0 {
node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) {
return newHttpServer(srv, newLimitChecker(conf)), nil
var c limitChecker
c, err := newCgroupsMemoryLimitCheckerIfSupported(conf)
if errors.Is(err, errNotSupported) {
log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
c = &trivialLimitChecker{}
}

return newHttpServer(srv, c), nil
}
}
}
Expand Down Expand Up @@ -90,20 +98,27 @@ type limitChecker interface {
String() string
}

// newLimitChecker attempts to auto-discover the mechanism by which it
// can check system limits. Currently Cgroups V1 is supported,
// with Cgroups V2 likely to be implmemented next. If no supported
// mechanism is discovered, it logs an error and fails open, ie
// it creates a trivialLimitChecker that does no checks.
func newLimitChecker(conf *Config) limitChecker {
c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemLimitPercent)
func isSupported(c limitChecker) bool {
_, err := c.isLimitExceeded()
return err == nil
}

// newCgroupsMemoryLimitCheckerIfSupported attempts to auto-discover whether
// Cgroups V1 or V2 is supported for checking system memory limits.
func newCgroupsMemoryLimitCheckerIfSupported(conf *Config) (*cgroupsMemoryLimitChecker, error) {
c := newCgroupsMemoryLimitChecker(cgroupsV1MemoryFiles, conf.MemLimitPercent)
if isSupported(c) {
log.Info("Cgroups v1 detected, enabling memory limit RPC throttling")
return c
return c, nil
}

c = newCgroupsMemoryLimitChecker(cgroupsV2MemoryFiles, conf.MemLimitPercent)
if isSupported(c) {
log.Info("Cgroups v2 detected, enabling memory limit RPC throttling")
return c, nil
}

log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
return &trivialLimitChecker{}
return nil, errNotSupported
}

// trivialLimitChecker checks no limits, so its limits are never exceeded.
Expand All @@ -115,28 +130,37 @@ func (_ trivialLimitChecker) isLimitExceeded() (bool, error) {

func (_ trivialLimitChecker) String() string { return "trivial" }

const DefaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
type cgroupsMemoryFiles struct {
limitFile, usageFile, statsFile string
inactiveRe *regexp.Regexp
}

type cgroupsV1MemoryLimitChecker struct {
cgroupDir string
memoryLimitPercent int
const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
const defaultCgroupsV2MemoryDirectory = "/sys/fs/cgroup/"

limitFile, usageFile, statsFile string
var cgroupsV1MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes",
usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes",
statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
}
var cgroupsV2MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max",
usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current",
statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`inactive_file (\d+)`),
}

func newCgroupsV1MemoryLimitChecker(cgroupDir string, memoryLimitPercent int) *cgroupsV1MemoryLimitChecker {
return &cgroupsV1MemoryLimitChecker{
cgroupDir: cgroupDir,
memoryLimitPercent: memoryLimitPercent,
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
}
type cgroupsMemoryLimitChecker struct {
files cgroupsMemoryFiles
memoryLimitPercent int
}

func isSupported(c limitChecker) bool {
_, err := c.isLimitExceeded()
return err == nil
func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memoryLimitPercent int) *cgroupsMemoryLimitChecker {
return &cgroupsMemoryLimitChecker{
files: files,
memoryLimitPercent: memoryLimitPercent,
}
}

// isLimitExceeded checks if the system memory used exceeds the limit
Expand All @@ -145,24 +169,25 @@ func isSupported(c limitChecker) bool {
// See the following page for details of calculating the memory used,
// which is reported as container_memory_working_set_bytes in prometheus:
// https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/
func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) {
func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) {
var limit, usage, inactive int
var err error
limit, err = readIntFromFile(c.limitFile)
if err != nil {
if limit, err = readIntFromFile(c.files.limitFile); err != nil {
return false, err
}
usage, err = readIntFromFile(c.usageFile)
if err != nil {
if usage, err = readIntFromFile(c.files.usageFile); err != nil {
return false, err
}
inactive, err = readInactive(c.statsFile)
if err != nil {
if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil {
return false, err
}
return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil
}

func (c cgroupsMemoryLimitChecker) String() string {
return "CgroupsMemoryLimitChecker"
}

func readIntFromFile(fileName string) (int, error) {
file, err := os.Open(fileName)
if err != nil {
Expand All @@ -176,9 +201,7 @@ func readIntFromFile(fileName string) (int, error) {
return limit, nil
}

var re = regexp.MustCompile(`total_inactive_file (\d+)`)

func readInactive(fileName string) (int, error) {
func readInactive(fileName string, re *regexp.Regexp) (int, error) {
file, err := os.Open(fileName)
if err != nil {
return 0, err
Expand All @@ -201,7 +224,3 @@ func readInactive(fileName string) (int, error) {

return 0, errors.New("total_inactive_file not found in " + fileName)
}

func (c cgroupsV1MemoryLimitChecker) String() string {
return "CgroupsV1MemoryLimitChecker"
}
92 changes: 53 additions & 39 deletions arbnode/resourcemanager/resource_management_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,28 @@ package resourcemanager
import (
"fmt"
"os"
"regexp"
"testing"
)

func updateFakeCgroupv1Files(c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) error {
limitFile, err := os.Create(c.limitFile)
func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error {
limitFile, err := os.Create(c.files.limitFile)
if err != nil {
return err
}
_, err = fmt.Fprintf(limitFile, "%d\n", limit)
if err != nil {
if _, err = fmt.Fprintf(limitFile, "%d\n", limit); err != nil {
return err
}

usageFile, err := os.Create(c.usageFile)
usageFile, err := os.Create(c.files.usageFile)
if err != nil {
return err
}
_, err = fmt.Fprintf(usageFile, "%d\n", usage)
if err != nil {
if _, err = fmt.Fprintf(usageFile, "%d\n", usage); err != nil {
return err
}

statsFile, err := os.Create(c.statsFile)
statsFile, err := os.Create(c.files.statsFile)
if err != nil {
return err
}
Expand All @@ -37,42 +36,57 @@ total_rss 1016209408
total_inactive_file %d
total_active_file 321544192
`, inactive)
if err != nil {
return err
}
return nil
return err
}

func TestCgroupsv1MemoryLimit(t *testing.T) {
cgroupDir := t.TempDir()
c := newCgroupsV1MemoryLimitChecker(cgroupDir, 95)
_, err := c.isLimitExceeded()
if err == nil {
t.Error("Should fail open if can't read files")
func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles {
return cgroupsMemoryFiles{
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
}
}

err = updateFakeCgroupv1Files(c, 1000, 1000, 51)
if err != nil {
t.Error(err)
}
exceeded, err := c.isLimitExceeded()
if err != nil {
t.Error(err)
}
if exceeded {
t.Error("Expected under limit")
func TestCgroupsFailIfCantOpen(t *testing.T) {
testFiles := makeCgroupsTestDir(t.TempDir())
c := newCgroupsMemoryLimitChecker(testFiles, 95)
if _, err := c.isLimitExceeded(); err == nil {
t.Fatal("Should fail open if can't read files")
}
}

err = updateFakeCgroupv1Files(c, 1000, 1000, 50)
if err != nil {
t.Error(err)
func TestCgroupsMemoryLimit(t *testing.T) {
for _, tc := range []struct {
desc string
inactive int
want bool
}{
{
desc: "limit should be exceeded",
inactive: 50,
want: true,
},
{
desc: "limit should not be exceeded",
inactive: 51,
want: false,
},
} {
t.Run(tc.desc, func(t *testing.T) {
testFiles := makeCgroupsTestDir(t.TempDir())
c := newCgroupsMemoryLimitChecker(testFiles, 95)
if err := updateFakeCgroupFiles(c, 1000, 1000, tc.inactive); err != nil {
t.Fatalf("Updating cgroup files: %v", err)
}
exceeded, err := c.isLimitExceeded()
if err != nil {
t.Fatalf("Checking if limit exceeded: %v", err)
}
if exceeded != tc.want {
t.Errorf("isLimitExceeded() = %t, want %t", exceeded, tc.want)
}
},
)
}
exceeded, err = c.isLimitExceeded()
if err != nil {
t.Error(err)
}
if !exceeded {
t.Error("Expected over limit")
}

}

0 comments on commit affaf25

Please sign in to comment.