From 57d146c35e86aa9dc9cbb8875a01461627fd10af Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 5 Jul 2023 19:10:57 -0700 Subject: [PATCH 1/7] Reject RPC requests if memory limit is exceeded If the --node.resource-mgmt.mem-limit-percent option is used then if system memory usage exceds that limit, reject new RPC requests with a HTTP 429 error. If the option is used, then Nitro attempts on startup to discover what method is available to check the system memory usage and limit. Currently Cgroups V1 is the only supported method, and if it is not detected then an error is logged once and the limit will not be enforced. --- arbnode/node.go | 3 + arbnode/resource_management.go | 171 ++++++++++++++++++++++++++++ arbnode/resource_management_test.go | 55 +++++++++ cmd/nitro/nitro.go | 2 + go-ethereum | 2 +- 5 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 arbnode/resource_management.go create mode 100644 arbnode/resource_management_test.go diff --git a/arbnode/node.go b/arbnode/node.go index f1fc8e9e63..e78780ef0e 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -329,6 +329,7 @@ type Config struct { TxLookupLimit uint64 `koanf:"tx-lookup-limit"` TransactionStreamer TransactionStreamerConfig `koanf:"transaction-streamer" reload:"hot"` Maintenance MaintenanceConfig `koanf:"maintenance" reload:"hot"` + ResourceManagement ResourceManagementConfig `koanf:"resource-mgmt" reload:"hot"` } func (c *Config) Validate() error { @@ -402,6 +403,7 @@ func ConfigAddOptions(prefix string, f *flag.FlagSet, feedInputEnable bool, feed f.Uint64(prefix+".tx-lookup-limit", ConfigDefault.TxLookupLimit, "retain the ability to lookup transactions by hash for the past N blocks (0 = all blocks)") TransactionStreamerConfigAddOptions(prefix+".transaction-streamer", f) MaintenanceConfigAddOptions(prefix+".maintenance", f) + ResourceManagementConfigAddOptions(prefix+".resource-mgmt", f) archiveMsg := fmt.Sprintf("retain past block state (deprecated, please use %v.caching.archive)", prefix) f.Bool(prefix+".archive", ConfigDefault.Archive, archiveMsg) @@ -428,6 +430,7 @@ var ConfigDefault = Config{ TxLookupLimit: 126_230_400, // 1 year at 4 blocks per second Caching: execution.DefaultCachingConfig, TransactionStreamer: DefaultTransactionStreamerConfig, + ResourceManagement: DefaultResourceManagementConfig, } func ConfigDefaultL1Test() *Config { diff --git a/arbnode/resource_management.go b/arbnode/resource_management.go new file mode 100644 index 0000000000..12dd2d2cf3 --- /dev/null +++ b/arbnode/resource_management.go @@ -0,0 +1,171 @@ +// Copyright 2023, Offchain Labs, Inc. +// For license information, see https://github.com/nitro/blob/master/LICENSE + +package arbnode + +import ( + "bufio" + "errors" + "fmt" + "net/http" + "os" + "regexp" + "strconv" + + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/node" + flag "github.com/spf13/pflag" +) + +func InitResourceManagement(conf *ResourceManagementConfig) { + if conf.MemoryLimitPercent > 0 { + node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) { + return newResourceManagementHttpServer(srv, newLimitChecker(conf)), nil + } + } +} + +type ResourceManagementConfig struct { + MemoryLimitPercent int `koanf:"mem-limit-percent" reload:"hot"` +} + +var DefaultResourceManagementConfig = ResourceManagementConfig{ + MemoryLimitPercent: 0, +} + +func ResourceManagementConfigAddOptions(prefix string, f *flag.FlagSet) { + f.Int(prefix+".mem-limit-percent", DefaultResourceManagementConfig.MemoryLimitPercent, "RPC calls are throttled if system memory utilization exceeds this percent value, zero (default) is disabled") +} + +type resourceManagementHttpServer struct { + inner http.Handler + c limitChecker +} + +func newResourceManagementHttpServer(inner http.Handler, c limitChecker) *resourceManagementHttpServer { + return &resourceManagementHttpServer{inner: inner, c: c} +} + +func (s *resourceManagementHttpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { + exceeded, err := s.c.isLimitExceeded() + if err != nil { + log.Error("Error checking memory limit", "err", err, "checker", s.c) + } else if exceeded { + http.Error(w, "Too many requests", http.StatusTooManyRequests) + return + } + + log.Info("Limit not exceeded, serving request.") + s.inner.ServeHTTP(w, req) +} + +type limitChecker interface { + isLimitExceeded() (bool, error) + String() string +} + +func newLimitChecker(conf *ResourceManagementConfig) limitChecker { + { + c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemoryLimitPercent) + if isSupported(c) { + log.Info("Cgroups v1 detected, enabling memory limit RPC throttling") + return c + } + } + + log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling") + return &trivialLimitChecker{} +} + +type trivialLimitChecker struct{} + +func (_ trivialLimitChecker) isLimitExceeded() (bool, error) { + return false, nil +} + +func (_ trivialLimitChecker) String() string { return "trivial" } + +const DefaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/" + +type cgroupsV1MemoryLimitChecker struct { + cgroupDir string + memoryLimitPercent int + + limitFile, usageFile, statsFile string +} + +func newCgroupsV1MemoryLimitChecker(cgroupDir string, memoryLimitPercent int) *cgroupsV1MemoryLimitChecker { + return &cgroupsV1MemoryLimitChecker{ + cgroupDir: cgroupDir, + memoryLimitPercent: memoryLimitPercent, + limitFile: cgroupDir + "/memory.limit_in_bytes", + usageFile: cgroupDir + "/memory.usage_in_bytes", + statsFile: cgroupDir + "/memory.stat", + } +} + +func isSupported(c limitChecker) bool { + _, err := c.isLimitExceeded() + return err == nil +} + +func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) { + var limit, usage, inactive int + var err error + limit, err = c.getIntFromFile(c.limitFile) + if err != nil { + return false, err + } + usage, err = c.getIntFromFile(c.usageFile) + if err != nil { + return false, err + } + inactive, err = c.getInactive() + if err != nil { + return false, err + } + return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil +} + +func (c cgroupsV1MemoryLimitChecker) getIntFromFile(fileName string) (int, error) { + file, err := os.Open(fileName) + if err != nil { + return 0, err + } + + var limit int + _, err = fmt.Fscanf(file, "%d", &limit) + if err != nil { + return 0, err + } + return limit, nil +} + +func (c cgroupsV1MemoryLimitChecker) getInactive() (int, error) { + file, err := os.Open(c.statsFile) + if err != nil { + return 0, err + } + + scanner := bufio.NewScanner(file) + re := regexp.MustCompile(`total_inactive_file (\d+)`) + for scanner.Scan() { + line := scanner.Text() + + matches := re.FindStringSubmatch(line) + + if len(matches) >= 2 { + inactive, err := strconv.Atoi(matches[1]) + if err != nil { + return 0, err + } + return inactive, nil + } + } + + return 0, errors.New("total_inactive_file not found in " + c.statsFile) +} + +func (c cgroupsV1MemoryLimitChecker) String() string { + return "CgroupsV1MemoryLimitChecker" +} diff --git a/arbnode/resource_management_test.go b/arbnode/resource_management_test.go new file mode 100644 index 0000000000..161256958d --- /dev/null +++ b/arbnode/resource_management_test.go @@ -0,0 +1,55 @@ +// Copyright 2023, Offchain Labs, Inc. +// For license information, see https://github.com/nitro/blob/master/LICENSE + +package arbnode + +import ( + "fmt" + "os" + "testing" +) + +func updateFakeCgroupv1Files(t *testing.T, c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) { + limitFile, err := os.Create(c.limitFile) + Require(t, err) + _, err = fmt.Fprintf(limitFile, "%d\n", limit) + Require(t, err) + + usageFile, err := os.Create(c.usageFile) + Require(t, err) + _, err = fmt.Fprintf(usageFile, "%d\n", usage) + Require(t, err) + + statsFile, err := os.Create(c.statsFile) + Require(t, err) + _, err = fmt.Fprintf(statsFile, `total_cache 1029980160 +total_rss 1016209408 +total_inactive_file %d +total_active_file 321544192 +`, inactive) + Require(t, err) +} + +func TestCgroupsv1MemoryLimit(t *testing.T) { + cgroupDir := t.TempDir() + c := newCgroupsV1MemoryLimitChecker(cgroupDir, 95) + _, err := c.isLimitExceeded() + if err == nil { + Fail(t, "Should fail open if can't read files") + } + + updateFakeCgroupv1Files(t, c, 1000, 1000, 51) + exceeded, err := c.isLimitExceeded() + Require(t, err) + if exceeded { + Fail(t, "Expected under limit") + } + + updateFakeCgroupv1Files(t, c, 1000, 1000, 50) + exceeded, err = c.isLimitExceeded() + Require(t, err) + if !exceeded { + Fail(t, "Expected over limit") + } + +} diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 0035171078..4c6e6eab3a 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -321,6 +321,8 @@ func mainImpl() int { nodeConfig.Node.TxLookupLimit = 0 } + arbnode.InitResourceManagement(&nodeConfig.Node.ResourceManagement) + stack, err := node.New(&stackConf) if err != nil { flag.Usage() diff --git a/go-ethereum b/go-ethereum index 8e6a8ad494..28127f5941 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 8e6a8ad4942591011e833e6ebceca6bd668f3db0 +Subproject commit 28127f5941faec6fe5227c29443d2074639495d0 From 687306f80cd20c9733925cc8f51127a9522c4728 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 11:16:06 -0700 Subject: [PATCH 2/7] Add some metrics around limit checking --- arbnode/resource_management.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arbnode/resource_management.go b/arbnode/resource_management.go index 12dd2d2cf3..8551567e15 100644 --- a/arbnode/resource_management.go +++ b/arbnode/resource_management.go @@ -11,12 +11,20 @@ import ( "os" "regexp" "strconv" + "time" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/node" flag "github.com/spf13/pflag" ) +var ( + limitCheckDurationHistogram = metrics.NewRegisteredHistogram("arb/rpc/limitcheck/duration", nil, metrics.NewBoundedHistogramSample()) + limitCheckSuccessCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/success", nil) + limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil) +) + func InitResourceManagement(conf *ResourceManagementConfig) { if conf.MemoryLimitPercent > 0 { node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) { @@ -47,15 +55,18 @@ func newResourceManagementHttpServer(inner http.Handler, c limitChecker) *resour } func (s *resourceManagementHttpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { + start := time.Now() exceeded, err := s.c.isLimitExceeded() + limitCheckDurationHistogram.Update(time.Since(start).Nanoseconds()) if err != nil { log.Error("Error checking memory limit", "err", err, "checker", s.c) } else if exceeded { http.Error(w, "Too many requests", http.StatusTooManyRequests) + limitCheckFailureCounter.Inc(1) return } - log.Info("Limit not exceeded, serving request.") + limitCheckSuccessCounter.Inc(1) s.inner.ServeHTTP(w, req) } From cc28f2ed911af2cb10551bdd25b762be52d36428 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 11:37:58 -0700 Subject: [PATCH 3/7] Make separate resourcemanagement package --- arbnode/node.go | 7 ++--- .../resource_management.go | 26 +++++++++---------- .../resource_management_test.go | 14 +++++++++- cmd/nitro/nitro.go | 3 ++- 4 files changed, 32 insertions(+), 18 deletions(-) rename arbnode/{ => resourcemanager}/resource_management.go (81%) rename arbnode/{ => resourcemanager}/resource_management_test.go (80%) diff --git a/arbnode/node.go b/arbnode/node.go index e78780ef0e..02b9857877 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/rpc" "github.com/offchainlabs/nitro/arbnode/execution" + "github.com/offchainlabs/nitro/arbnode/resourcemanager" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/broadcastclient" "github.com/offchainlabs/nitro/broadcastclients" @@ -329,7 +330,7 @@ type Config struct { TxLookupLimit uint64 `koanf:"tx-lookup-limit"` TransactionStreamer TransactionStreamerConfig `koanf:"transaction-streamer" reload:"hot"` Maintenance MaintenanceConfig `koanf:"maintenance" reload:"hot"` - ResourceManagement ResourceManagementConfig `koanf:"resource-mgmt" reload:"hot"` + ResourceManagement resourcemanager.Config `koanf:"resource-mgmt" reload:"hot"` } func (c *Config) Validate() error { @@ -403,7 +404,7 @@ func ConfigAddOptions(prefix string, f *flag.FlagSet, feedInputEnable bool, feed f.Uint64(prefix+".tx-lookup-limit", ConfigDefault.TxLookupLimit, "retain the ability to lookup transactions by hash for the past N blocks (0 = all blocks)") TransactionStreamerConfigAddOptions(prefix+".transaction-streamer", f) MaintenanceConfigAddOptions(prefix+".maintenance", f) - ResourceManagementConfigAddOptions(prefix+".resource-mgmt", f) + resourcemanager.ConfigAddOptions(prefix+".resource-mgmt", f) archiveMsg := fmt.Sprintf("retain past block state (deprecated, please use %v.caching.archive)", prefix) f.Bool(prefix+".archive", ConfigDefault.Archive, archiveMsg) @@ -430,7 +431,7 @@ var ConfigDefault = Config{ TxLookupLimit: 126_230_400, // 1 year at 4 blocks per second Caching: execution.DefaultCachingConfig, TransactionStreamer: DefaultTransactionStreamerConfig, - ResourceManagement: DefaultResourceManagementConfig, + ResourceManagement: resourcemanager.DefaultConfig, } func ConfigDefaultL1Test() *Config { diff --git a/arbnode/resource_management.go b/arbnode/resourcemanager/resource_management.go similarity index 81% rename from arbnode/resource_management.go rename to arbnode/resourcemanager/resource_management.go index 8551567e15..e0d8177308 100644 --- a/arbnode/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -1,7 +1,7 @@ // Copyright 2023, Offchain Labs, Inc. // For license information, see https://github.com/nitro/blob/master/LICENSE -package arbnode +package resourcemanager import ( "bufio" @@ -16,7 +16,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/node" - flag "github.com/spf13/pflag" + "github.com/spf13/pflag" ) var ( @@ -25,36 +25,36 @@ var ( limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil) ) -func InitResourceManagement(conf *ResourceManagementConfig) { +func Init(conf *Config) { if conf.MemoryLimitPercent > 0 { node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) { - return newResourceManagementHttpServer(srv, newLimitChecker(conf)), nil + return newHttpServer(srv, newLimitChecker(conf)), nil } } } -type ResourceManagementConfig struct { +type Config struct { MemoryLimitPercent int `koanf:"mem-limit-percent" reload:"hot"` } -var DefaultResourceManagementConfig = ResourceManagementConfig{ +var DefaultConfig = Config{ MemoryLimitPercent: 0, } -func ResourceManagementConfigAddOptions(prefix string, f *flag.FlagSet) { - f.Int(prefix+".mem-limit-percent", DefaultResourceManagementConfig.MemoryLimitPercent, "RPC calls are throttled if system memory utilization exceeds this percent value, zero (default) is disabled") +func ConfigAddOptions(prefix string, f *pflag.FlagSet) { + f.Int(prefix+".mem-limit-percent", DefaultConfig.MemoryLimitPercent, "RPC calls are throttled if system memory utilization exceeds this percent value, zero (default) is disabled") } -type resourceManagementHttpServer struct { +type httpServer struct { inner http.Handler c limitChecker } -func newResourceManagementHttpServer(inner http.Handler, c limitChecker) *resourceManagementHttpServer { - return &resourceManagementHttpServer{inner: inner, c: c} +func newHttpServer(inner http.Handler, c limitChecker) *httpServer { + return &httpServer{inner: inner, c: c} } -func (s *resourceManagementHttpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { +func (s *httpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { start := time.Now() exceeded, err := s.c.isLimitExceeded() limitCheckDurationHistogram.Update(time.Since(start).Nanoseconds()) @@ -75,7 +75,7 @@ type limitChecker interface { String() string } -func newLimitChecker(conf *ResourceManagementConfig) limitChecker { +func newLimitChecker(conf *Config) limitChecker { { c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemoryLimitPercent) if isSupported(c) { diff --git a/arbnode/resource_management_test.go b/arbnode/resourcemanager/resource_management_test.go similarity index 80% rename from arbnode/resource_management_test.go rename to arbnode/resourcemanager/resource_management_test.go index 161256958d..99103e97aa 100644 --- a/arbnode/resource_management_test.go +++ b/arbnode/resourcemanager/resource_management_test.go @@ -1,12 +1,14 @@ // Copyright 2023, Offchain Labs, Inc. // For license information, see https://github.com/nitro/blob/master/LICENSE -package arbnode +package resourcemanager import ( "fmt" "os" "testing" + + "github.com/offchainlabs/nitro/util/testhelpers" ) func updateFakeCgroupv1Files(t *testing.T, c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) { @@ -53,3 +55,13 @@ func TestCgroupsv1MemoryLimit(t *testing.T) { } } + +func Require(t *testing.T, err error, printables ...interface{}) { + t.Helper() + testhelpers.RequireImpl(t, err, printables...) +} + +func Fail(t *testing.T, printables ...interface{}) { + t.Helper() + testhelpers.FailImpl(t, printables...) +} diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 4c6e6eab3a..3074ca7f87 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -38,6 +38,7 @@ import ( "github.com/offchainlabs/nitro/arbnode" "github.com/offchainlabs/nitro/arbnode/execution" + "github.com/offchainlabs/nitro/arbnode/resourcemanager" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" @@ -321,7 +322,7 @@ func mainImpl() int { nodeConfig.Node.TxLookupLimit = 0 } - arbnode.InitResourceManagement(&nodeConfig.Node.ResourceManagement) + resourcemanager.Init(&nodeConfig.Node.ResourceManagement) stack, err := node.New(&stackConf) if err != nil { From a9a34bf2bd378fb954f4bcad482de7a5f755c8f8 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 11:42:48 -0700 Subject: [PATCH 4/7] Change get prefix to read for sysfs reading fns --- arbnode/resourcemanager/resource_management.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index e0d8177308..317239ff6b 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -123,22 +123,22 @@ func isSupported(c limitChecker) bool { func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) { var limit, usage, inactive int var err error - limit, err = c.getIntFromFile(c.limitFile) + limit, err = c.readIntFromFile(c.limitFile) if err != nil { return false, err } - usage, err = c.getIntFromFile(c.usageFile) + usage, err = c.readIntFromFile(c.usageFile) if err != nil { return false, err } - inactive, err = c.getInactive() + inactive, err = c.readInactive() if err != nil { return false, err } return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil } -func (c cgroupsV1MemoryLimitChecker) getIntFromFile(fileName string) (int, error) { +func (c cgroupsV1MemoryLimitChecker) readIntFromFile(fileName string) (int, error) { file, err := os.Open(fileName) if err != nil { return 0, err @@ -152,7 +152,7 @@ func (c cgroupsV1MemoryLimitChecker) getIntFromFile(fileName string) (int, error return limit, nil } -func (c cgroupsV1MemoryLimitChecker) getInactive() (int, error) { +func (c cgroupsV1MemoryLimitChecker) readInactive() (int, error) { file, err := os.Open(c.statsFile) if err != nil { return 0, err From cdcf0ffa4bdbf25f440d96a437d7cd3f675cd68c Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 12:17:33 -0700 Subject: [PATCH 5/7] Removed use of Require/Fail, other minor fixes --- .../resourcemanager/resource_management.go | 6 +- .../resource_management_test.go | 63 +++++++++++-------- 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index 317239ff6b..f8f6457e95 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -145,13 +145,14 @@ func (c cgroupsV1MemoryLimitChecker) readIntFromFile(fileName string) (int, erro } var limit int - _, err = fmt.Fscanf(file, "%d", &limit) - if err != nil { + if _, err = fmt.Fscanf(file, "%d", &limit); err != nil { return 0, err } return limit, nil } +var re = regexp.MustCompile(`total_inactive_file (\d+)`) + func (c cgroupsV1MemoryLimitChecker) readInactive() (int, error) { file, err := os.Open(c.statsFile) if err != nil { @@ -159,7 +160,6 @@ func (c cgroupsV1MemoryLimitChecker) readInactive() (int, error) { } scanner := bufio.NewScanner(file) - re := regexp.MustCompile(`total_inactive_file (\d+)`) for scanner.Scan() { line := scanner.Text() diff --git a/arbnode/resourcemanager/resource_management_test.go b/arbnode/resourcemanager/resource_management_test.go index 99103e97aa..fe470e706b 100644 --- a/arbnode/resourcemanager/resource_management_test.go +++ b/arbnode/resourcemanager/resource_management_test.go @@ -7,29 +7,40 @@ import ( "fmt" "os" "testing" - - "github.com/offchainlabs/nitro/util/testhelpers" ) -func updateFakeCgroupv1Files(t *testing.T, c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) { +func updateFakeCgroupv1Files(c *cgroupsV1MemoryLimitChecker, limit, usage, inactive int) error { limitFile, err := os.Create(c.limitFile) - Require(t, err) + if err != nil { + return err + } _, err = fmt.Fprintf(limitFile, "%d\n", limit) - Require(t, err) + if err != nil { + return err + } usageFile, err := os.Create(c.usageFile) - Require(t, err) + if err != nil { + return err + } _, err = fmt.Fprintf(usageFile, "%d\n", usage) - Require(t, err) + if err != nil { + return err + } statsFile, err := os.Create(c.statsFile) - Require(t, err) + if err != nil { + return err + } _, err = fmt.Fprintf(statsFile, `total_cache 1029980160 total_rss 1016209408 total_inactive_file %d total_active_file 321544192 `, inactive) - Require(t, err) + if err != nil { + return err + } + return nil } func TestCgroupsv1MemoryLimit(t *testing.T) { @@ -37,31 +48,31 @@ func TestCgroupsv1MemoryLimit(t *testing.T) { c := newCgroupsV1MemoryLimitChecker(cgroupDir, 95) _, err := c.isLimitExceeded() if err == nil { - Fail(t, "Should fail open if can't read files") + t.Error("Should fail open if can't read files") } - updateFakeCgroupv1Files(t, c, 1000, 1000, 51) + err = updateFakeCgroupv1Files(c, 1000, 1000, 51) + if err != nil { + t.Error(err) + } exceeded, err := c.isLimitExceeded() - Require(t, err) + if err != nil { + t.Error(err) + } if exceeded { - Fail(t, "Expected under limit") + t.Error("Expected under limit") } - updateFakeCgroupv1Files(t, c, 1000, 1000, 50) + err = updateFakeCgroupv1Files(c, 1000, 1000, 50) + if err != nil { + t.Error(err) + } exceeded, err = c.isLimitExceeded() - Require(t, err) + if err != nil { + t.Error(err) + } if !exceeded { - Fail(t, "Expected over limit") + t.Error("Expected over limit") } } - -func Require(t *testing.T, err error, printables ...interface{}) { - t.Helper() - testhelpers.RequireImpl(t, err, printables...) -} - -func Fail(t *testing.T, printables ...interface{}) { - t.Helper() - testhelpers.FailImpl(t, printables...) -} From 0c1b5bd83ede06ed5fcc7f428eaaf33b0007db5c Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 16:13:57 -0700 Subject: [PATCH 6/7] Add comments to resourcemanager --- .../resourcemanager/resource_management.go | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index f8f6457e95..da73bcdb67 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -25,6 +25,11 @@ var ( limitCheckFailureCounter = metrics.NewRegisteredCounter("arb/rpc/limitcheck/failure", nil) ) +// Init adds the resource manager's httpServer to a custom hook in geth. +// Geth will add it to the stack of http.Handlers so that it is run +// prior to RPC request handling. +// +// Must be run before the go-ethereum stack is set up (ethereum/go-ethereum/node.New). func Init(conf *Config) { if conf.MemoryLimitPercent > 0 { node.WrapHTTPHandler = func(srv http.Handler) (http.Handler, error) { @@ -33,18 +38,26 @@ func Init(conf *Config) { } } +// Config contains the configuration for resourcemanager functionality. +// Currently only a memory limit is supported, other limits may be added +// in the future. type Config struct { MemoryLimitPercent int `koanf:"mem-limit-percent" reload:"hot"` } +// DefaultConfig has the defaul resourcemanager configuration, +// all limits are disabled. var DefaultConfig = Config{ MemoryLimitPercent: 0, } +// ConfigAddOptions adds the configuration options for resourcemanager. func ConfigAddOptions(prefix string, f *pflag.FlagSet) { f.Int(prefix+".mem-limit-percent", DefaultConfig.MemoryLimitPercent, "RPC calls are throttled if system memory utilization exceeds this percent value, zero (default) is disabled") } +// httpServer implements http.Handler and wraps calls to inner with a resource +// limit check. type httpServer struct { inner http.Handler c limitChecker @@ -54,6 +67,8 @@ func newHttpServer(inner http.Handler, c limitChecker) *httpServer { return &httpServer{inner: inner, c: c} } +// ServeHTTP passes req to inner unless any configured system resource +// limit is exceeded, in which case it returns a HTTP 429 error. func (s *httpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { start := time.Now() exceeded, err := s.c.isLimitExceeded() @@ -75,19 +90,23 @@ type limitChecker interface { String() string } +// newLimitChecker attempts to auto-discover the mechanism by which it +// can check system limits. Currently Cgroups V1 is supported, +// with Cgroups V2 likely to be implmemented next. If no supported +// mechanism is discovered, it logs an error and fails open, ie +// it creates a trivialLimitChecker that does no checks. func newLimitChecker(conf *Config) limitChecker { - { - c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemoryLimitPercent) - if isSupported(c) { - log.Info("Cgroups v1 detected, enabling memory limit RPC throttling") - return c - } + c := newCgroupsV1MemoryLimitChecker(DefaultCgroupsV1MemoryDirectory, conf.MemoryLimitPercent) + if isSupported(c) { + log.Info("Cgroups v1 detected, enabling memory limit RPC throttling") + return c } log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling") return &trivialLimitChecker{} } +// trivialLimitChecker checks no limits, so its limits are never exceeded. type trivialLimitChecker struct{} func (_ trivialLimitChecker) isLimitExceeded() (bool, error) { @@ -120,6 +139,12 @@ func isSupported(c limitChecker) bool { return err == nil } +// isLimitExceeded checks if the system memory used exceeds the limit +// scaled by the configured memoryLimitPercent. +// +// See the following page for details of calculating the memory used, +// which is reported as container_memory_working_set_bytes in prometheus: +// https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/ func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) { var limit, usage, inactive int var err error From 95aeec7389e611095950096d144695ebba63fdbf Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Thu, 6 Jul 2023 16:22:09 -0700 Subject: [PATCH 7/7] Remove unnecessary receivers --- arbnode/resourcemanager/resource_management.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index da73bcdb67..acb5355987 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -148,22 +148,22 @@ func isSupported(c limitChecker) bool { func (c *cgroupsV1MemoryLimitChecker) isLimitExceeded() (bool, error) { var limit, usage, inactive int var err error - limit, err = c.readIntFromFile(c.limitFile) + limit, err = readIntFromFile(c.limitFile) if err != nil { return false, err } - usage, err = c.readIntFromFile(c.usageFile) + usage, err = readIntFromFile(c.usageFile) if err != nil { return false, err } - inactive, err = c.readInactive() + inactive, err = readInactive(c.statsFile) if err != nil { return false, err } return usage-inactive >= ((limit * c.memoryLimitPercent) / 100), nil } -func (c cgroupsV1MemoryLimitChecker) readIntFromFile(fileName string) (int, error) { +func readIntFromFile(fileName string) (int, error) { file, err := os.Open(fileName) if err != nil { return 0, err @@ -178,8 +178,8 @@ func (c cgroupsV1MemoryLimitChecker) readIntFromFile(fileName string) (int, erro var re = regexp.MustCompile(`total_inactive_file (\d+)`) -func (c cgroupsV1MemoryLimitChecker) readInactive() (int, error) { - file, err := os.Open(c.statsFile) +func readInactive(fileName string) (int, error) { + file, err := os.Open(fileName) if err != nil { return 0, err } @@ -199,7 +199,7 @@ func (c cgroupsV1MemoryLimitChecker) readInactive() (int, error) { } } - return 0, errors.New("total_inactive_file not found in " + c.statsFile) + return 0, errors.New("total_inactive_file not found in " + fileName) } func (c cgroupsV1MemoryLimitChecker) String() string {