diff --git a/cmd/conf/init.go b/cmd/conf/init.go index bebf1955b7..8a6c5096fb 100644 --- a/cmd/conf/init.go +++ b/cmd/conf/init.go @@ -3,41 +3,44 @@ package conf import ( "time" + "github.com/ethereum/go-ethereum/log" "github.com/spf13/pflag" ) type InitConfig struct { - Force bool `koanf:"force"` - Url string `koanf:"url"` - DownloadPath string `koanf:"download-path"` - DownloadPoll time.Duration `koanf:"download-poll"` - DevInit bool `koanf:"dev-init"` - DevInitAddress string `koanf:"dev-init-address"` - DevInitBlockNum uint64 `koanf:"dev-init-blocknum"` - Empty bool `koanf:"empty"` - AccountsPerSync uint `koanf:"accounts-per-sync"` - ImportFile string `koanf:"import-file"` - ThenQuit bool `koanf:"then-quit"` - Prune string `koanf:"prune"` - PruneBloomSize uint64 `koanf:"prune-bloom-size"` - ResetToMessage int64 `koanf:"reset-to-message"` + Force bool `koanf:"force"` + Url string `koanf:"url"` + DownloadPath string `koanf:"download-path"` + DownloadPoll time.Duration `koanf:"download-poll"` + DevInit bool `koanf:"dev-init"` + DevInitAddress string `koanf:"dev-init-address"` + DevInitBlockNum uint64 `koanf:"dev-init-blocknum"` + Empty bool `koanf:"empty"` + AccountsPerSync uint `koanf:"accounts-per-sync"` + ImportFile string `koanf:"import-file"` + ThenQuit bool `koanf:"then-quit"` + Prune string `koanf:"prune"` + PruneBloomSize uint64 `koanf:"prune-bloom-size"` + ResetToMessage int64 `koanf:"reset-to-message"` + RecreateMissingStateFrom uint64 `koanf:"recreate-missing-state-from"` } var InitConfigDefault = InitConfig{ - Force: false, - Url: "", - DownloadPath: "/tmp/", - DownloadPoll: time.Minute, - DevInit: false, - DevInitAddress: "", - DevInitBlockNum: 0, - Empty: false, - ImportFile: "", - AccountsPerSync: 100000, - ThenQuit: false, - Prune: "", - PruneBloomSize: 2048, - ResetToMessage: -1, + Force: false, + Url: "", + DownloadPath: "/tmp/", + DownloadPoll: time.Minute, + DevInit: false, + DevInitAddress: "", + DevInitBlockNum: 0, + Empty: false, + ImportFile: "", + AccountsPerSync: 100000, + ThenQuit: false, + Prune: "", + PruneBloomSize: 2048, + ResetToMessage: -1, + RecreateMissingStateFrom: 0, // 0 = disabled } func InitConfigAddOptions(prefix string, f *pflag.FlagSet) { @@ -55,4 +58,12 @@ func InitConfigAddOptions(prefix string, f *pflag.FlagSet) { f.String(prefix+".prune", InitConfigDefault.Prune, "pruning for a given use: \"full\" for full nodes serving RPC requests, or \"validator\" for validators") f.Uint64(prefix+".prune-bloom-size", InitConfigDefault.PruneBloomSize, "the amount of memory in megabytes to use for the pruning bloom filter (higher values prune better)") f.Int64(prefix+".reset-to-message", InitConfigDefault.ResetToMessage, "forces a reset to an old message height. Also set max-reorg-resequence-depth=0 to force re-reading messages") + f.Uint64(prefix+".recreate-missing-state-from", InitConfigDefault.RecreateMissingStateFrom, "block number to start recreating missing states from (0 = disabled)") +} + +func (c *InitConfig) Validate() error { + if c.Force && c.RecreateMissingStateFrom > 0 { + log.Warn("force init enabled, recreate-missing-state-from will have no effect") + } + return nil } diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index 4cf5dcda06..ebc57b13b8 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -34,6 +34,7 @@ import ( "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/ipfshelper" "github.com/offchainlabs/nitro/cmd/pruning" + "github.com/offchainlabs/nitro/cmd/staterecovery" "github.com/offchainlabs/nitro/cmd/util" "github.com/offchainlabs/nitro/execution/gethexec" "github.com/offchainlabs/nitro/statetransfer" @@ -183,6 +184,13 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return chainDb, l2BlockChain, err } + if config.Init.RecreateMissingStateFrom > 0 { + err = staterecovery.RecreateMissingStates(chainDb, l2BlockChain, cacheConfig, config.Init.RecreateMissingStateFrom) + if err != nil { + return chainDb, l2BlockChain, fmt.Errorf("failed to recreate missing states: %w", err) + } + } + return chainDb, l2BlockChain, nil } readOnlyDb.Close() diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 45f539488d..e5b97d4a1c 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -781,6 +781,12 @@ func (c *NodeConfig) CanReload(new *NodeConfig) error { } func (c *NodeConfig) Validate() error { + if c.Init.RecreateMissingStateFrom > 0 && !c.Execution.Caching.Archive { + return errors.New("recreate-missing-state-from enabled for a non-archive node") + } + if err := c.Init.Validate(); err != nil { + return err + } if err := c.ParentChain.Validate(); err != nil { return err } diff --git a/cmd/staterecovery/staterecovery.go b/cmd/staterecovery/staterecovery.go new file mode 100644 index 0000000000..6390826a91 --- /dev/null +++ b/cmd/staterecovery/staterecovery.go @@ -0,0 +1,88 @@ +package staterecovery + +import ( + "fmt" + "time" + + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/vm" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" +) + +func RecreateMissingStates(chainDb ethdb.Database, bc *core.BlockChain, cacheConfig *core.CacheConfig, startBlock uint64) error { + start := time.Now() + currentHeader := bc.CurrentBlock() + if currentHeader == nil { + return fmt.Errorf("current header is nil") + } + target := currentHeader.Number.Uint64() + current := startBlock + genesis := bc.Config().ArbitrumChainParams.GenesisBlockNum + if current < genesis+1 { + current = genesis + 1 + log.Warn("recreate-missing-states-from before genesis+1, starting from genesis+1", "configured", startBlock, "override", current) + } + previousBlock := bc.GetBlockByNumber(current - 1) + if previousBlock == nil { + return fmt.Errorf("start block parent is missing, parent block number: %d", current-1) + } + hashConfig := *hashdb.Defaults + hashConfig.CleanCacheSize = cacheConfig.TrieCleanLimit + trieConfig := &trie.Config{ + Preimages: false, + HashDB: &hashConfig, + } + database := state.NewDatabaseWithConfig(chainDb, trieConfig) + defer database.TrieDB().Close() + previousState, err := state.New(previousBlock.Root(), database, nil) + if err != nil { + return fmt.Errorf("state of start block parent is missing: %w", err) + } + // we don't need to reference states with `trie.Database.Reference` here, because: + // * either the state nodes will be read from disk and then cached in cleans cache + // * or they will be recreated, saved to disk and then also cached in cleans cache + logged := time.Unix(0, 0) + recreated := 0 + for { + currentBlock := bc.GetBlockByNumber(current) + if currentBlock == nil { + break + } + if time.Since(logged) > 1*time.Minute { + log.Info("Recreating missing states", "block", current, "target", target, "remaining", int64(target)-int64(current), "elapsed", time.Since(start), "recreated", recreated) + logged = time.Now() + } + currentState, err := state.New(currentBlock.Root(), database, nil) + if err != nil { + _, _, _, err := bc.Processor().Process(currentBlock, previousState, vm.Config{}) + if err != nil { + return fmt.Errorf("processing block %d failed: %w", current, err) + } + root, err := previousState.Commit(current, bc.Config().IsEIP158(currentBlock.Number())) + if err != nil { + return fmt.Errorf("StateDB commit failed, number %d root %v: %w", current, currentBlock.Root(), err) + } + if root.Cmp(currentBlock.Root()) != 0 { + return fmt.Errorf("reached different state root after processing block %d, have %v, want %v", current, root, currentBlock.Root()) + } + // commit to disk + err = database.TrieDB().Commit(root, false) + if err != nil { + return fmt.Errorf("TrieDB commit failed, number %d root %v: %w", current, root, err) + } + currentState, err = state.New(currentBlock.Root(), database, nil) + if err != nil { + return fmt.Errorf("state reset after block %d failed: %w", current, err) + } + recreated++ + } + current++ + previousState = currentState + } + log.Info("Finished recreating missing states", "elapsed", time.Since(start), "recreated", recreated) + return nil +} diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index a85224b635..2a20c3da26 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -67,6 +67,7 @@ var DefaultCachingConfig = CachingConfig{ MaxAmountOfGasToSkipStateSaving: 0, } +// TODO remove stack from parameters as it is no longer needed here func DefaultCacheConfigFor(stack *node.Node, cachingConfig *CachingConfig) *core.CacheConfig { baseConf := ethconfig.Defaults if cachingConfig.Archive { diff --git a/precompiles/precompile.go b/precompiles/precompile.go index 5d2ecce745..5a16a1f903 100644 --- a/precompiles/precompile.go +++ b/precompiles/precompile.go @@ -560,6 +560,7 @@ func Precompiles() map[addr]ArbosPrecompile { ArbOwnerPublic.methodsByName["GetInfraFeeAccount"].arbosVersion = 5 ArbOwnerPublic.methodsByName["RectifyChainOwner"].arbosVersion = 11 ArbOwnerPublic.methodsByName["GetBrotliCompressionLevel"].arbosVersion = 20 + ArbOwnerPublic.methodsByName["GetScheduledUpgrade"].arbosVersion = 20 ArbRetryableImpl := &ArbRetryableTx{Address: types.ArbRetryableTxAddress} ArbRetryable := insert(MakePrecompile(templates.ArbRetryableTxMetaData, ArbRetryableImpl)) diff --git a/system_tests/staterecovery_test.go b/system_tests/staterecovery_test.go new file mode 100644 index 0000000000..ac30038cc1 --- /dev/null +++ b/system_tests/staterecovery_test.go @@ -0,0 +1,95 @@ +package arbtest + +import ( + "context" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/node" + "github.com/ethereum/go-ethereum/trie" + "github.com/offchainlabs/nitro/cmd/staterecovery" + "github.com/offchainlabs/nitro/execution/gethexec" +) + +func TestRectreateMissingStates(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + builder := NewNodeBuilder(ctx).DefaultConfig(t, true) + builder.execConfig.Caching.Archive = true + builder.execConfig.Caching.MaxNumberOfBlocksToSkipStateSaving = 16 + builder.execConfig.Caching.SnapshotCache = 0 // disable snapshots + _ = builder.Build(t) + l2cleanupDone := false + defer func() { + if !l2cleanupDone { + builder.L2.cleanup() + } + builder.L1.cleanup() + }() + builder.L2Info.GenerateAccount("User2") + var txs []*types.Transaction + for i := uint64(0); i < 200; i++ { + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + txs = append(txs, tx) + err := builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + } + for _, tx := range txs { + _, err := builder.L2.EnsureTxSucceeded(tx) + Require(t, err) + } + lastBlock, err := builder.L2.Client.BlockNumber(ctx) + Require(t, err) + l2cleanupDone = true + builder.L2.cleanup() + t.Log("stopped l2 node") + func() { + stack, err := node.New(builder.l2StackConfig) + Require(t, err) + defer stack.Close() + chainDb, err := stack.OpenDatabase("chaindb", 0, 0, "", false) + Require(t, err) + defer chainDb.Close() + cacheConfig := gethexec.DefaultCacheConfigFor(stack, &gethexec.DefaultCachingConfig) + bc, err := gethexec.GetBlockChain(chainDb, cacheConfig, builder.chainConfig, builder.execConfig.TxLookupLimit) + Require(t, err) + err = staterecovery.RecreateMissingStates(chainDb, bc, cacheConfig, 1) + Require(t, err) + }() + + testClient, cleanup := builder.Build2ndNode(t, &SecondNodeParams{stackConfig: builder.l2StackConfig}) + defer cleanup() + + currentBlock := uint64(0) + // wait for the chain to catch up + for currentBlock < lastBlock { + currentBlock, err = testClient.Client.BlockNumber(ctx) + Require(t, err) + time.Sleep(20 * time.Millisecond) + } + + currentBlock, err = testClient.Client.BlockNumber(ctx) + Require(t, err) + bc := testClient.ExecNode.Backend.ArbInterface().BlockChain() + triedb := bc.StateCache().TrieDB() + for i := uint64(0); i <= currentBlock; i++ { + header := bc.GetHeaderByNumber(i) + _, err := bc.StateAt(header.Root) + Require(t, err) + tr, err := trie.New(trie.TrieID(header.Root), triedb) + Require(t, err) + it, err := tr.NodeIterator(nil) + Require(t, err) + for it.Next(true) { + } + Require(t, it.Error()) + } + + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + err = testClient.Client.SendTransaction(ctx, tx) + Require(t, err) + _, err = testClient.EnsureTxSucceeded(tx) + Require(t, err) +} diff --git a/util/rpcclient/rpcclient.go b/util/rpcclient/rpcclient.go index dee6e9252a..dbc145d490 100644 --- a/util/rpcclient/rpcclient.go +++ b/util/rpcclient/rpcclient.go @@ -155,13 +155,26 @@ func (c *RpcClient) CallContext(ctx_in context.Context, result interface{}, meth ctx, cancelCtx = context.WithCancel(ctx_in) } err = c.client.CallContext(ctx, result, method, args...) + cancelCtx() logger := log.Trace limit := int(c.config().ArgLogLimit) if err != nil && err.Error() != "already known" { logger = log.Info } - logger("rpc response", "method", method, "logId", logId, "err", err, "result", limitedMarshal{limit, result}, "attempt", i, "args", limitedArgumentsMarshal{limit, args}) + logEntry := []interface{}{ + "method", method, + "logId", logId, + "err", err, + "result", limitedMarshal{limit, result}, + "attempt", i, + "args", limitedArgumentsMarshal{limit, args}, + } + var dataErr rpc.DataError + if errors.As(err, &dataErr) { + logEntry = append(logEntry, "errorData", limitedMarshal{limit, dataErr.ErrorData()}) + } + logger("rpc response", logEntry...) if err == nil { return nil }