Skip to content

Commit

Permalink
Merge pull request #2114 from OffchainLabs/state-recovery
Browse files Browse the repository at this point in the history
add option to recreate missing states in archive node
  • Loading branch information
PlasmaPower authored Feb 2, 2024
2 parents b9043c4 + f632620 commit 4128f24
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 28 deletions.
67 changes: 39 additions & 28 deletions cmd/conf/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,44 @@ package conf
import (
"time"

"github.com/ethereum/go-ethereum/log"
"github.com/spf13/pflag"
)

// InitConfig holds the node initialization options. Every field is bound to
// the configuration key named in its koanf tag.
type InitConfig struct {
	Force           bool          `koanf:"force"`
	Url             string        `koanf:"url"`
	DownloadPath    string        `koanf:"download-path"`
	DownloadPoll    time.Duration `koanf:"download-poll"`
	DevInit         bool          `koanf:"dev-init"`
	DevInitAddress  string        `koanf:"dev-init-address"`
	DevInitBlockNum uint64        `koanf:"dev-init-blocknum"`
	Empty           bool          `koanf:"empty"`
	AccountsPerSync uint          `koanf:"accounts-per-sync"`
	ImportFile      string        `koanf:"import-file"`
	ThenQuit        bool          `koanf:"then-quit"`
	// Prune selects pruning for a given use: "full" for full nodes serving
	// RPC requests, or "validator" for validators.
	Prune string `koanf:"prune"`
	// PruneBloomSize is the amount of memory, in megabytes, to use for the
	// pruning bloom filter.
	PruneBloomSize uint64 `koanf:"prune-bloom-size"`
	// ResetToMessage forces a reset to an old message height.
	ResetToMessage int64 `koanf:"reset-to-message"`
	// RecreateMissingStateFrom is the block number to start recreating
	// missing states from; 0 disables the feature.
	RecreateMissingStateFrom uint64 `koanf:"recreate-missing-state-from"`
}

// InitConfigDefault is the set of init option values used as the defaults
// when the command-line flags are registered.
var InitConfigDefault = InitConfig{
	Force:                    false,
	Url:                      "",
	DownloadPath:             "/tmp/",
	DownloadPoll:             time.Minute,
	DevInit:                  false,
	DevInitAddress:           "",
	DevInitBlockNum:          0,
	Empty:                    false,
	ImportFile:               "",
	AccountsPerSync:          100000,
	ThenQuit:                 false,
	Prune:                    "",
	PruneBloomSize:           2048, // megabytes
	ResetToMessage:           -1,
	RecreateMissingStateFrom: 0, // 0 = disabled
}

func InitConfigAddOptions(prefix string, f *pflag.FlagSet) {
Expand All @@ -55,4 +58,12 @@ func InitConfigAddOptions(prefix string, f *pflag.FlagSet) {
f.String(prefix+".prune", InitConfigDefault.Prune, "pruning for a given use: \"full\" for full nodes serving RPC requests, or \"validator\" for validators")
f.Uint64(prefix+".prune-bloom-size", InitConfigDefault.PruneBloomSize, "the amount of memory in megabytes to use for the pruning bloom filter (higher values prune better)")
f.Int64(prefix+".reset-to-message", InitConfigDefault.ResetToMessage, "forces a reset to an old message height. Also set max-reorg-resequence-depth=0 to force re-reading messages")
f.Uint64(prefix+".recreate-missing-state-from", InitConfigDefault.RecreateMissingStateFrom, "block number to start recreating missing states from (0 = disabled)")
}

// Validate inspects the init options for combinations that cannot work
// together. It currently never fails: when force init is requested together
// with recreate-missing-state-from it only logs a warning, and the returned
// error is always nil.
func (c *InitConfig) Validate() error {
	recreateRequested := c.RecreateMissingStateFrom > 0
	if !c.Force || !recreateRequested {
		return nil
	}
	log.Warn("force init enabled, recreate-missing-state-from will have no effect")
	return nil
}
8 changes: 8 additions & 0 deletions cmd/nitro/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/offchainlabs/nitro/cmd/conf"
"github.com/offchainlabs/nitro/cmd/ipfshelper"
"github.com/offchainlabs/nitro/cmd/pruning"
"github.com/offchainlabs/nitro/cmd/staterecovery"
"github.com/offchainlabs/nitro/cmd/util"
"github.com/offchainlabs/nitro/execution/gethexec"
"github.com/offchainlabs/nitro/statetransfer"
Expand Down Expand Up @@ -183,6 +184,13 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo
if err != nil {
return chainDb, l2BlockChain, err
}
if config.Init.RecreateMissingStateFrom > 0 {
err = staterecovery.RecreateMissingStates(chainDb, l2BlockChain, cacheConfig, config.Init.RecreateMissingStateFrom)
if err != nil {
return chainDb, l2BlockChain, fmt.Errorf("failed to recreate missing states: %w", err)
}
}

return chainDb, l2BlockChain, nil
}
readOnlyDb.Close()
Expand Down
6 changes: 6 additions & 0 deletions cmd/nitro/nitro.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,12 @@ func (c *NodeConfig) CanReload(new *NodeConfig) error {
}

func (c *NodeConfig) Validate() error {
if c.Init.RecreateMissingStateFrom > 0 && !c.Execution.Caching.Archive {
return errors.New("recreate-missing-state-from enabled for a non-archive node")
}
if err := c.Init.Validate(); err != nil {
return err
}
if err := c.ParentChain.Validate(); err != nil {
return err
}
Expand Down
88 changes: 88 additions & 0 deletions cmd/staterecovery/staterecovery.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package staterecovery

import (
"fmt"
"time"

"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/vm"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/triedb/hashdb"
)

// RecreateMissingStates walks the chain upwards from startBlock and, for every
// block whose state cannot be opened from chainDb, re-executes that block on
// top of its parent's state and commits the resulting trie to disk.
//
// chainDb is the database the states are read from and written to, bc supplies
// the blocks, the chain config and the block processor, cacheConfig provides
// the clean-cache size for the trie database, and startBlock is the first block
// to check. A startBlock at or below the Arbitrum genesis block is bumped to
// genesis+1 with a warning.
//
// The walk stops at the first block number bc has no block for; the current
// head is only used for progress logging. An error is returned when the
// current header is nil, when the parent of the start block or its state is
// missing, when processing or committing a block fails, or when re-execution
// produces a state root different from the one recorded in the block.
func RecreateMissingStates(chainDb ethdb.Database, bc *core.BlockChain, cacheConfig *core.CacheConfig, startBlock uint64) error {
	start := time.Now()
	currentHeader := bc.CurrentBlock()
	if currentHeader == nil {
		return fmt.Errorf("current header is nil")
	}
	// target is informational only: it feeds the progress log below.
	target := currentHeader.Number.Uint64()
	current := startBlock
	genesis := bc.Config().ArbitrumChainParams.GenesisBlockNum
	if current < genesis+1 {
		current = genesis + 1
		log.Warn("recreate-missing-state-from before genesis+1, starting from genesis+1", "configured", startBlock, "override", current)
	}
	// The parent state is the base that the first missing block is replayed on.
	previousBlock := bc.GetBlockByNumber(current - 1)
	if previousBlock == nil {
		return fmt.Errorf("start block parent is missing, parent block number: %d", current-1)
	}
	hashConfig := *hashdb.Defaults
	hashConfig.CleanCacheSize = cacheConfig.TrieCleanLimit
	trieConfig := &trie.Config{
		Preimages: false,
		HashDB:    &hashConfig,
	}
	database := state.NewDatabaseWithConfig(chainDb, trieConfig)
	defer database.TrieDB().Close()
	previousState, err := state.New(previousBlock.Root(), database, nil)
	if err != nil {
		return fmt.Errorf("state of start block parent is missing: %w", err)
	}
	// we don't need to reference states with `trie.Database.Reference` here, because:
	// * either the state nodes will be read from disk and then cached in cleans cache
	// * or they will be recreated, saved to disk and then also cached in cleans cache
	logged := time.Unix(0, 0) // far in the past, so the first iteration logs immediately
	recreated := 0
	for {
		currentBlock := bc.GetBlockByNumber(current)
		if currentBlock == nil {
			break
		}
		if time.Since(logged) > 1*time.Minute {
			log.Info("Recreating missing states", "block", current, "target", target, "remaining", int64(target)-int64(current), "elapsed", time.Since(start), "recreated", recreated)
			logged = time.Now()
		}
		currentState, err := state.New(currentBlock.Root(), database, nil)
		if err != nil {
			// The state for this block cannot be opened: replay the block on
			// top of the previous state to rebuild it.
			_, _, _, err := bc.Processor().Process(currentBlock, previousState, vm.Config{})
			if err != nil {
				return fmt.Errorf("processing block %d failed: %w", current, err)
			}
			root, err := previousState.Commit(current, bc.Config().IsEIP158(currentBlock.Number()))
			if err != nil {
				return fmt.Errorf("StateDB commit failed, number %d root %v: %w", current, currentBlock.Root(), err)
			}
			// The replay must reproduce the root recorded in the block.
			if root.Cmp(currentBlock.Root()) != 0 {
				return fmt.Errorf("reached different state root after processing block %d, have %v, want %v", current, root, currentBlock.Root())
			}
			// commit to disk
			err = database.TrieDB().Commit(root, false)
			if err != nil {
				return fmt.Errorf("TrieDB commit failed, number %d root %v: %w", current, root, err)
			}
			// Re-open the freshly committed state so it can serve as the base
			// for the next block.
			currentState, err = state.New(currentBlock.Root(), database, nil)
			if err != nil {
				return fmt.Errorf("state reset after block %d failed: %w", current, err)
			}
			recreated++
		}
		current++
		previousState = currentState
	}
	log.Info("Finished recreating missing states", "elapsed", time.Since(start), "recreated", recreated)
	return nil
}
1 change: 1 addition & 0 deletions execution/gethexec/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ var DefaultCachingConfig = CachingConfig{
MaxAmountOfGasToSkipStateSaving: 0,
}

// TODO remove stack from parameters as it is no longer needed here
func DefaultCacheConfigFor(stack *node.Node, cachingConfig *CachingConfig) *core.CacheConfig {
baseConf := ethconfig.Defaults
if cachingConfig.Archive {
Expand Down
95 changes: 95 additions & 0 deletions system_tests/staterecovery_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package arbtest

import (
"context"
"testing"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/trie"
"github.com/offchainlabs/nitro/cmd/staterecovery"
"github.com/offchainlabs/nitro/execution/gethexec"
)

// TestRectreateMissingStates builds an archive L2 node that is allowed to skip
// saving state for up to 16 blocks, produces 200 transfer transactions, stops
// the node, runs staterecovery.RecreateMissingStates directly on its database,
// and then starts a second node on the same stack config to verify that the
// state of every block from 0 to the head can be opened and fully iterated.
func TestRectreateMissingStates(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	builder := NewNodeBuilder(ctx).DefaultConfig(t, true)
	builder.execConfig.Caching.Archive = true
	builder.execConfig.Caching.MaxNumberOfBlocksToSkipStateSaving = 16
	builder.execConfig.Caching.SnapshotCache = 0 // disable snapshots
	_ = builder.Build(t)
	// The L2 node is stopped explicitly further down; the flag prevents the
	// deferred cleanup from stopping it a second time.
	l2cleanupDone := false
	defer func() {
		if !l2cleanupDone {
			builder.L2.cleanup()
		}
		builder.L1.cleanup()
	}()
	builder.L2Info.GenerateAccount("User2")
	var txs []*types.Transaction
	for i := uint64(0); i < 200; i++ {
		tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil)
		txs = append(txs, tx)
		err := builder.L2.Client.SendTransaction(ctx, tx)
		Require(t, err)
	}
	for _, tx := range txs {
		_, err := builder.L2.EnsureTxSucceeded(tx)
		Require(t, err)
	}
	lastBlock, err := builder.L2.Client.BlockNumber(ctx)
	Require(t, err)
	l2cleanupDone = true
	builder.L2.cleanup()
	t.Log("stopped l2 node")
	// Run the recovery in its own scope so the stack and database are closed
	// before the second node is built.
	func() {
		stack, err := node.New(builder.l2StackConfig)
		Require(t, err)
		defer stack.Close()
		chainDb, err := stack.OpenDatabase("chaindb", 0, 0, "", false)
		Require(t, err)
		defer chainDb.Close()
		// NOTE(review): this uses the package default caching config rather
		// than builder.execConfig.Caching — confirm that is intended.
		cacheConfig := gethexec.DefaultCacheConfigFor(stack, &gethexec.DefaultCachingConfig)
		bc, err := gethexec.GetBlockChain(chainDb, cacheConfig, builder.chainConfig, builder.execConfig.TxLookupLimit)
		Require(t, err)
		err = staterecovery.RecreateMissingStates(chainDb, bc, cacheConfig, 1)
		Require(t, err)
	}()

	testClient, cleanup := builder.Build2ndNode(t, &SecondNodeParams{stackConfig: builder.l2StackConfig})
	defer cleanup()

	currentBlock := uint64(0)
	// wait for the chain to catch up
	for currentBlock < lastBlock {
		currentBlock, err = testClient.Client.BlockNumber(ctx)
		Require(t, err)
		time.Sleep(20 * time.Millisecond)
	}

	currentBlock, err = testClient.Client.BlockNumber(ctx)
	Require(t, err)
	bc := testClient.ExecNode.Backend.ArbInterface().BlockChain()
	triedb := bc.StateCache().TrieDB()
	for i := uint64(0); i <= currentBlock; i++ {
		header := bc.GetHeaderByNumber(i)
		// Fail with a readable message instead of a nil-pointer panic when a
		// header is unexpectedly absent.
		if header == nil {
			t.Fatalf("missing header for block %d", i)
		}
		_, err := bc.StateAt(header.Root)
		Require(t, err)
		tr, err := trie.New(trie.TrieID(header.Root), triedb)
		Require(t, err)
		it, err := tr.NodeIterator(nil)
		Require(t, err)
		// Visit every node of the trie; any error met along the way is
		// reported through it.Error() below.
		for it.Next(true) {
		}
		Require(t, it.Error())
	}

	// Finally make sure the restarted node still accepts and executes
	// transactions.
	tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil)
	err = testClient.Client.SendTransaction(ctx, tx)
	Require(t, err)
	_, err = testClient.EnsureTxSucceeded(tx)
	Require(t, err)
}

0 comments on commit 4128f24

Please sign in to comment.